Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
f72f2a9c
Commit
f72f2a9c
authored
May 17, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed interscene correlation bug
parent
13b9ba89
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
102 additions
and
30 deletions
+102
-30
TileProcessor.cuh
src/TileProcessor.cuh
+26
-24
test_tp.cu
src/test_tp.cu
+76
-6
No files found.
src/TileProcessor.cuh
View file @
f72f2a9c
...
...
@@ -1303,30 +1303,31 @@ extern "C" __global__ void correlate2D_inter( // only results in TD
gpu_corr_indices, // int * gpu_corr_indices, // array of correlation tasks
pnum_corr_tiles); // int * pnum_corr_tiles); // pointer to the length of correlation tasks array
cudaDeviceSynchronize();
int num_corr_tiles = (*pnum_corr_tiles) * num_sel_sensors/ (num_sel_sensors + 1); // remove sum from count
int num_corr_tiles_with_sum = (*pnum_corr_tiles);
int num_corr_tiles_wo_sum = num_corr_tiles_with_sum * num_sel_sensors/ (num_sel_sensors + 1); // remove sum from count
dim3 threads_corr(CORR_THREADS_PER_TILE, CORR_TILES_PER_BLOCK, 1);
dim3 grid_corr((num_corr_tiles + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
dim3 grid_corr((num_corr_tiles_wo_sum + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
correlate2D_inter_inner<<<grid_corr,threads_corr>>>( // will only process to TD, no normalisations and back conversion
num_cams, // int num_cams,
num_sel_sensors, // int num_sel_sensors, // number of sensors to correlate (not counting sum of all)
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
gpu_clt_ref, // float ** gpu_clt_ref, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
colors, // int colors, // number of colors (3/1)
scale0, // float scale0, // scale for R
scale1, // float scale1, // scale for B
scale2, // float scale2, // scale for G
num_corr_tiles, // int num_corr_tiles, // number of correlation tiles to process (here it includes sum for compatibility with intra format)
gpu_corr_indices, // int * gpu_corr_indices, // packed tile + sensor (0xff - sum)
corr_stride, // size_t corr_stride, // in floats
gpu_corrs); // float * gpu_corrs) // correlation output data (either pixel domain or transform domain
num_cams, // int num_cams,
num_sel_sensors, // int num_sel_sensors, // number of sensors to correlate (not counting sum of all)
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
gpu_clt_ref, // float ** gpu_clt_ref, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
colors, // int colors, // number of colors (3/1)
scale0, // float scale0, // scale for R
scale1, // float scale1, // scale for B
scale2, // float scale2, // scale for G
num_corr_tiles_with_sum, // int num_corr_tiles, // number of correlation tiles to process (here it includes sum for compatibility with intra format)
gpu_corr_indices, // int * gpu_corr_indices, // packed tile + sensor (0xff - sum)
corr_stride, // size_t corr_stride, // in floats
gpu_corrs); // float * gpu_corrs) // correlation output data (either pixel domain or transform domain
dim3 grid_combine((num_tiles + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
combine_inter<<<grid_combine,threads_corr>>>( // combine per-senor interscene correlations
num_sel_sensors, // int num_sel_sensors, // number of sensors to correlate (not counting sum of all)
num_corr_tiles, // int num_corr_tiles, // number of correlation tiles to process (here it includes sum)
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
corr_stride, // size_t corr_stride, // in floats
gpu_corrs); // float * gpu_corrs); // correlation output data (either pixel domain or transform domain
num_sel_sensors, // int num_sel_sensors, // number of sensors to correlate (not counting sum of all)
num_corr_tiles_with_sum, // int num_corr_tiles, // number of correlation tiles to process (here it includes sum)
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair NOT USED
corr_stride, // size_t corr_stride, // in floats
gpu_corrs); // float * gpu_corrs); // correlation output data (either pixel domain or transform domain
}
}
}
...
...
@@ -1519,6 +1520,7 @@ extern "C" __global__ void correlate2D_inter_inner( // will only process to TD,
__syncthreads();// __syncwarp();
} // if (color == 1){ // LPF only after B (nothing in mono)
} // for (int color = 0; color < colors; color++){
__syncthreads();// __syncwarp();
float *mem_corr = gpu_corrs + corr_stride * corr_offset + threadIdx.x;
float *clt = clt_corr + threadIdx.x;
#pragma unroll
...
...
@@ -2901,11 +2903,11 @@ __global__ void index_correlate(
__global__ void index_inter_correlate(
int num_cams,
int sel_sensors,
float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * gpu_corr_indices, // array of correlation tasks
int * pnum_corr_tiles) // pointer to the length of correlation tasks array
float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * gpu_corr_indices, // array of correlation tasks
int * pnum_corr_tiles) // pointer to the length of correlation tasks array
{
int num_tile = blockIdx.x * blockDim.x + threadIdx.x;
if (num_tile >= num_tiles){
...
...
src/test_tp.cu
View file @
f72f2a9c
...
...
@@ -1116,6 +1116,7 @@ int main(int argc, char **argv)
float * corr_img; // = (float *)malloc(corr_img_size * sizeof(float));
float * cpu_corr; // = (float *)malloc(rslt_corr_size * sizeof(float));
float * cpu_corr_td;
int * cpu_corr_indices; // = (int *) malloc(num_corr_indices * sizeof(int));
...
...
@@ -1799,8 +1800,8 @@ int main(int argc, char **argv)
#ifdef CORR_INTER_SELF
int sel_sensors = 0xffff; // 0x7fff; // 0xffff;
int num_sel_senosrs = 16; // 15; // 16;
num_pairs = num_sel_senosrs+1;
int num_sel_sensors = 16; // 15; // 16;
num_pairs = num_sel_sensors+1;
num_corr_indices = num_pairs * num_tiles;
StopWatchInterface *timerINTERSELF = 0;
sdkCreateTimer(&timerINTERSELF);
...
...
@@ -1838,7 +1839,6 @@ int main(int argc, char **argv)
sizeof(int),
cudaMemcpyDeviceToHost));
checkCudaErrors(cudaDeviceSynchronize());
corr2D_normalize<<<1,1>>>(
num_corrs, //tp_task_size, // int num_corr_tiles, // number of correlation tiles to process
dstride_corr_td/sizeof(float), // const size_t corr_stride_td, // in floats
...
...
@@ -1884,7 +1884,7 @@ int main(int argc, char **argv)
}
// int num_pairs = 120;
// int sel_sensors = 0xffff;
// int num_sel_senosrs = 16;
// int num_sel_sensors = 16;
// int corr_size = 2 * CORR_OUT_RAD + 1; // 15
// int num_tiles = tp_task_size; // TILESX * TILESYA; //Was this on 01/22/2022
// int num_corr_indices = num_pairs * num_tiles;
...
...
@@ -1893,7 +1893,7 @@ int main(int argc, char **argv)
int ctt = ( cpu_corr_indices[ict] >> CORR_NTILE_SHIFT);
int cpair = cpu_corr_indices[ict] & ((1 << CORR_NTILE_SHIFT) - 1);
if (cpair == 0xff){
cpair = num_sel_senosrs;
cpair = num_sel_sensors;
}
int ty = ctt / TILESX;
int tx = ctt % TILESX;
...
...
@@ -1907,7 +1907,6 @@ int main(int argc, char **argv)
}
}
}
#ifndef NSAVE_CORR
printf("Writing interscene phase correlation data to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n",
result_inter_td_norm_file, (TILESX*16),(TILESYA*16), num_pairs, (corr_img_size * sizeof(float)) ) ;
...
...
@@ -1916,6 +1915,77 @@ int main(int argc, char **argv)
corr_img_size, // int size, // length in elements
result_inter_td_norm_file); // const char * path) // file path
#endif
#if 1
int rslt_corr_size_td = num_corrs * DTT_SIZE2*DTT_SIZE2;
cpu_corr_td = (float *)malloc(rslt_corr_size_td * sizeof(float));
checkCudaErrors(cudaMemcpy2D(
cpu_corr_td,
(DTT_SIZE2*DTT_SIZE2) * sizeof(float),
gpu_corrs_td,
dstride_corr_td,
(DTT_SIZE2*DTT_SIZE2) * sizeof(float),
num_corrs,
cudaMemcpyDeviceToHost));
// Reuse the same corr_img for TD images - each tile is still 16x16 (corr was 15x15 and gap)
for (int ict = 0; ict < num_corr_indices; ict++){
int ctt = ( cpu_corr_indices[ict] >> CORR_NTILE_SHIFT);
int cpair = cpu_corr_indices[ict] & ((1 << CORR_NTILE_SHIFT) - 1);
if (cpair == 0xff){
cpair = num_sel_sensors;
}
int ty = ctt / TILESX;
int tx = ctt % TILESX;
int src_offs0 = ict * DTT_SIZE2*DTT_SIZE2;
int dst_offs0 = cpair * (num_tiles * 16 * 16) + (ty * 16 * TILESX * 16) + (tx * 16);
for (int iquad = 0; iquad < 4; iquad ++) {
int iqy = (iquad >> 1) & 1;
int iqx = (iquad >> 0) & 1;
for (int iy = 0; iy < DTT_SIZE; iy++){
int src_offs = src_offs0 + iy * DTT_SIZE + iquad * DTT_SIZE * DTT_SIZE;
int dst_offs = dst_offs0 + (iy + DTT_SIZE * iqy)* (TILESX * 16) + iqx * DTT_SIZE;
for (int ix = 0; ix < DTT_SIZE; ix++){
corr_img[dst_offs++] = cpu_corr_td[src_offs++];
}
}
}
}
#ifndef NSAVE_CORR
printf("Writing interscene phase correlation TD data");
writeFloatsToFile(
corr_img, // float * data, // allocated array
corr_img_size, // int size, // length in elements
"clt/aux_interscene-TD.raw"); // const char * path) // file path
#endif
int corr_index_img_length = TILESX*TILESY*(num_sel_sensors+1) ;
float *corr_index_img = (float *)malloc(corr_index_img_length * sizeof(float));
for (int i = 0; i < corr_index_img_length; i++){
corr_index_img[i] = NAN;
}
for (int ict = 0; ict < num_corr_indices; ict++){
int ctt = ( cpu_corr_indices[ict] >> CORR_NTILE_SHIFT);
int cpair = cpu_corr_indices[ict] & ((1 << CORR_NTILE_SHIFT) - 1);
if (cpair == 0xff){
cpair = num_sel_sensors;
}
int ty = ctt / TILESX;
int tx = ctt % TILESX;
corr_index_img[cpair*TILESX*TILESY + TILESX*ty + tx] = ict; // cpu_corr_indices[ict];
}
printf("Writing interscene indices\n");
writeFloatsToFile(
corr_index_img, // float * data, // allocated array
corr_index_img_length, // int size, // length in elements
"clt/aux_inter-indices.raw"); // const char * path) // file path
free (corr_index_img);
free (cpu_corr_td);
#endif
free (cpu_corr);
free (cpu_corr_indices);
free (corr_img);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment