Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
d2addb09
Commit
d2addb09
authored
Aug 10, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Motion blur compensation
parent
862257b9
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
60 additions
and
32 deletions
+60
-32
TileProcessor.cuh
src/TileProcessor.cuh
+32
-12
geometry_correction.cu
src/geometry_correction.cu
+5
-5
geometry_correction.h
src/geometry_correction.h
+9
-3
test_tp.cu
src/test_tp.cu
+14
-12
No files found.
src/TileProcessor.cuh
View file @
d2addb09
...
@@ -862,6 +862,7 @@ __device__ void convertCorrectTile(
...
@@ -862,6 +862,7 @@ __device__ void convertCorrectTile(
const float centerX,
const float centerX,
const float centerY,
const float centerY,
const int txy,
const int txy,
const float tscale,
const size_t dstride, // in floats (pixels)
const size_t dstride, // in floats (pixels)
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float * clt_kernels, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float * clt_kernels, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
...
@@ -3118,7 +3119,7 @@ __global__ void convert_correct_tiles(
...
@@ -3118,7 +3119,7 @@ __global__ void convert_correct_tiles(
int thread0 = threadIdx.x & 1; // 0,1
int thread0 = threadIdx.x & 1; // 0,1
int thread12 = threadIdx.x >>1; // now 0..3 (total number == (DTT_SIZE), will not change
int thread12 = threadIdx.x >>1; // now 0..3 (total number == (DTT_SIZE), will not change
float * tp = tp0 +
tp_task_xy_offset
+ threadIdx.x;
float * tp = tp0 +
TP_TASK_XY_OFFSET
+ threadIdx.x;
if (thread12 < num_cams) {
if (thread12 < num_cams) {
tt[tile_in_block].xy[thread12][thread0] = *(tp); // gpu_task -> xy[thread12][thread0];
tt[tile_in_block].xy[thread12][thread0] = *(tp); // gpu_task -> xy[thread12][thread0];
}
}
...
@@ -3135,7 +3136,9 @@ __global__ void convert_correct_tiles(
...
@@ -3135,7 +3136,9 @@ __global__ void convert_correct_tiles(
if (threadIdx.x == 0){ // only one thread calculates, others - wait
if (threadIdx.x == 0){ // only one thread calculates, others - wait
tt[tile_in_block].task = *(int *) (tp0++); // get first integer value
tt[tile_in_block].task = *(int *) (tp0++); // get first integer value
tt[tile_in_block].txy = *(int *) (tp0++); // get second integer value
tt[tile_in_block].txy = *(int *) (tp0++); // get second integer value
tt[tile_in_block].target_disparity = *(tp0++); //
tt[tile_in_block].target_disparity = *(tp0); //
tp0 +=3; // skip centerXY and previous increment (was tt[tile_in_block].target_disparity = *(tp0++);
tt[tile_in_block].scale = *(tp0++); // get scale to multiply before accumulating/saving
}
}
// float centerXY[2] is not used/copied here
// float centerXY[2] is not used/copied here
...
@@ -3167,7 +3170,8 @@ __global__ void convert_correct_tiles(
...
@@ -3167,7 +3170,8 @@ __global__ void convert_correct_tiles(
lpf_mask, // const int lpf_mask,
lpf_mask, // const int lpf_mask,
tt[tile_in_block].xy[ncam][0], // const float centerX,
tt[tile_in_block].xy[ncam][0], // const float centerX,
tt[tile_in_block].xy[ncam][1], // const float centerY,
tt[tile_in_block].xy[ncam][1], // const float centerY,
tt[tile_in_block].txy, // const int txy,
tt[tile_in_block].txy, // const int txy,
tt[tile_in_block].scale, // const float tscale,
dstride, // size_t dstride, // in floats (pixels)
dstride, // size_t dstride, // in floats (pixels)
(float * )(clt_tile [tile_in_block]), // float clt_tile [TILES_PER_BLOCK][NUM_CAMS][num_colors][4][DTT_SIZE][DTT_SIZE])
(float * )(clt_tile [tile_in_block]), // float clt_tile [TILES_PER_BLOCK][NUM_CAMS][num_colors][4][DTT_SIZE][DTT_SIZE])
(float * )(clt_kernels[tile_in_block]), // float clt_tile [num_colors][4][DTT_SIZE][DTT_SIZE],
(float * )(clt_kernels[tile_in_block]), // float clt_tile [num_colors][4][DTT_SIZE][DTT_SIZE],
...
@@ -4457,6 +4461,7 @@ __device__ void normalizeTileAmplitude(
...
@@ -4457,6 +4461,7 @@ __device__ void normalizeTileAmplitude(
* @param centerX full X-offset of the tile center, calculated from the geometry, distortions and disparity
* @param centerX full X-offset of the tile center, calculated from the geometry, distortions and disparity
* @param centerY full Y-offset of the tile center
* @param centerY full Y-offset of the tile center
* @param txy integer value combining tile X (low 16 bits) and tile Y (high 16 bits)
* @param txy integer value combining tile X (low 16 bits) and tile Y (high 16 bits)
* @param tscale float value to scale result. 0 - set. >0 scale and set, <0 subtract
* @param dstride stride (in floats) for the input Bayer images
* @param dstride stride (in floats) for the input Bayer images
* @param clt_tile image tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
* @param clt_tile image tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
* @param clt_kernels kernel tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
* @param clt_kernels kernel tile in shared memory [4][DTT_SIZE][DTT_SIZE1] (just allocated)
...
@@ -4482,6 +4487,7 @@ __device__ void convertCorrectTile(
...
@@ -4482,6 +4487,7 @@ __device__ void convertCorrectTile(
const float centerX,
const float centerX,
const float centerY,
const float centerY,
const int txy,
const int txy,
const float tscale,
const size_t dstride, // in floats (pixels)
const size_t dstride, // in floats (pixels)
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float * clt_kernels, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float * clt_kernels, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
...
@@ -5078,18 +5084,32 @@ __device__ void convertCorrectTile(
...
@@ -5078,18 +5084,32 @@ __device__ void convertCorrectTile(
#endif
#endif
if (tscale == 0) { // just set w/o scaling
#pragma unroll
#pragma unroll
for (int j = 0; j < DTT_SIZE * 4; j++){ // all 4 components, 8 rows
for (int j = 0; j < DTT_SIZE * 4; j++){ // all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
// shared memory tiles use DTT_SIZE1
*clt_dst = *clt_src;
*clt_dst = *clt_src;
clt_src += DTT_SIZE1;
clt_src += DTT_SIZE1;
clt_dst += DTT_SIZE;
clt_dst += DTT_SIZE;
}
} else if (tscale > 0) { // positive - scale and set. For motion blur positive should be first
#pragma unroll
for (int j = 0; j < DTT_SIZE * 4; j++){ // all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
*clt_dst = *clt_src * tscale;
clt_src += DTT_SIZE1;
clt_dst += DTT_SIZE;
}
} else { // negative - scale and subtract from existing. For motion blur positive should be first
#pragma unroll
for (int j = 0; j < DTT_SIZE * 4; j++){ // all 4 components, 8 rows
// shared memory tiles use DTT_SIZE1
*clt_dst += *clt_src * tscale;
clt_src += DTT_SIZE1;
clt_dst += DTT_SIZE;
}
}
}
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
// just for testing perform imclt, save result to clt_kernels
//#endif
}
}
...
...
src/geometry_correction.cu
View file @
d2addb09
...
@@ -460,11 +460,11 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -460,11 +460,11 @@ extern "C" __global__ void get_tiles_offsets(
// common code, calculated in parallel
// common code, calculated in parallel
/// int cxy = gpu_tasks[task_num].txy;
/// int cxy = gpu_tasks[task_num].txy;
/// float disparity = gpu_tasks[task_num].target_disparity;
/// float disparity = gpu_tasks[task_num].target_disparity;
float disparity = * (gpu_ftasks + task_size * task_num +
2
);
float disparity = * (gpu_ftasks + task_size * task_num +
TP_TASK_DISPARITY_OFFSET
);
float *centerXY = gpu_ftasks + task_size * task_num +
tp_task_centerXY_offset
;
float *centerXY = gpu_ftasks + task_size * task_num +
TP_TASK_CENTERXY_OFFSET
;
float px = *(centerXY);
float px = *(centerXY);
float py = *(centerXY + 1);
float py = *(centerXY + 1);
int cxy = *(int *) (gpu_ftasks + task_size * task_num +
1
);
int cxy = *(int *) (gpu_ftasks + task_size * task_num +
TP_TASK_TXY_OFFSET
);
int tileX = (cxy & 0xffff);
int tileX = (cxy & 0xffff);
int tileY = (cxy >> 16);
int tileY = (cxy >> 16);
...
@@ -705,7 +705,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -705,7 +705,7 @@ extern "C" __global__ void get_tiles_offsets(
/// gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
/// gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
/// gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
/// gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
/// gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
/// gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
float * disp_dist_p = gpu_ftasks + task_size * task_num +
tp_task_xy_offset
+ num_cams* 2 + ncam * 4; // ncam = threadIdx.x, so each thread will have different offset
float * disp_dist_p = gpu_ftasks + task_size * task_num +
TP_TASK_XY_OFFSET
+ num_cams* 2 + ncam * 4; // ncam = threadIdx.x, so each thread will have different offset
*(disp_dist_p++) = disp_dist[0]; // global memory
*(disp_dist_p++) = disp_dist[0]; // global memory
*(disp_dist_p++) = disp_dist[1];
*(disp_dist_p++) = disp_dist[1];
*(disp_dist_p++) = disp_dist[2];
*(disp_dist_p++) = disp_dist[2];
...
@@ -768,7 +768,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -768,7 +768,7 @@ extern "C" __global__ void get_tiles_offsets(
// gpu_tasks[task_num].xy[ncam][1] = pXY[1];
// gpu_tasks[task_num].xy[ncam][1] = pXY[1];
// float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// .xy goes right after 3 commonn (tak, txy and target_disparity
// .xy goes right after 3 commonn (tak, txy and target_disparity
float * tile_xy_p = gpu_ftasks + task_size * task_num +
tp_task_xy_offset
+ ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
float * tile_xy_p = gpu_ftasks + task_size * task_num +
TP_TASK_XY_OFFSET
+ ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
*(tile_xy_p++) = pXY[0]; // global memory
*(tile_xy_p++) = pXY[0]; // global memory
*(tile_xy_p++) = pXY[1]; // global memory
*(tile_xy_p++) = pXY[1]; // global memory
}
}
...
...
src/geometry_correction.h
View file @
d2addb09
...
@@ -64,13 +64,19 @@ struct tp_task {
...
@@ -64,13 +64,19 @@ struct tp_task {
float
target_disparity
;
float
target_disparity
;
float
centerXY
[
2
];
// "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
float
centerXY
[
2
];
// "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
// if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
// if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
float
xy
[
NUM_CAMS
][
2
];
// scale == 0 - old way, just set. Scale !=0 - accumulate. Or make > 0 - set too? only negative - subtract?
float
scale
;
// multiply during direct conversion before accumulating in TD - used for motion blur correction
float
xy
[
NUM_CAMS
][
2
];
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
};
};
#define get_task_size(x) (sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - x))
#define get_task_size(x) (sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - x))
#define tp_task_xy_offset 5
#define TP_TASK_TASK_OFFSET 0
#define tp_task_centerXY_offset 3
#define TP_TASK_TXY_OFFSET 1
#define TP_TASK_DISPARITY_OFFSET 2
#define TP_TASK_CENTERXY_OFFSET 3
#define TP_TASK_SCALE_OFFSET 5
#define TP_TASK_XY_OFFSET 6
struct
corr_vector
{
struct
corr_vector
{
float
tilt
[
NUM_CAMS
-
1
];
// 0..2
float
tilt
[
NUM_CAMS
-
1
];
// 0..2
...
...
src/test_tp.cu
View file @
d2addb09
...
@@ -1109,10 +1109,12 @@ int main(int argc, char **argv)
...
@@ -1109,10 +1109,12 @@ int main(int argc, char **argv)
int task_txy = tx + (ty << 16);
int task_txy = tx + (ty << 16);
float task_target_disparity = DBG_DISPARITY;
float task_target_disparity = DBG_DISPARITY;
float * tp = ftask_data + task_size * nt;
float * tp = ftask_data + task_size * nt;
*(tp++) = *(float *) &task_task;
*(tp + TP_TASK_TASK_OFFSET) = *(float *) &task_task;
*(tp++) = *(float *) &task_txy;
*(tp + TP_TASK_TXY_OFFSET) = *(float *) &task_txy;
*(tp++) = task_target_disparity;
*(tp + TP_TASK_DISPARITY_OFFSET) = task_target_disparity;
tp += 2; // skip centerX, centerY
// tp += 2; // skip centerX, centerY
*(tp + TP_TASK_SCALE_OFFSET) = 0; // 0.5f; // ,0; // scale, 0 - old way, just set
tp+= TP_TASK_XY_OFFSET;
for (int ncam = 0; ncam < num_cams; ncam++) {
for (int ncam = 0; ncam < num_cams; ncam++) {
*(tp++) = tile_coords_h[ncam][nt][0];
*(tp++) = tile_coords_h[ncam][nt][0];
*(tp++) = tile_coords_h[ncam][nt][1];
*(tp++) = tile_coords_h[ncam][nt][1];
...
@@ -1385,15 +1387,15 @@ int main(int argc, char **argv)
...
@@ -1385,15 +1387,15 @@ int main(int argc, char **argv)
printf("new_task txy = 0x%x\n", *(int *) (ftask_data1 + task_size * DBG_TILE + 1)) ; // task_data1[DBG_TILE].txy);
printf("new_task txy = 0x%x\n", *(int *) (ftask_data1 + task_size * DBG_TILE + 1)) ; // task_data1[DBG_TILE].txy);
for (int ncam = 0; ncam < num_cams; ncam++){
for (int ncam = 0; ncam < num_cams; ncam++){
printf("camera %d pX old %f new %f diff = %f\n", ncam,
printf("camera %d pX old %f new %f diff = %f\n", ncam,
*(ftask_data + task_size * DBG_TILE +
tp_task_xy_offset
+ 2*ncam + 0),
*(ftask_data + task_size * DBG_TILE +
TP_TASK_XY_OFFSET
+ 2*ncam + 0),
*(ftask_data1 + task_size * DBG_TILE +
tp_task_xy_offset
+ 2*ncam + 0),
*(ftask_data1 + task_size * DBG_TILE +
TP_TASK_XY_OFFSET
+ 2*ncam + 0),
(*(ftask_data + task_size * DBG_TILE +
tp_task_xy_offset
+ 2*ncam + 0)) -
(*(ftask_data + task_size * DBG_TILE +
TP_TASK_XY_OFFSET
+ 2*ncam + 0)) -
(*(ftask_data1 + task_size * DBG_TILE +
tp_task_xy_offset
+ 2*ncam + 0)));
(*(ftask_data1 + task_size * DBG_TILE +
TP_TASK_XY_OFFSET
+ 2*ncam + 0)));
printf("camera %d pY old %f new %f diff = %f\n", ncam,
printf("camera %d pY old %f new %f diff = %f\n", ncam,
*(ftask_data +
task_size * DBG_TILE + tp_task_xy_offset
+ 2*ncam + 1),
*(ftask_data +
task_size * DBG_TILE + TP_TASK_XY_OFFSET
+ 2*ncam + 1),
*(ftask_data1 +
task_size * DBG_TILE + tp_task_xy_offset
+ 2*ncam + 1),
*(ftask_data1 +
task_size * DBG_TILE + TP_TASK_XY_OFFSET
+ 2*ncam + 1),
(*(ftask_data +
task_size * DBG_TILE + tp_task_xy_offset
+ 2*ncam + 1)) -
(*(ftask_data +
task_size * DBG_TILE + TP_TASK_XY_OFFSET
+ 2*ncam + 1)) -
(*(ftask_data1 + task_size * DBG_TILE +
tp_task_xy_offset
+ 2*ncam + 1)));
(*(ftask_data1 + task_size * DBG_TILE +
TP_TASK_XY_OFFSET
+ 2*ncam + 1)));
}
}
#endif //#ifdef DBG_TILE
#endif //#ifdef DBG_TILE
#endif // TEST_GEOM_CORR
#endif // TEST_GEOM_CORR
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment