Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
26689442
Commit
26689442
authored
Apr 19, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
more debugging
parent
8caaa2db
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
47 additions
and
17 deletions
+47
-17
TileProcessor.cuh
src/TileProcessor.cuh
+29
-7
geometry_correction.cu
src/geometry_correction.cu
+12
-5
tp_defines.h
src/tp_defines.h
+6
-5
No files found.
src/TileProcessor.cuh
View file @
26689442
...
@@ -2149,7 +2149,8 @@ __global__ void textures_accumulate( // (8,4,1) (N,1,1)
...
@@ -2149,7 +2149,8 @@ __global__ void textures_accumulate( // (8,4,1) (N,1,1)
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
(float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
(float *) shr1.rgbaw, // float * rgba,
(float *) shr1.rgbaw, // float * rgba,
// result
// if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed
// to calculate max_diff_shared
calc_extra, // int calc_extra, // 1 - calcualate ports_rgb, max_diff
calc_extra, // int calc_extra, // 1 - calcualate ports_rgb, max_diff
ports_rgb_shared, // float ports_rgb_shared [NUM_COLORS][NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
ports_rgb_shared, // float ports_rgb_shared [NUM_COLORS][NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
...
@@ -3512,7 +3513,7 @@ __device__ void tile_combine_rgba(
...
@@ -3512,7 +3513,7 @@ __device__ void tile_combine_rgba(
float * mclt_tile, // debayer // has gaps to align with union !
float * mclt_tile, // debayer // has gaps to align with union !
float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
float * rgba, // result
float * rgba, // result
int calc_extra, // 1 - calcu
alate ports_rgb, max_diff
int calc_extra, // 1 - calcu
late ports_rgb, max_diff (if not null - will ignore rbg_tile !)
float ports_rgb_shared [NUM_COLORS][NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
float ports_rgb_shared [NUM_COLORS][NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
float max_diff_shared [NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
float max_diff_shared [NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
float max_diff_tmp [NUM_CAMS][TEXTURE_THREADS_PER_TILE],
float max_diff_tmp [NUM_CAMS][TEXTURE_THREADS_PER_TILE],
...
@@ -3846,7 +3847,7 @@ __device__ void tile_combine_rgba(
...
@@ -3846,7 +3847,7 @@ __device__ void tile_combine_rgba(
#endif // #ifdef DEBUG9
#endif // #ifdef DEBUG9
///
///
if (rbg_tile
) {
if (rbg_tile
&& (calc_extra == 0)) { // will keep debayered if (calc_extra == 0)
float k = 0.0;
float k = 0.0;
int rbga_offset = colors * (DTT_SIZE2*DTT_SIZE21); // padded in union !
int rbga_offset = colors * (DTT_SIZE2*DTT_SIZE21); // padded in union !
#pragma unroll
#pragma unroll
...
@@ -3923,12 +3924,12 @@ __device__ void tile_combine_rgba(
...
@@ -3923,12 +3924,12 @@ __device__ void tile_combine_rgba(
int row = (pass >> 1);
int row = (pass >> 1);
int col = ((pass & 1) << 3) + threadIdx.x;
int col = ((pass & 1) << 3) + threadIdx.x;
int i = row * DTT_SIZE21 + col;
int i = row * DTT_SIZE21 + col;
int row_sym = row ^ ((row & 8)? 0xf : 0);
///
int row_sym = row ^ ((row & 8)? 0xf : 0);
int col_sym = col ^ ((col & 8)? 0xf : 0);
///
int col_sym = col ^ ((col & 8)? 0xf : 0);
// Was it a bug?
// Was it a bug?
// float wnd2 = HWINDOW_SQ[row_sym] * HWINDOW_SQi[col_sym];
// float wnd2 = HWINDOW_SQ[row_sym] * HWINDOW_SQi[col_sym];
float wnd2 = HWINDOW_SQ[row_sym] * HWINDOW_SQ[col_sym];
///
float wnd2 = HWINDOW_SQ[row_sym] * HWINDOW_SQ[col_sym];
float * mclt_cam_i = mclt_tile + colors_offset * cam + i;
float * mclt_cam_i = mclt_tile + colors_offset * cam + i;
// float * mclt_cam_i = rbg_tile + colors_offset * cam + i;
// float * mclt_cam_i = rbg_tile + colors_offset * cam + i;
//
//
...
@@ -3939,7 +3940,7 @@ __device__ void tile_combine_rgba(
...
@@ -3939,7 +3940,7 @@ __device__ void tile_combine_rgba(
float dc = *(mclt_cam_i + (DTT_SIZE2*(DTT_SIZE21 + 1)) * ncol) - *(rgba + (DTT_SIZE2*DTT_SIZE21) * ncol + i);
float dc = *(mclt_cam_i + (DTT_SIZE2*(DTT_SIZE21 + 1)) * ncol) - *(rgba + (DTT_SIZE2*DTT_SIZE21) * ncol + i);
d2 += *(chn_weights + ncol) * dc * dc;
d2 += *(chn_weights + ncol) * dc * dc;
}
}
// d2 *= wnd2;
//
/
d2 *= wnd2;
max_diff_tmp[cam][threadIdx.x] = fmaxf(max_diff_tmp[cam][threadIdx.x], d2);
max_diff_tmp[cam][threadIdx.x] = fmaxf(max_diff_tmp[cam][threadIdx.x], d2);
}
}
__syncthreads();
__syncthreads();
...
@@ -3959,8 +3960,29 @@ __device__ void tile_combine_rgba(
...
@@ -3959,8 +3960,29 @@ __device__ void tile_combine_rgba(
for (int i = 0; i < TEXTURE_THREADS_PER_TILE; i++){
for (int i = 0; i < TEXTURE_THREADS_PER_TILE; i++){
printf("tmp[%d] %f %f %f %f\n",i, max_diff_tmp[0][i],max_diff_tmp[1][i],max_diff_tmp[2][i],max_diff_tmp[3][i]);
printf("tmp[%d] %f %f %f %f\n",i, max_diff_tmp[0][i],max_diff_tmp[1][i],max_diff_tmp[2][i],max_diff_tmp[3][i]);
}
}
for (int ncol = 0; ncol < colors; ncol++){
printf("\n average for color %d\n",ncol);
debug_print_mclt(
rgba + (DTT_SIZE2*DTT_SIZE21) * ncol,
-1);
for (int ncam = 0; ncam < NUM_CAMS;ncam ++){
printf("\n mclt for color %d, camera %d\n",ncol,ncam);
debug_print_mclt(
mclt_tile + (DTT_SIZE2*(DTT_SIZE21 + 1)) * ncol + colors_offset * ncam,
-1);
#if 0
printf("\n rgb_tile for color %d, camera %d\n",ncol,ncam);
if (rgb_tile) {
debug_print_mclt(
rbg_tile + (DTT_SIZE2*(DTT_SIZE21 + 1)) * ncol + colors_offset * ncam,
-1);
}
#endif
}
}
}
}
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
#endif // #ifdef DEBUG22
#endif // #ifdef DEBUG22
}
}
if (calc_extra) {
if (calc_extra) {
...
...
src/geometry_correction.cu
View file @
26689442
...
@@ -389,6 +389,13 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -389,6 +389,13 @@ extern "C" __global__ void get_tiles_offsets(
float disparity = gpu_tasks[task_num].target_disparity;
float disparity = gpu_tasks[task_num].target_disparity;
int tileX = (cxy & 0xffff);
int tileX = (cxy & 0xffff);
int tileY = (cxy >> 16);
int tileY = (cxy >> 16);
#ifdef DEBUG23
if ((ncam == 0) && (tileX == DBG_TILE_X) && (tileY == DBG_TILE_Y)){
printf ("\n get_tiles_offsets(): Debugging tileX=%d, tileY=%d, ncam = %d\n", tileX,tileY,ncam);
printf("\n");
__syncthreads();
}
#endif //#ifdef DEBUG23
float px = tileX * DTT_SIZE + DTT_SIZE/2; // - shiftX;
float px = tileX * DTT_SIZE + DTT_SIZE/2; // - shiftX;
float py = tileY * DTT_SIZE + DTT_SIZE/2; // - shiftY;
float py = tileY * DTT_SIZE + DTT_SIZE/2; // - shiftY;
...
@@ -524,7 +531,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -524,7 +531,7 @@ extern "C" __global__ void get_tiles_offsets(
float dpXci_droll = drvi_drl[0] * norm_z - pXci * drvi_drl[2] / rvi[2];
float dpXci_droll = drvi_drl[0] * norm_z - pXci * drvi_drl[2] / rvi[2];
float dpYci_droll = drvi_drl[1] * norm_z - pYci * drvi_drl[2] / rvi[2];
float dpYci_droll = drvi_drl[1] * norm_z - pYci * drvi_drl[2] / rvi[2];
#ifdef DEBUG21
#ifdef DEBUG21
0
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("drvi_daz[0] = %f, drvi_daz[1] = %f, drvi_daz[2] = %f\n", drvi_daz[0], drvi_daz[1], drvi_daz[2]);
printf("drvi_daz[0] = %f, drvi_daz[1] = %f, drvi_daz[2] = %f\n", drvi_daz[0], drvi_daz[1], drvi_daz[2]);
printf("drvi_dtl[0] = %f, drvi_dtl[1] = %f, drvi_dtl[2] = %f\n", drvi_dtl[0], drvi_dtl[1], drvi_dtl[2]);
printf("drvi_dtl[0] = %f, drvi_dtl[1] = %f, drvi_dtl[2] = %f\n", drvi_dtl[0], drvi_dtl[1], drvi_dtl[2]);
...
@@ -554,7 +561,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -554,7 +561,7 @@ extern "C" __global__ void get_tiles_offsets(
dd1[1][0] = (-rot_deriv.rots[ncam][1][0]*rXY[0] -rot_deriv.rots[ncam][1][1]*rXY[1])*norm_z;
dd1[1][0] = (-rot_deriv.rots[ncam][1][0]*rXY[0] -rot_deriv.rots[ncam][1][1]*rXY[1])*norm_z;
dd1[1][1] = ( rot_deriv.rots[ncam][1][0]*rXY[1] -rot_deriv.rots[ncam][1][1]*rXY[0])*norm_z;
dd1[1][1] = ( rot_deriv.rots[ncam][1][0]*rXY[1] -rot_deriv.rots[ncam][1][1]*rXY[0])*norm_z;
#ifdef DEBUG21
#ifdef DEBUG21
0
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("dd1[0][0] = %f, dd1[0][1] = %f\n",dd1[0][0],dd1[0][1]);
printf("dd1[0][0] = %f, dd1[0][1] = %f\n",dd1[0][0],dd1[0][1]);
printf("dd1[1][0] = %f, dd1[1][1] = %f\n",dd1[1][0],dd1[1][1]);
printf("dd1[1][0] = %f, dd1[1][1] = %f\n",dd1[1][0],dd1[1][1]);
...
@@ -622,7 +629,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -622,7 +629,7 @@ extern "C" __global__ void get_tiles_offsets(
disp_dist[2] = s_dist * scale_distortXrot2Xdd1[0][0] + c_dist * scale_distortXrot2Xdd1[1][0];
disp_dist[2] = s_dist * scale_distortXrot2Xdd1[0][0] + c_dist * scale_distortXrot2Xdd1[1][0];
disp_dist[3] = s_dist * scale_distortXrot2Xdd1[0][1] + c_dist * scale_distortXrot2Xdd1[1][1];
disp_dist[3] = s_dist * scale_distortXrot2Xdd1[0][1] + c_dist * scale_distortXrot2Xdd1[1][1];
#ifdef DEBUG21
#ifdef DEBUG21
0
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("scale_distortXrot2Xdd1[0][0] = %f, scale_distortXrot2Xdd1[0][1] = %f\n",scale_distortXrot2Xdd1[0][0],scale_distortXrot2Xdd1[0][1]);
printf("scale_distortXrot2Xdd1[0][0] = %f, scale_distortXrot2Xdd1[0][1] = %f\n",scale_distortXrot2Xdd1[0][0],scale_distortXrot2Xdd1[0][1]);
printf("scale_distortXrot2Xdd1[1][0] = %f, scale_distortXrot2Xdd1[1][1] = %f\n",scale_distortXrot2Xdd1[1][0],scale_distortXrot2Xdd1[1][1]);
printf("scale_distortXrot2Xdd1[1][0] = %f, scale_distortXrot2Xdd1[1][1] = %f\n",scale_distortXrot2Xdd1[1][0],scale_distortXrot2Xdd1[1][1]);
...
@@ -654,7 +661,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -654,7 +661,7 @@ extern "C" __global__ void get_tiles_offsets(
dpYci_dtilt * extrinsic_corr.imu_rot[0] +
dpYci_dtilt * extrinsic_corr.imu_rot[0] +
dpYci_dazimuth * extrinsic_corr.imu_rot[1] +
dpYci_dazimuth * extrinsic_corr.imu_rot[1] +
dpYci_droll * extrinsic_corr.imu_rot[2]);
dpYci_droll * extrinsic_corr.imu_rot[2]);
#ifdef DEBUG21
#ifdef DEBUG21
0
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("delta_t = %f, ers_Xci = %f, ers_Yci = %f\n", delta_t, ers_Xci, ers_Yci);
printf("delta_t = %f, ers_Xci = %f, ers_Yci = %f\n", delta_t, ers_Xci, ers_Yci);
}
}
...
@@ -677,7 +684,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -677,7 +684,7 @@ extern "C" __global__ void get_tiles_offsets(
pXY[0] += ers_Xci * rD2rND; // added correction to pixel X
pXY[0] += ers_Xci * rD2rND; // added correction to pixel X
pXY[1] += ers_Yci * rD2rND; // added correction to pixel Y
pXY[1] += ers_Yci * rD2rND; // added correction to pixel Y
#ifdef DEBUG21
#ifdef DEBUG21
0
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("k = %f, wdisparity = %f, dwdisp_dz = %f\n", k, wdisparity, dwdisp_dz);
printf("k = %f, wdisparity = %f, dwdisp_dz = %f\n", k, wdisparity, dwdisp_dz);
printf("dpXci_pYci_imu_lin[0][0] = %f, dpXci_pYci_imu_lin[0][2] = %f\n", dpXci_pYci_imu_lin[0][0],dpXci_pYci_imu_lin[0][2]);
printf("dpXci_pYci_imu_lin[0][0] = %f, dpXci_pYci_imu_lin[0][2] = %f\n", dpXci_pYci_imu_lin[0][0],dpXci_pYci_imu_lin[0][2]);
...
...
src/tp_defines.h
View file @
26689442
...
@@ -72,7 +72,7 @@
...
@@ -72,7 +72,7 @@
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
#define DBG_DISPARITY 56.0 // disparity for which to calculate offsets (not needed in Java)
#define DBG_DISPARITY
0.0 //
56.0 // disparity for which to calculate offsets (not needed in Java)
#define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
...
@@ -83,8 +83,8 @@
...
@@ -83,8 +83,8 @@
//#define DBG_TILE_X 40
//#define DBG_TILE_X 40
//#define DBG_TILE_Y 80
//#define DBG_TILE_Y 80
#define DBG_TILE_X 151 // 161 // 49
#define DBG_TILE_X 1
62 // 1
51 // 161 // 49
#define DBG_TILE_Y 69 // 111 // 66
#define DBG_TILE_Y
121 //
69 // 111 // 66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#undef DBG_MARK_DBG_TILE
#undef DBG_MARK_DBG_TILE
...
@@ -114,10 +114,11 @@
...
@@ -114,10 +114,11 @@
// geom
// geom
//#define DEBUG20 1
//#define DEBUG20 1
// #define DEBUG21 1 // Geometry Correction
#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
//#define DEBUG21 1 // Geometry Correction
#define DEBUG22 1
//#define DEBUG22 1
#define DEBUG23 1
#endif //#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
#endif //#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment