Commit 26689442 authored by Andrey Filippov's avatar Andrey Filippov

more debugging

parent 8caaa2db
...@@ -2149,7 +2149,8 @@ __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -2149,7 +2149,8 @@ __global__ void textures_accumulate( // (8,4,1) (N,1,1)
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union ! (float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output (float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
(float *) shr1.rgbaw, // float * rgba, (float *) shr1.rgbaw, // float * rgba,
// result // if calc_extra, rbg_tile will be ignored and output generated with blurred (debayered) data. Done so as debayered data is needed
// to calculate max_diff_shared
calc_extra, // int calc_extra, // 1 - calcualate ports_rgb, max_diff calc_extra, // int calc_extra, // 1 - calcualate ports_rgb, max_diff
ports_rgb_shared, // float ports_rgb_shared [NUM_COLORS][NUM_CAMS], // return to system memory (optionally pass null to skip calculation) ports_rgb_shared, // float ports_rgb_shared [NUM_COLORS][NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
...@@ -3512,7 +3513,7 @@ __device__ void tile_combine_rgba( ...@@ -3512,7 +3513,7 @@ __device__ void tile_combine_rgba(
float * mclt_tile, // debayer // has gaps to align with union ! float * mclt_tile, // debayer // has gaps to align with union !
float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
float * rgba, // result float * rgba, // result
int calc_extra, // 1 - calcualate ports_rgb, max_diff int calc_extra, // 1 - calculate ports_rgb, max_diff (if not null - will ignore rbg_tile !)
float ports_rgb_shared [NUM_COLORS][NUM_CAMS], // return to system memory (optionally pass null to skip calculation) float ports_rgb_shared [NUM_COLORS][NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
float max_diff_shared [NUM_CAMS], // return to system memory (optionally pass null to skip calculation) float max_diff_shared [NUM_CAMS], // return to system memory (optionally pass null to skip calculation)
float max_diff_tmp [NUM_CAMS][TEXTURE_THREADS_PER_TILE], float max_diff_tmp [NUM_CAMS][TEXTURE_THREADS_PER_TILE],
...@@ -3846,7 +3847,7 @@ __device__ void tile_combine_rgba( ...@@ -3846,7 +3847,7 @@ __device__ void tile_combine_rgba(
#endif // #ifdef DEBUG9 #endif // #ifdef DEBUG9
/// ///
if (rbg_tile) { if (rbg_tile && (calc_extra == 0)) { // will keep debayered if (calc_extra == 0)
float k = 0.0; float k = 0.0;
int rbga_offset = colors * (DTT_SIZE2*DTT_SIZE21); // padded in union ! int rbga_offset = colors * (DTT_SIZE2*DTT_SIZE21); // padded in union !
#pragma unroll #pragma unroll
...@@ -3923,12 +3924,12 @@ __device__ void tile_combine_rgba( ...@@ -3923,12 +3924,12 @@ __device__ void tile_combine_rgba(
int row = (pass >> 1); int row = (pass >> 1);
int col = ((pass & 1) << 3) + threadIdx.x; int col = ((pass & 1) << 3) + threadIdx.x;
int i = row * DTT_SIZE21 + col; int i = row * DTT_SIZE21 + col;
int row_sym = row ^ ((row & 8)? 0xf : 0); /// int row_sym = row ^ ((row & 8)? 0xf : 0);
int col_sym = col ^ ((col & 8)? 0xf : 0); /// int col_sym = col ^ ((col & 8)? 0xf : 0);
// Was it a bug? // Was it a bug?
// float wnd2 = HWINDOW_SQ[row_sym] * HWINDOW_SQi[col_sym]; // float wnd2 = HWINDOW_SQ[row_sym] * HWINDOW_SQi[col_sym];
float wnd2 = HWINDOW_SQ[row_sym] * HWINDOW_SQ[col_sym]; /// float wnd2 = HWINDOW_SQ[row_sym] * HWINDOW_SQ[col_sym];
float * mclt_cam_i = mclt_tile + colors_offset * cam + i; float * mclt_cam_i = mclt_tile + colors_offset * cam + i;
// float * mclt_cam_i = rbg_tile + colors_offset * cam + i; // float * mclt_cam_i = rbg_tile + colors_offset * cam + i;
// //
...@@ -3939,7 +3940,7 @@ __device__ void tile_combine_rgba( ...@@ -3939,7 +3940,7 @@ __device__ void tile_combine_rgba(
float dc = *(mclt_cam_i + (DTT_SIZE2*(DTT_SIZE21 + 1)) * ncol) - *(rgba + (DTT_SIZE2*DTT_SIZE21) * ncol + i); float dc = *(mclt_cam_i + (DTT_SIZE2*(DTT_SIZE21 + 1)) * ncol) - *(rgba + (DTT_SIZE2*DTT_SIZE21) * ncol + i);
d2 += *(chn_weights + ncol) * dc * dc; d2 += *(chn_weights + ncol) * dc * dc;
} }
// d2 *= wnd2; /// d2 *= wnd2;
max_diff_tmp[cam][threadIdx.x] = fmaxf(max_diff_tmp[cam][threadIdx.x], d2); max_diff_tmp[cam][threadIdx.x] = fmaxf(max_diff_tmp[cam][threadIdx.x], d2);
} }
__syncthreads(); __syncthreads();
...@@ -3959,8 +3960,29 @@ __device__ void tile_combine_rgba( ...@@ -3959,8 +3960,29 @@ __device__ void tile_combine_rgba(
for (int i = 0; i < TEXTURE_THREADS_PER_TILE; i++){ for (int i = 0; i < TEXTURE_THREADS_PER_TILE; i++){
printf("tmp[%d] %f %f %f %f\n",i, max_diff_tmp[0][i],max_diff_tmp[1][i],max_diff_tmp[2][i],max_diff_tmp[3][i]); printf("tmp[%d] %f %f %f %f\n",i, max_diff_tmp[0][i],max_diff_tmp[1][i],max_diff_tmp[2][i],max_diff_tmp[3][i]);
} }
for (int ncol = 0; ncol < colors; ncol++){
printf("\n average for color %d\n",ncol);
debug_print_mclt(
rgba + (DTT_SIZE2*DTT_SIZE21) * ncol,
-1);
for (int ncam = 0; ncam < NUM_CAMS;ncam ++){
printf("\n mclt for color %d, camera %d\n",ncol,ncam);
debug_print_mclt(
mclt_tile + (DTT_SIZE2*(DTT_SIZE21 + 1)) * ncol + colors_offset * ncam,
-1);
#if 0
printf("\n rgb_tile for color %d, camera %d\n",ncol,ncam);
if (rgb_tile) {
debug_print_mclt(
rbg_tile + (DTT_SIZE2*(DTT_SIZE21 + 1)) * ncol + colors_offset * ncam,
-1);
}
#endif
}
}
} }
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // #ifdef DEBUG22 #endif // #ifdef DEBUG22
} }
if (calc_extra) { if (calc_extra) {
......
...@@ -389,6 +389,13 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -389,6 +389,13 @@ extern "C" __global__ void get_tiles_offsets(
float disparity = gpu_tasks[task_num].target_disparity; float disparity = gpu_tasks[task_num].target_disparity;
int tileX = (cxy & 0xffff); int tileX = (cxy & 0xffff);
int tileY = (cxy >> 16); int tileY = (cxy >> 16);
#ifdef DEBUG23
if ((ncam == 0) && (tileX == DBG_TILE_X) && (tileY == DBG_TILE_Y)){
printf ("\n get_tiles_offsets(): Debugging tileX=%d, tileY=%d, ncam = %d\n", tileX,tileY,ncam);
printf("\n");
__syncthreads();
}
#endif //#ifdef DEBUG23
float px = tileX * DTT_SIZE + DTT_SIZE/2; // - shiftX; float px = tileX * DTT_SIZE + DTT_SIZE/2; // - shiftX;
float py = tileY * DTT_SIZE + DTT_SIZE/2; // - shiftY; float py = tileY * DTT_SIZE + DTT_SIZE/2; // - shiftY;
...@@ -524,7 +531,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -524,7 +531,7 @@ extern "C" __global__ void get_tiles_offsets(
float dpXci_droll = drvi_drl[0] * norm_z - pXci * drvi_drl[2] / rvi[2]; float dpXci_droll = drvi_drl[0] * norm_z - pXci * drvi_drl[2] / rvi[2];
float dpYci_droll = drvi_drl[1] * norm_z - pYci * drvi_drl[2] / rvi[2]; float dpYci_droll = drvi_drl[1] * norm_z - pYci * drvi_drl[2] / rvi[2];
#ifdef DEBUG21 #ifdef DEBUG210
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){ if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("drvi_daz[0] = %f, drvi_daz[1] = %f, drvi_daz[2] = %f\n", drvi_daz[0], drvi_daz[1], drvi_daz[2]); printf("drvi_daz[0] = %f, drvi_daz[1] = %f, drvi_daz[2] = %f\n", drvi_daz[0], drvi_daz[1], drvi_daz[2]);
printf("drvi_dtl[0] = %f, drvi_dtl[1] = %f, drvi_dtl[2] = %f\n", drvi_dtl[0], drvi_dtl[1], drvi_dtl[2]); printf("drvi_dtl[0] = %f, drvi_dtl[1] = %f, drvi_dtl[2] = %f\n", drvi_dtl[0], drvi_dtl[1], drvi_dtl[2]);
...@@ -554,7 +561,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -554,7 +561,7 @@ extern "C" __global__ void get_tiles_offsets(
dd1[1][0] = (-rot_deriv.rots[ncam][1][0]*rXY[0] -rot_deriv.rots[ncam][1][1]*rXY[1])*norm_z; dd1[1][0] = (-rot_deriv.rots[ncam][1][0]*rXY[0] -rot_deriv.rots[ncam][1][1]*rXY[1])*norm_z;
dd1[1][1] = ( rot_deriv.rots[ncam][1][0]*rXY[1] -rot_deriv.rots[ncam][1][1]*rXY[0])*norm_z; dd1[1][1] = ( rot_deriv.rots[ncam][1][0]*rXY[1] -rot_deriv.rots[ncam][1][1]*rXY[0])*norm_z;
#ifdef DEBUG21 #ifdef DEBUG210
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){ if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("dd1[0][0] = %f, dd1[0][1] = %f\n",dd1[0][0],dd1[0][1]); printf("dd1[0][0] = %f, dd1[0][1] = %f\n",dd1[0][0],dd1[0][1]);
printf("dd1[1][0] = %f, dd1[1][1] = %f\n",dd1[1][0],dd1[1][1]); printf("dd1[1][0] = %f, dd1[1][1] = %f\n",dd1[1][0],dd1[1][1]);
...@@ -622,7 +629,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -622,7 +629,7 @@ extern "C" __global__ void get_tiles_offsets(
disp_dist[2] = s_dist * scale_distortXrot2Xdd1[0][0] + c_dist * scale_distortXrot2Xdd1[1][0]; disp_dist[2] = s_dist * scale_distortXrot2Xdd1[0][0] + c_dist * scale_distortXrot2Xdd1[1][0];
disp_dist[3] = s_dist * scale_distortXrot2Xdd1[0][1] + c_dist * scale_distortXrot2Xdd1[1][1]; disp_dist[3] = s_dist * scale_distortXrot2Xdd1[0][1] + c_dist * scale_distortXrot2Xdd1[1][1];
#ifdef DEBUG21 #ifdef DEBUG210
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){ if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("scale_distortXrot2Xdd1[0][0] = %f, scale_distortXrot2Xdd1[0][1] = %f\n",scale_distortXrot2Xdd1[0][0],scale_distortXrot2Xdd1[0][1]); printf("scale_distortXrot2Xdd1[0][0] = %f, scale_distortXrot2Xdd1[0][1] = %f\n",scale_distortXrot2Xdd1[0][0],scale_distortXrot2Xdd1[0][1]);
printf("scale_distortXrot2Xdd1[1][0] = %f, scale_distortXrot2Xdd1[1][1] = %f\n",scale_distortXrot2Xdd1[1][0],scale_distortXrot2Xdd1[1][1]); printf("scale_distortXrot2Xdd1[1][0] = %f, scale_distortXrot2Xdd1[1][1] = %f\n",scale_distortXrot2Xdd1[1][0],scale_distortXrot2Xdd1[1][1]);
...@@ -654,7 +661,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -654,7 +661,7 @@ extern "C" __global__ void get_tiles_offsets(
dpYci_dtilt * extrinsic_corr.imu_rot[0] + dpYci_dtilt * extrinsic_corr.imu_rot[0] +
dpYci_dazimuth * extrinsic_corr.imu_rot[1] + dpYci_dazimuth * extrinsic_corr.imu_rot[1] +
dpYci_droll * extrinsic_corr.imu_rot[2]); dpYci_droll * extrinsic_corr.imu_rot[2]);
#ifdef DEBUG21 #ifdef DEBUG210
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){ if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("delta_t = %f, ers_Xci = %f, ers_Yci = %f\n", delta_t, ers_Xci, ers_Yci); printf("delta_t = %f, ers_Xci = %f, ers_Yci = %f\n", delta_t, ers_Xci, ers_Yci);
} }
...@@ -677,7 +684,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -677,7 +684,7 @@ extern "C" __global__ void get_tiles_offsets(
pXY[0] += ers_Xci * rD2rND; // added correction to pixel X pXY[0] += ers_Xci * rD2rND; // added correction to pixel X
pXY[1] += ers_Yci * rD2rND; // added correction to pixel Y pXY[1] += ers_Yci * rD2rND; // added correction to pixel Y
#ifdef DEBUG21 #ifdef DEBUG210
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){ if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("k = %f, wdisparity = %f, dwdisp_dz = %f\n", k, wdisparity, dwdisp_dz); printf("k = %f, wdisparity = %f, dwdisp_dz = %f\n", k, wdisparity, dwdisp_dz);
printf("dpXci_pYci_imu_lin[0][0] = %f, dpXci_pYci_imu_lin[0][2] = %f\n", dpXci_pYci_imu_lin[0][0],dpXci_pYci_imu_lin[0][2]); printf("dpXci_pYci_imu_lin[0][0] = %f, dpXci_pYci_imu_lin[0][2] = %f\n", dpXci_pYci_imu_lin[0][0],dpXci_pYci_imu_lin[0][2]);
......
...@@ -72,7 +72,7 @@ ...@@ -72,7 +72,7 @@
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list #define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
#define DBG_DISPARITY 56.0 // disparity for which to calculate offsets (not needed in Java) #define DBG_DISPARITY 0.0 // 56.0 // disparity for which to calculate offsets (not needed in Java)
#define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory #define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001); #define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads #define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
...@@ -83,8 +83,8 @@ ...@@ -83,8 +83,8 @@
//#define DBG_TILE_X 40 //#define DBG_TILE_X 40
//#define DBG_TILE_Y 80 //#define DBG_TILE_Y 80
#define DBG_TILE_X 151 // 161 // 49 #define DBG_TILE_X 162 // 151 // 161 // 49
#define DBG_TILE_Y 69 // 111 // 66 #define DBG_TILE_Y 121 // 69 // 111 // 66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X) #define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#undef DBG_MARK_DBG_TILE #undef DBG_MARK_DBG_TILE
...@@ -114,10 +114,11 @@ ...@@ -114,10 +114,11 @@
// geom // geom
//#define DEBUG20 1 //#define DEBUG20 1
// #define DEBUG21 1 // Geometry Correction
#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0) #if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
//#define DEBUG21 1 // Geometry Correction
#define DEBUG22 1 //#define DEBUG22 1
#define DEBUG23 1
#endif //#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0) #endif //#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment