Commit 5866263d authored by Andrey Filippov's avatar Andrey Filippov

debugging geometry_correction - was corrupting last tiles

parent 705a2e85
...@@ -281,6 +281,32 @@ extern "C" __global__ void calc_rot_deriv( ...@@ -281,6 +281,32 @@ extern "C" __global__ void calc_rot_deriv(
} }
extern "C" __global__ void calculate_tiles_offsets(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task
struct gc * gpu_geometry_correction,
struct corr_vector * gpu_correction_vector,
float * gpu_rByRDist, // length should match RBYRDIST_LEN
trot_deriv * gpu_rot_deriv)
{
dim3 threads_geom(NUM_CAMS,TILES_PER_BLOCK_GEOM, 1);
dim3 grid_geom ((num_tiles+TILES_PER_BLOCK_GEOM-1)/TILES_PER_BLOCK_GEOM, 1, 1);
if (threadIdx.x == 0) { // always 1
get_tiles_offsets<<<grid_geom,threads_geom>>> (
gpu_tasks, // struct tp_task * gpu_tasks,
num_tiles, // int num_tiles, // number of tiles in task list
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_correction_vector, // struct corr_vector * gpu_correction_vector,
gpu_rByRDist, // float * gpu_rByRDist) // length should match RBYRDIST_LEN
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
}
// __syncthreads();// __syncwarp();
// cudaDeviceSynchronize();
// cudaDeviceSynchronize();
}
/* /*
* blockDim.x = NUM_CAMS * blockDim.x = NUM_CAMS
* blockDim.y = TILES_PER_BLOCK_GEOM * blockDim.y = TILES_PER_BLOCK_GEOM
...@@ -295,12 +321,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -295,12 +321,7 @@ extern "C" __global__ void get_tiles_offsets(
trot_deriv * gpu_rot_deriv) trot_deriv * gpu_rot_deriv)
{ {
int task_num = blockIdx.x * blockDim.y + threadIdx.y; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y int task_num = blockIdx.x * blockDim.y + threadIdx.y; // blockIdx.x * TILES_PER_BLOCK_GEOM + threadIdx.y
if (task_num >= num_tiles){
return;
}
int thread_xy = blockDim.x * threadIdx.y + threadIdx.x; int thread_xy = blockDim.x * threadIdx.y + threadIdx.x;
int ncam = threadIdx.x;
// threadIdx.x - numcam, used for per-camera
__shared__ struct gc geometry_correction; __shared__ struct gc geometry_correction;
__shared__ float rByRDist [RBYRDIST_LEN]; __shared__ float rByRDist [RBYRDIST_LEN];
__shared__ struct corr_vector extrinsic_corr; __shared__ struct corr_vector extrinsic_corr;
...@@ -355,6 +376,10 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -355,6 +376,10 @@ extern "C" __global__ void get_tiles_offsets(
} }
} }
__syncthreads(); __syncthreads();
int ncam = threadIdx.x;
if (task_num >= num_tiles){
return;
}
int imu_exists = // todo - calculate once with rot_deriv? int imu_exists = // todo - calculate once with rot_deriv?
(extrinsic_corr.imu_rot[0] != 0.0) || (extrinsic_corr.imu_rot[0] != 0.0) ||
(extrinsic_corr.imu_rot[1] != 0.0) || (extrinsic_corr.imu_rot[1] != 0.0) ||
...@@ -418,7 +443,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -418,7 +443,7 @@ extern "C" __global__ void get_tiles_offsets(
xyz[0] = SCENE_UNITS_SCALE * pXc * geometry_correction.disparityRadius / disparity; xyz[0] = SCENE_UNITS_SCALE * pXc * geometry_correction.disparityRadius / disparity;
xyz[1] = -SCENE_UNITS_SCALE * pYc * geometry_correction.disparityRadius / disparity; xyz[1] = -SCENE_UNITS_SCALE * pYc * geometry_correction.disparityRadius / disparity;
// next radial distortion coefficients are for this, not master camera (may be the same) // next radial distortion coefficients are for this, not master camera (may be the same)
// geometry_correction.rad_coeff[i]; // geometry_correction.rad_coeff[i];
float fl_pix = geometry_correction.focalLength/(0.001 * geometry_correction.pixelSize); // focal length in pixels - this camera float fl_pix = geometry_correction.focalLength/(0.001 * geometry_correction.pixelSize); // focal length in pixels - this camera
float ri_scale = 0.001 * geometry_correction.pixelSize / geometry_correction.distortionRadius; float ri_scale = 0.001 * geometry_correction.pixelSize / geometry_correction.distortionRadius;
...@@ -486,7 +511,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -486,7 +511,7 @@ extern "C" __global__ void get_tiles_offsets(
float pYid = pYci * rD2rND; float pYid = pYci * rD2rND;
pXY[0] = pXid + geometry_correction.pXY0[ncam][0]; pXY[0] = pXid + geometry_correction.pXY0[ncam][0];
pXY[1] = pYid + geometry_correction.pXY0[ncam][1]; pXY[1] = pYid + geometry_correction.pXY0[ncam][1];
// new for ERS // new for ERS
pY_offsets[threadIdx.y][ncam] = pXY[1] - geometry_correction.woi_tops[ncam]; pY_offsets[threadIdx.y][ncam] = pXY[1] - geometry_correction.woi_tops[ncam];
__syncthreads(); __syncthreads();
// Each thread re-calculate same sum // Each thread re-calculate same sum
...@@ -511,9 +536,6 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -511,9 +536,6 @@ extern "C" __global__ void get_tiles_offsets(
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG21 #endif // DEBUG21
// float rvi[3];
float drvi_daz [3]; // drvi_daz = deriv_rots[i][0].times(vi); float drvi_daz [3]; // drvi_daz = deriv_rots[i][0].times(vi);
float drvi_dtl [3]; // drvi_dtl = deriv_rots[i][1].times(vi); float drvi_dtl [3]; // drvi_dtl = deriv_rots[i][1].times(vi);
float drvi_drl [3]; // drvi_drl = deriv_rots[i][2].times(vi); float drvi_drl [3]; // drvi_drl = deriv_rots[i][2].times(vi);
...@@ -547,7 +569,7 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -547,7 +569,7 @@ extern "C" __global__ void get_tiles_offsets(
float disp_dist[4]; // only for this channel, to be copied to global gpu_tasks in the end float disp_dist[4]; // only for this channel, to be copied to global gpu_tasks in the end
float dpXci_pYci_imu_lin[2][3]; float dpXci_pYci_imu_lin[2][3];
/* /*
double [][] add0 = { double [][] add0 = {
{-rXY[i][0], rXY[i][1], 0.0}, {-rXY[i][0], rXY[i][1], 0.0},
{-rXY[i][1], -rXY[i][0], 0.0}, {-rXY[i][1], -rXY[i][0], 0.0},
...@@ -570,12 +592,11 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -570,12 +592,11 @@ extern "C" __global__ void get_tiles_offsets(
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG21 #endif // DEBUG21
// now first column of 2x2 dd1 - x, y components of derivatives by disparity, second column - derivatives by ortho to disparity (~Y in 2d correlation) // now first column of 2x2 dd1 - x, y components of derivatives by disparity, second column - derivatives by ortho to disparity (~Y in 2d correlation)
// unity vector in the direction of radius // unity vector in the direction of radius
float c_dist = pXci/rNDi; float c_dist = pXci/rNDi;
float s_dist = pYci/rNDi; float s_dist = pYci/rNDi;
//#undef NVRTC_BUG //#undef NVRTC_BUG
float drD2rND_dri = 0.0; float drD2rND_dri = 0.0;
{ {
float rri = 1.0; float rri = 1.0;
...@@ -618,28 +639,16 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -618,28 +639,16 @@ extern "C" __global__ void get_tiles_offsets(
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
#endif // DEBUG21 #endif // DEBUG21
gpu_tasks[task_num].disp_dist[ncam][0] = disp_dist[0]; gpu_tasks[task_num].disp_dist[ncam][0] = disp_dist[0];
gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1]; gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2]; gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3]; gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
// imu = extrinsic_corr.getIMU(i); // currently it is common for all channels // imu = extrinsic_corr.getIMU(i); // currently it is common for all channels
// float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15 // float imu_rot [3]; // d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
// float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19 geometry_correction.imu_move // float imu_move[3]; // dx/dt, dy/dt, dz/dt 16..19 geometry_correction.imu_move
// ERS linear does not yet use per-port rotations, probably not needed // ERS linear does not yet use per-port rotations, probably not needed
if (imu_exists){ if (imu_exists){
/*
float delta_t = disp_dist[2] * disparity * geometry_correction.line_time; // positive for top cameras, negative - for bottom //disp_dist[2]=dd2.get(1, 0)
float ers_Xci = delta_t * (
dpXci_dtilt * extrinsic_corr.imu_rot[0] +
dpXci_dazimuth * extrinsic_corr.imu_rot[1] +
dpXci_droll * extrinsic_corr.imu_rot[2]);
float ers_Yci = delta_t* (
dpYci_dtilt * extrinsic_corr.imu_rot[0] +
dpYci_dazimuth * extrinsic_corr.imu_rot[1] +
dpYci_droll * extrinsic_corr.imu_rot[2]);
*/
float ers_x = float ers_x =
dpXci_dtilt * extrinsic_corr.imu_rot[0] + dpXci_dtilt * extrinsic_corr.imu_rot[0] +
dpXci_dazimuth * extrinsic_corr.imu_rot[1] + dpXci_dazimuth * extrinsic_corr.imu_rot[1] +
...@@ -649,11 +658,8 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -649,11 +658,8 @@ extern "C" __global__ void get_tiles_offsets(
dpYci_dazimuth * extrinsic_corr.imu_rot[1] + dpYci_dazimuth * extrinsic_corr.imu_rot[1] +
dpYci_droll * extrinsic_corr.imu_rot[2]; dpYci_droll * extrinsic_corr.imu_rot[2];
#ifdef DEBUG21 #ifdef DEBUG21
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){ if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
// printf("delta_t = %f, ers_Xci = %f, ers_Yci = %f\n", delta_t, ers_Xci, ers_Yci);
printf("ers_x = %f, ers_y = %f\n", ers_x, ers_y); printf("ers_x = %f, ers_y = %f\n", ers_x, ers_y);
} }
__syncthreads();// __syncwarp(); __syncthreads();// __syncwarp();
...@@ -666,14 +672,6 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -666,14 +672,6 @@ extern "C" __global__ void get_tiles_offsets(
dpXci_pYci_imu_lin[1][1] = wdisparity / k; // dpy/ dworld_Y dpXci_pYci_imu_lin[1][1] = wdisparity / k; // dpy/ dworld_Y
dpXci_pYci_imu_lin[0][2] = (xyz[0] / k) * dwdisp_dz; // dpx/ dworld_Z dpXci_pYci_imu_lin[0][2] = (xyz[0] / k) * dwdisp_dz; // dpx/ dworld_Z
dpXci_pYci_imu_lin[1][2] = (xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z dpXci_pYci_imu_lin[1][2] = (xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z
/*
ers_Xci += delta_t* (
dpXci_pYci_imu_lin[0][0] * extrinsic_corr.imu_move[0] +
dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2]);
ers_Yci += delta_t* (
dpXci_pYci_imu_lin[1][1] * extrinsic_corr.imu_move[1] +
dpXci_pYci_imu_lin[1][2] * extrinsic_corr.imu_move[2]);
*/
ers_x += dpXci_pYci_imu_lin[0][0] * extrinsic_corr.imu_move[0] + ers_x += dpXci_pYci_imu_lin[0][0] * extrinsic_corr.imu_move[0] +
dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2]; dpXci_pYci_imu_lin[0][2] * extrinsic_corr.imu_move[2];
ers_y += dpXci_pYci_imu_lin[1][1] * extrinsic_corr.imu_move[1] + ers_y += dpXci_pYci_imu_lin[1][1] * extrinsic_corr.imu_move[1] +
...@@ -700,8 +698,6 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -700,8 +698,6 @@ extern "C" __global__ void get_tiles_offsets(
// copy results to global memory pXY, disp_dist // copy results to global memory pXY, disp_dist
gpu_tasks[task_num].xy[ncam][0] = pXY[0]; gpu_tasks[task_num].xy[ncam][0] = pXY[0];
gpu_tasks[task_num].xy[ncam][1] = pXY[1]; gpu_tasks[task_num].xy[ncam][1] = pXY[1];
} }
extern "C" __global__ void calcReverseDistortionTable( extern "C" __global__ void calcReverseDistortionTable(
......
...@@ -149,6 +149,15 @@ extern "C" __global__ void get_tiles_offsets( ...@@ -149,6 +149,15 @@ extern "C" __global__ void get_tiles_offsets(
float * gpu_rByRDist, // length should match RBYRDIST_LEN float * gpu_rByRDist, // length should match RBYRDIST_LEN
trot_deriv * gpu_rot_deriv); trot_deriv * gpu_rot_deriv);
extern "C" __global__ void calculate_tiles_offsets(
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task
struct gc * gpu_geometry_correction,
struct corr_vector * gpu_correction_vector,
float * gpu_rByRDist, // length should match RBYRDIST_LEN
trot_deriv * gpu_rot_deriv);
// uses NUM_CAMS blocks, (3,3,3) threads // uses NUM_CAMS blocks, (3,3,3) threads
extern "C" __global__ void calc_rot_deriv( extern "C" __global__ void calc_rot_deriv(
struct corr_vector * gpu_correction_vector, struct corr_vector * gpu_correction_vector,
......
...@@ -739,6 +739,7 @@ int main(int argc, char **argv) ...@@ -739,6 +739,7 @@ int main(int argc, char **argv)
sdkResetTimer(&timerGEOM); sdkResetTimer(&timerGEOM);
sdkStartTimer(&timerGEOM); sdkStartTimer(&timerGEOM);
} }
/*
get_tiles_offsets<<<grid_geom,threads_geom>>> ( get_tiles_offsets<<<grid_geom,threads_geom>>> (
gpu_tasks, // struct tp_task * gpu_tasks, gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list tp_task_size, // int num_tiles, // number of tiles in task list
...@@ -746,6 +747,14 @@ int main(int argc, char **argv) ...@@ -746,6 +747,14 @@ int main(int argc, char **argv)
gpu_correction_vector, // struct corr_vector * gpu_correction_vector, gpu_correction_vector, // struct corr_vector * gpu_correction_vector,
gpu_rByRDist, // float * gpu_rByRDist) // length should match RBYRDIST_LEN gpu_rByRDist, // float * gpu_rByRDist) // length should match RBYRDIST_LEN
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv); gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
*/
calculate_tiles_offsets<<<1,1>>> (
gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_correction_vector, // struct corr_vector * gpu_correction_vector,
gpu_rByRDist, // float * gpu_rByRDist) // length should match RBYRDIST_LEN
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
getLastCudaError("Kernel failure"); getLastCudaError("Kernel failure");
checkCudaErrors(cudaDeviceSynchronize()); checkCudaErrors(cudaDeviceSynchronize());
......
...@@ -93,8 +93,8 @@ ...@@ -93,8 +93,8 @@
//#define DBG_TILE_X 40 //#define DBG_TILE_X 40
//#define DBG_TILE_Y 80 //#define DBG_TILE_Y 80
#define DBG_TILE_X 162 // 151 // 161 // 49 #define DBG_TILE_X 32 // 162 // 151 // 161 // 49
#define DBG_TILE_Y 121 // 69 // 111 // 66 #define DBG_TILE_Y 88 // 121 // 69 // 111 // 66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X) #define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#undef DBG_MARK_DBG_TILE #undef DBG_MARK_DBG_TILE
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment