Commit a01cbab2 authored by Andrey Filippov's avatar Andrey Filippov

matched calculate_tiles_offsets

parent cde525c8
......@@ -497,6 +497,7 @@ extern "C" __global__ void get_tiles_offsets(
#ifdef DEBUG21
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("\nTile = %d, camera= %d\n", task_num, ncam);
printf("TargetDisparity = %f\n", disparity);
printf("tileX = %d, tileY = %d\n", tileX, tileY);
printf("px = %f, py = %f\n", px, py);
printf("pXcd = %f, pYcd = %f\n", pXcd, pYcd);
......@@ -562,10 +563,10 @@ extern "C" __global__ void get_tiles_offsets(
__syncthreads();
// Each thread re-calculate same sum
float lines_avg = 0;
for (int i = 0; i < NUM_CAMS; i ++){
for (int i = 0; i < num_cams; i ++){
lines_avg += pY_offsets[threadIdx.y][i];
}
lines_avg *= (1.0/NUM_CAMS);
lines_avg *= (1.0/num_cams);
// used when calculating derivatives, TODO: combine calculations !
float pY_offset = pY_offsets[threadIdx.y][ncam] - lines_avg;
#ifdef DEBUG21
......@@ -577,7 +578,7 @@ extern "C" __global__ void get_tiles_offsets(
printf("rD2rND = %f\n", rD2rND);
printf("pXid = %f, pYid = %f\n", pXid, pYid);
printf("pXY[0] = %f, pXY[1] = %f\n", pXY[0], pXY[1]); // OK
printf("lines_avg = %f, pY_offset = %f\n", lines_avg, pY_offset);
printf("lines_avg = %f, pY_offset = %f\n", lines_avg, pY_offset); // *
}
__syncthreads();// __syncwarp();
#endif // DEBUG21
......@@ -688,7 +689,7 @@ extern "C" __global__ void get_tiles_offsets(
/// gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
/// gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
/// gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
float * disp_dist_p = gpu_ftasks + task_size * task_num + 3 + ncam * 4; // ncam = threadIdx.x, so each thread will have different offset
float * disp_dist_p = gpu_ftasks + task_size * task_num + 3 + num_cams* 2 + ncam * 4; // ncam = threadIdx.x, so each thread will have different offset
*(disp_dist_p++) = disp_dist[0]; // global memory
*(disp_dist_p++) = disp_dist[1];
*(disp_dist_p++) = disp_dist[2];
......@@ -746,10 +747,12 @@ extern "C" __global__ void get_tiles_offsets(
}
}
// copy results to global memory pXY, disp_dist
// copy results to global memory pXY, disp_dist (already copied)
// gpu_tasks[task_num].xy[ncam][0] = pXY[0];
// gpu_tasks[task_num].xy[ncam][1] = pXY[1];
float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// .xy goes right after 3 commonn (tak, txy and target_disparity
float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
*(tile_xy_p++) = pXY[0]; // global memory
*(tile_xy_p++) = pXY[1]; // global memory
}
......
......@@ -86,7 +86,7 @@
#define THREADS_DYNAMIC_BITS 5 // treads in block for CDP creation of the texture list
#define DBG_DISPARITY 56.0// 0.0 // 56.0 // disparity for which to calculate offsets (not needed in Java)
#define DBG_DISPARITY 0.0 // 56.0// 0.0 // 56.0 // disparity for which to calculate offsets (not needed in Java)
#define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment