Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
a01cbab2
Commit
a01cbab2
authored
Nov 27, 2021
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
matched calculate_tiles_offsets
parent
cde525c8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
7 deletions
+10
-7
geometry_correction.cu
src/geometry_correction.cu
+9
-6
tp_defines.h
src/tp_defines.h
+1
-1
No files found.
src/geometry_correction.cu
View file @
a01cbab2
...
@@ -497,6 +497,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -497,6 +497,7 @@ extern "C" __global__ void get_tiles_offsets(
#ifdef DEBUG21
#ifdef DEBUG21
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
if ((ncam == DBG_CAM) && (task_num == DBG_TILE)){
printf("\nTile = %d, camera= %d\n", task_num, ncam);
printf("\nTile = %d, camera= %d\n", task_num, ncam);
printf("TargetDisparity = %f\n", disparity);
printf("tileX = %d, tileY = %d\n", tileX, tileY);
printf("tileX = %d, tileY = %d\n", tileX, tileY);
printf("px = %f, py = %f\n", px, py);
printf("px = %f, py = %f\n", px, py);
printf("pXcd = %f, pYcd = %f\n", pXcd, pYcd);
printf("pXcd = %f, pYcd = %f\n", pXcd, pYcd);
...
@@ -562,10 +563,10 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -562,10 +563,10 @@ extern "C" __global__ void get_tiles_offsets(
__syncthreads();
__syncthreads();
// Each thread re-calculate same sum
// Each thread re-calculate same sum
float lines_avg = 0;
float lines_avg = 0;
for (int i = 0; i < NUM_CAMS; i ++){
for (int i = 0; i < num_cams; i ++){
lines_avg += pY_offsets[threadIdx.y][i];
lines_avg += pY_offsets[threadIdx.y][i];
}
}
lines_avg *= (1.0/NUM_CAMS);
lines_avg *= (1.0/num_cams);
// used when calculating derivatives, TODO: combine calculations !
// used when calculating derivatives, TODO: combine calculations !
float pY_offset = pY_offsets[threadIdx.y][ncam] - lines_avg;
float pY_offset = pY_offsets[threadIdx.y][ncam] - lines_avg;
#ifdef DEBUG21
#ifdef DEBUG21
...
@@ -577,7 +578,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -577,7 +578,7 @@ extern "C" __global__ void get_tiles_offsets(
printf("rD2rND = %f\n", rD2rND);
printf("rD2rND = %f\n", rD2rND);
printf("pXid = %f, pYid = %f\n", pXid, pYid);
printf("pXid = %f, pYid = %f\n", pXid, pYid);
printf("pXY[0] = %f, pXY[1] = %f\n", pXY[0], pXY[1]); // OK
printf("pXY[0] = %f, pXY[1] = %f\n", pXY[0], pXY[1]); // OK
printf("lines_avg = %f, pY_offset = %f\n", lines_avg, pY_offset);
printf("lines_avg = %f, pY_offset = %f\n", lines_avg, pY_offset);
// *
}
}
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
#endif // DEBUG21
#endif // DEBUG21
...
@@ -688,7 +689,7 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -688,7 +689,7 @@ extern "C" __global__ void get_tiles_offsets(
/// gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
/// gpu_tasks[task_num].disp_dist[ncam][1] = disp_dist[1];
/// gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
/// gpu_tasks[task_num].disp_dist[ncam][2] = disp_dist[2];
/// gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
/// gpu_tasks[task_num].disp_dist[ncam][3] = disp_dist[3];
float * disp_dist_p = gpu_ftasks + task_size * task_num + 3 + ncam * 4; // ncam = threadIdx.x, so each thread will have different offset
float * disp_dist_p = gpu_ftasks + task_size * task_num + 3 + num_cams* 2 + ncam * 4; // ncam = threadIdx.x, so each thread will have different offset
*(disp_dist_p++) = disp_dist[0]; // global memory
*(disp_dist_p++) = disp_dist[0]; // global memory
*(disp_dist_p++) = disp_dist[1];
*(disp_dist_p++) = disp_dist[1];
*(disp_dist_p++) = disp_dist[2];
*(disp_dist_p++) = disp_dist[2];
...
@@ -746,10 +747,12 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -746,10 +747,12 @@ extern "C" __global__ void get_tiles_offsets(
}
}
}
}
// copy results to global memory pXY, disp_dist
// copy results to global memory pXY, disp_dist (already copied)
// gpu_tasks[task_num].xy[ncam][0] = pXY[0];
// gpu_tasks[task_num].xy[ncam][0] = pXY[0];
// gpu_tasks[task_num].xy[ncam][1] = pXY[1];
// gpu_tasks[task_num].xy[ncam][1] = pXY[1];
float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + num_cams * 4 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
// .xy goes right after 3 common (task, txy and target_disparity)
float * tile_xy_p = gpu_ftasks + task_size * task_num + 3 + ncam * 2; // ncam = threadIdx.x, so each thread will have different offset
*(tile_xy_p++) = pXY[0]; // global memory
*(tile_xy_p++) = pXY[0]; // global memory
*(tile_xy_p++) = pXY[1]; // global memory
*(tile_xy_p++) = pXY[1]; // global memory
}
}
...
...
src/tp_defines.h
View file @
a01cbab2
...
@@ -86,7 +86,7 @@
...
@@ -86,7 +86,7 @@
#define THREADS_DYNAMIC_BITS 5 // threads in block for CDP creation of the texture list
#define THREADS_DYNAMIC_BITS 5 // threads in block for CDP creation of the texture list
#define DBG_DISPARITY 56.0// 0.0 // 56.0 // disparity for which to calculate offsets (not needed in Java)
#define DBG_DISPARITY 0.0 // 56.0// 0.0 // 56.0 // disparity for which to calculate offsets (not needed in Java)
#define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment