Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
de3c497a
Commit
de3c497a
authored
Feb 18, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactoring for CDP2
parent
f8839287
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
304 additions
and
151 deletions
+304
-151
TileProcessor.cuh
src/TileProcessor.cuh
+282
-142
geometry_correction.cu
src/geometry_correction.cu
+3
-3
test_tp.cu
src/test_tp.cu
+19
-6
No files found.
src/TileProcessor.cuh
View file @
de3c497a
This diff is collapsed.
Click to expand it.
src/geometry_correction.cu
View file @
de3c497a
...
...
@@ -40,6 +40,9 @@
#include "tp_defines.h"
#include "dtt8x8.h"
#include "geometry_correction.h"
// #include "TileProcessor.h"
#endif // #ifndef JCUDA
#ifndef get_task_size
...
...
@@ -337,9 +340,6 @@ extern "C" __global__ void calculate_tiles_offsets(
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
}
// __syncthreads();// __syncwarp();
// cudaDeviceSynchronize();
// cudaDeviceSynchronize();
}
...
...
src/test_tp.cu
View file @
de3c497a
...
...
@@ -33,9 +33,9 @@
// all of the next 5 were disabled
#define NOCORR
#define NOCORR_TD
#define NOTEXTURES
#define NOTEXTURE_RGBA
#define NOTEXTURE_RGBAXXX
//
#define NOTEXTURES
//
#define NOTEXTURE_RGBA
//
#define NOTEXTURE_RGBAXXX
#define SAVE_CLT
...
...
@@ -574,11 +574,23 @@ void generate_RBGA_host(
int border_tile = pass >> 2;
int ntt = *(cpu_num_texture_tiles + ((pass & 3) << 1) + border_tile);
int *pntt = gpu_num_texture_tiles + ((pass & 3) << 1) + border_tile;
dim3 grid_texture((ntt + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1); // TEXTURE_TILES_PER_BLOCK = 1
/* before CDP2
int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILES-X * (TILES-YA >> 2)); // 1/4
if (border_tile){
ti_offset += width * (tilesya >> 2) - ntt; // TILES-X * (TILES-YA >> 2) - ntt;
}
*/
// for CDP2
int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILES-X * (TILES-YA >> 2)); // 1/4
if (border_tile){
// ti_offset += width * (tilesya >> 2) - ntt; // TILES-X * (TILES-YA >> 2) - ntt;
ti_offset += width * (tilesya >> 2); // TILES-X * (TILES-YA >> 2) - ntt;
ti_offset = - ti_offset; // does not depend on results of the previous kernel, but is negative
}
#ifdef DEBUG8A
printf("\ngenerate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d\n",
pass, border_tile,ti_offset, ntt);
...
...
@@ -604,7 +616,7 @@ void generate_RBGA_host(
num_cams, // int num_cams, // number of cameras used
gpu_woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
ntt, // size_t
num_texture_tiles, // number of texture tiles to process
pntt, // ntt, // int *
num_texture_tiles, // number of texture tiles to process
ti_offset, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
...
...
@@ -1451,10 +1463,11 @@ int main(int argc, char **argv)
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx)
printf("HOST: convert_direct() done\n");
getLastCudaError("Kernel execution failed");
printf("HOST: convert_direct() done - 1\n");
checkCudaErrors(cudaDeviceSynchronize());
printf("HOST: convert_direct() done - 2\n");
// printf("%d\n",i);
}
sdkStopTimer(&timerTP);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment