Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
460eda96
Commit
460eda96
authored
Apr 08, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
moved LPF to 4-image generation to match correlations and textures
parent
06b62a05
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
73 additions
and
45 deletions
+73
-45
TileProcessor.cuh
src/TileProcessor.cuh
+67
-41
test_tp.cu
src/test_tp.cu
+6
-4
No files found.
src/TileProcessor.cuh
View file @
460eda96
...
@@ -105,7 +105,9 @@
...
@@ -105,7 +105,9 @@
#define DBG_TILE_X 161 // 49
#define DBG_TILE_X 161 // 49
#define DBG_TILE_Y 111 // 66
#define DBG_TILE_Y 111 // 66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#undef DBG_MARK_DBG_TILE 1
//56494
//56494
// struct tp_task
// struct tp_task
//#define TASK_SIZE 12
//#define TASK_SIZE 12
...
@@ -879,6 +881,18 @@ extern "C" __global__ void textures_accumulate(
...
@@ -879,6 +881,18 @@ extern "C" __global__ void textures_accumulate(
size_t texture_stride, // in floats (now 256*4 = 1024)
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
extern "C"
__global__ void imclt_rbg(
float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float * gpu_rbg, // WIDTH, 3 * HEIGHT
int apply_lpf,
int mono, // defines lpf filter
int color, // defines location of clt data
int v_offset,
int h_offset,
const size_t dstride); // in floats (pixels)
//===========================
extern "C"
extern "C"
__global__ void correlate2D(
__global__ void correlate2D(
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
...
@@ -2274,16 +2288,12 @@ __global__ void textures_accumulate(
...
@@ -2274,16 +2288,12 @@ __global__ void textures_accumulate(
} // textures_accumulate()
} // textures_accumulate()
extern "C"
extern "C"
__global__ void imclt_rbg(
__global__ void imclt_rbg(
float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float * gpu_rbg, // WIDTH, 3 * HEIGHT
float * gpu_rbg, // WIDTH, 3 * HEIGHT
int apply_lpf,
int mono,
int color,
int color,
int v_offset,
int v_offset,
int h_offset,
int h_offset,
...
@@ -2334,13 +2344,27 @@ __global__ void imclt_rbg(
...
@@ -2334,13 +2344,27 @@ __global__ void imclt_rbg(
clt_tile += column + thr3; // first 2 rows
clt_tile += column + thr3; // first 2 rows
gpu_tile += column; // first 2 rows
gpu_tile += column; // first 2 rows
if (apply_lpf) {
// lpf - covers 2 rows, as there there are 16 threads
float *lpf0 = lpf_data[mono? 3 :color] + threadIdx.x; // lpf_data[3] - mono
#pragma unroll
for (int q = 0; q < 4; q++){
float *lpf = lpf0;
for (int i = 0; i < DTT_SIZE/2; i++){
*clt_tile= *gpu_tile * (*lpf);
clt_tile += (2 * DTT_SIZE1);
gpu_tile += (2 * DTT_SIZE);
lpf += (2 * DTT_SIZE);
}
}
} else {
#pragma unroll
#pragma unroll
for (int i = 0; i < DTT_SIZE2; i++){
for (int i = 0; i < DTT_SIZE2; i++){
*clt_tile= *gpu_tile;
*clt_tile= *gpu_tile;
clt_tile += (2 * DTT_SIZE1);
clt_tile += (2 * DTT_SIZE1);
gpu_tile += (2 * DTT_SIZE);
gpu_tile += (2 * DTT_SIZE);
}
}
}
float * mclt_top = ((float*) mclt_tiles) + tile_in_block * (DTT_SIZE2 * DTT_SIZE21) + column;
float * mclt_top = ((float*) mclt_tiles) + tile_in_block * (DTT_SIZE2 * DTT_SIZE21) + column;
float * rbg_top = color_plane + (tileY * DTT_SIZE)* dstride + (tileX * DTT_SIZE) + column;
float * rbg_top = color_plane + (tileY * DTT_SIZE)* dstride + (tileX * DTT_SIZE) + column;
float * mclt_tile = mclt_top;
float * mclt_tile = mclt_top;
...
@@ -2377,31 +2401,33 @@ __global__ void imclt_rbg(
...
@@ -2377,31 +2401,33 @@ __global__ void imclt_rbg(
// save result (back)
// save result (back)
float * rbg_p = rbg_top;
float * rbg_p = rbg_top;
mclt_tile = mclt_top;
mclt_tile = mclt_top;
if ((tileX == 0) && (tileY == 0)){
if ((tileX == 0) && (tileY == 0)){
#pragma unroll
#pragma unroll
for (int i = 0; i < DTT_SIZE2; i++){
for (int i = 0; i < DTT_SIZE2; i++){
*rbg_p = 100.0f; // just testing
*rbg_p = 100.0f; // just testing
mclt_tile += DTT_SIZE21;
mclt_tile += DTT_SIZE21;
rbg_p += dstride; // DTT_SIZE2; // FIXME
rbg_p += dstride; // DTT_SIZE2; // FIXME
}
}
} else if ((tileX == DBG_TILE_X) && (tileY == DBG_TILE_Y)){
#ifdef DBG_MARK_DBG_TILE
} else if ((tileX == DBG_TILE_X) && (tileY == DBG_TILE_Y)){
#pragma unroll
#pragma unroll
for (int i = 0; i < DTT_SIZE2; i++){
for (int i = 0; i < DTT_SIZE2; i++){
*rbg_p = (*mclt_tile) * 2.0; // just testing
*rbg_p = (*mclt_tile) * 2.0; // just testing
mclt_tile += DTT_SIZE21;
mclt_tile += DTT_SIZE21;
rbg_p += dstride; // DTT_SIZE2; // FIXME
rbg_p += dstride; // DTT_SIZE2; // FIXME
}
}
} else {
#endif
} else {
#pragma unroll
#pragma unroll
for (int i = 0; i < DTT_SIZE2; i++){
for (int i = 0; i < DTT_SIZE2; i++){
*rbg_p = *mclt_tile;
*rbg_p = *mclt_tile;
mclt_tile += DTT_SIZE21;
mclt_tile += DTT_SIZE21;
rbg_p += dstride; // DTT_SIZE2; // FIXME
rbg_p += dstride; // DTT_SIZE2; // FIXME
}
}
}
}
}
}
...
@@ -3184,15 +3210,15 @@ __device__ void convertCorrectTile(
...
@@ -3184,15 +3210,15 @@ __device__ void convertCorrectTile(
lpf += DTT_SIZE;
lpf += DTT_SIZE;
}
}
}
}
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
#ifdef DBG_TILE
#ifdef DBG_TILE
#ifdef DEBUG3
#ifdef DEBUG3
if ((threadIdx.x) == 0){
if ((threadIdx.x) == 0){
printf("\nDTT Tiles after LPF, color = %d\n",color);
printf("\nDTT Tiles after LPF, color = %d\n",color);
debug_print_clt1(clt_tile, color, 0xf); // only 1 quadrant for R,B and 2 - for G
debug_print_clt1(clt_tile, color, 0xf); // only 1 quadrant for R,B and 2 - for G
printf("\nDTT All done\n");
printf("\nDTT All done\n");
}
}
__syncthreads();// __syncwarp();
__syncthreads();// __syncwarp();
#endif
#endif
#endif
#endif
}
}
...
...
src/test_tp.cu
View file @
460eda96
...
@@ -649,11 +649,13 @@ int main(int argc, char **argv)
...
@@ -649,11 +649,13 @@ int main(int argc, char **argv)
dim3 grid_imclt((tiles_in_pass + IMCLT_TILES_PER_BLOCK-1) / IMCLT_TILES_PER_BLOCK,1,1);
dim3 grid_imclt((tiles_in_pass + IMCLT_TILES_PER_BLOCK-1) / IMCLT_TILES_PER_BLOCK,1,1);
// printf("grid_imclt= (%d, %d, %d)\n",grid_imclt.x, grid_imclt.y, grid_imclt.z);
// printf("grid_imclt= (%d, %d, %d)\n",grid_imclt.x, grid_imclt.y, grid_imclt.z);
imclt_rbg<<<grid_imclt,threads_imclt>>>(
imclt_rbg<<<grid_imclt,threads_imclt>>>(
gpu_clt_h[ncam], // float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
gpu_clt_h[ncam],
// float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
gpu_corr_images_h[ncam], // float * gpu_rbg, // WIDTH, 3 * HEIGHT
gpu_corr_images_h[ncam], // float * gpu_rbg, // WIDTH, 3 * HEIGHT
color, // int color,
1, // int apply_lpf,
v_offs, // int v_offset,
0, // int mono, // defines lpf filter
h_offs, // int h_offset,
color, // int color, // defines location of clt data
v_offs, // int v_offset,
h_offs, // int h_offset,
dstride_rslt/sizeof(float)); //const size_t dstride); // in floats (pixels)
dstride_rslt/sizeof(float)); //const size_t dstride); // in floats (pixels)
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment