Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
53f7f2ae
Commit
53f7f2ae
authored
Dec 14, 2021
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Before adding per-tile weghts for fat zeros
parent
12758c3e
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
67 additions
and
22 deletions
+67
-22
TileProcessor.cuh
src/TileProcessor.cuh
+48
-10
TileProcessor.h
src/TileProcessor.h
+9
-4
test_tp.cu
src/test_tp.cu
+9
-7
tp_defines.h
src/tp_defines.h
+1
-1
No files found.
src/TileProcessor.cuh
View file @
53f7f2ae
...
...
@@ -1109,6 +1109,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
int * woi, // x, y, width,height
float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process
int gpu_texture_indices_offset,// add to gpu_texture_indices
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction,
...
...
@@ -1151,7 +1152,7 @@ __device__ int get_textures_shared_size( // in bytes
* @param scale1 scale blue (if colors = 3) component before mixing
* @param scale2 scale green (if colors = 3) component before mixing
* @param fat_zero add this value squared to the sum of squared components before normalization\
* @param gpu_ftasks flattened tasks, 2
7 floats for quad EO, 99
floats for LWIR16
* @param gpu_ftasks flattened tasks, 2
9 floats for quad EO, 101
floats for LWIR16
// * @param gpu_tasks array of per-tile tasks (now bits 4..9 - correlation pairs)
* @param num_tiles number of tiles int gpu_tasks array prepared for processing
* @param tilesx number of tile rows
...
...
@@ -1190,14 +1191,12 @@ extern "C" __global__ void correlate2D(
*pnum_corr_tiles = 0;
index_correlate<<<blocks0,threads0>>>(
num_cams, // int num_cams,
// sel_pairs, // int * sel_pairs,
sel_pairs0, // int sel_pairs0,
sel_pairs1, // int sel_pairs1,
sel_pairs2, // int sel_pairs2,
sel_pairs3, // int sel_pairs3,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks, // struct tp_task * gpu_tasks,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 29 floats for quad EO, 101 floats for LWIR16
num_tiles, // int num_tiles, // number of tiles in task
tilesx, // int width, // number of tiles in a row
gpu_corr_indices, // int * gpu_corr_indices, // array of correlation tasks
...
...
@@ -2062,7 +2061,8 @@ extern "C" __global__ void generate_RBGA(
woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
ntt, // size_t num_texture_tiles, // number of texture tiles to process
gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
ti_offset, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
colors, // int colors, // number of colors (3/1)
is_lwir, // int is_lwir, // do not perform shot correction
...
...
@@ -2789,7 +2789,9 @@ __global__ void convert_correct_tiles(
* @param dust_remove do not reduce average weight when only one image differs much from the average (true)
* @param texture_stride output stride in floats (now 256*4 = 1024)
* @param gpu_texture_tiles output array (number of colors +1 + ?)*16*16 rgba texture tiles) float values. Will not be calculated if null
* @param inescan_order 0 low-res tiles have tghe same order, as gpu_texture_indices, 1 - in linescan order
* @param gpu_diff_rgb_combo low-resolution output, with per-camera mismatch an each color average. Will not be calculated if null
* @param num_tilesx number of tiles in a row
*/
extern "C" __global__ void textures_nonoverlap(
int num_cams, // number of cameras
...
...
@@ -2810,6 +2812,7 @@ extern "C" __global__ void textures_nonoverlap(
// combining both non-overlap and overlap (each calculated if pointer is not null )
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
int linescan_order, // 0 low-res tiles have tghe same order, as gpu_texture_indices, 1 - in linescan order
float * gpu_diff_rgb_combo, // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
int num_tilesx)
// num_tilesx in the end - worked, after num_tiles - did not compile with JIT in Eclipse
...
...
@@ -2856,6 +2859,7 @@ extern "C" __global__ void textures_nonoverlap(
(int *) 0, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
*pnum_texture_tiles, // size_t num_texture_tiles, // number of texture tiles to process
0, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
colors, // int colors, // number of colors (3/1)
...
...
@@ -2872,8 +2876,8 @@ extern "C" __global__ void textures_nonoverlap(
0, // size_t texture_rbg_stride, // in floats
(float *) 0, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
texture_stride, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
1, //
int linescan_order, // if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
gpu_texture_tiles, //
(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
linescan_order, //
int linescan_order, // if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
num_tilesx);
}
...
...
@@ -2892,6 +2896,7 @@ extern "C" __global__ void textures_nonoverlap(
* @param woi WoI for the output texture (x,y,width,height of the woi), may be null if overlapped output is not used
* @param gpu_clt array of num_cams pointers to the CLT (frequency domain) data [TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
* @param num_texture_tiles number of texture tiles to process
* @param gpu_texture_indices_offset add to gpu_texture_indices
* @param gpu_texture_indices array - 1 integer per tile to process
* @param gpu_geometry_correction geometry correction structure, used for rXY to determine pairs weight
* @param colors number of colors used: 3 for RGB or 1 for monochrome
...
...
@@ -2918,6 +2923,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
int * woi, // x, y, width,height
float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process
int gpu_texture_indices_offset,// add to gpu_texture_indices
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1)
...
...
@@ -2959,7 +2965,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
return; // nothing to do
}
// get number of tile
int tile_code = gpu_texture_indices[tile_indx
];
int tile_code = gpu_texture_indices[tile_indx
+ gpu_texture_indices_offset]; // Added for Java, no DP
if ((tile_code & (1 << CORR_TEXTURE_BIT)) == 0){
return; // nothing to do
}
...
...
@@ -3513,11 +3519,25 @@ __global__ void imclt_rbg_all(
int woi_theight,
const size_t dstride) // in floats (pixels)
{
// int num_cams = sizeof(gpu_clt)/sizeof(&gpu_clt[0]);
dim3 threads_erase8x8(DTT_SIZE, NUM_THREADS/DTT_SIZE, 1);
dim3 grid_erase8x8_right_col (1, woi_theight + 1, 1);
dim3 grid_erase8x8_bottom_row(woi_twidth + 1, 1, 1);
dim3 threads_imclt(IMCLT_THREADS_PER_TILE, IMCLT_TILES_PER_BLOCK, 1);
if (threadIdx.x == 0) { // anyway 1,1,1
for (int ncam = 0; ncam < num_cams; ncam++) { // was NUM_CAMS
for (int color = 0; color < colors; color++) {
// clear right and bottom 8-pixel column and row
float *right_col = gpu_corr_images[ncam] + dstride * (woi_theight * DTT_SIZE + DTT_SIZE) * color + (woi_twidth * DTT_SIZE);
erase8x8<<<grid_erase8x8_right_col,threads_erase8x8>>>(
right_col, // float * gpu_top_left,
dstride); // const size_t dstride);
float *bottom_row = gpu_corr_images[ncam] + dstride * (woi_theight * DTT_SIZE + DTT_SIZE) * color + dstride * (woi_theight * DTT_SIZE);
erase8x8<<<grid_erase8x8_bottom_row,threads_erase8x8>>>(
bottom_row, // float * gpu_top_left,
dstride); // const size_t dstride);
for (int v_offs = 0; v_offs < 2; v_offs++){
for (int h_offs = 0; h_offs < 2; h_offs++){
int tilesy_half = (woi_theight + (v_offs ^ 1)) >> 1;
...
...
@@ -3544,7 +3564,25 @@ __global__ void imclt_rbg_all(
}
}
/**
* Clear 8x8 tiles, used to erase right and bottom 8-pixel wide column/row before imclt_rbg
* @param gpu_top_left - pointer to the top-left corner of the firsr tile to erase
* @param dstride - offset for 1 pixel step down
* block.x - horizontal tile offset
* block.y - vertical tile offset
* 0<=thread.x < 8 - horizontal pixel offset
* 0<=thread.y < 4 - vertical pixel offset
*/
extern "C"
__global__ void erase8x8(
float * gpu_top_left,
const size_t dstride)
{
float * pixel = gpu_top_left + (((blockIdx.y * DTT_SIZE) + threadIdx.y) * dstride) + ((blockIdx.x * DTT_SIZE) + threadIdx.x);
* pixel = 0.0f;
pixel += dstride * blockDim.y; // add 4 pixel rows (assuming blockDim.x==4)
* pixel = 0.0f;
}
/**
* Helper kernel for imclt_rbg_all(), generate per-camera -per color image from the in-memory frequency domain representation.
...
...
@@ -3573,7 +3611,7 @@ __global__ void imclt_rbg(
const size_t dstride) // in floats (pixels)
{
float *color_plane = gpu_rbg + dstride * (woi_theight * DTT_SIZE + DTT_SIZE) * color;
int pass = (v_offset << 1) + h_offset; // 0..3 to correctly ac
um
mulate 16x16 tiles stride 8
int pass = (v_offset << 1) + h_offset; // 0..3 to correctly ac
cu
mulate 16x16 tiles stride 8
int tile_in_block = threadIdx.y;
int tile_num = blockIdx.x * IMCLT_TILES_PER_BLOCK + tile_in_block;
int tilesx_half = (woi_twidth + (h_offset ^ 1)) >> 1;
...
...
src/TileProcessor.h
View file @
53f7f2ae
...
...
@@ -49,7 +49,7 @@ extern "C" __global__ void convert_direct( // called with a single block, single
float
**
gpu_kernel_offsets
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
float
*
gpu_ftasks
,
// flattened tasks, 2
7 floats for quad EO, 99
floats for LWIR16
float
*
gpu_ftasks
,
// flattened tasks, 2
9 floats for quad EO, 101
floats for LWIR16
// struct tp_task * gpu_tasks,
float
**
gpu_clt
,
// [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
dstride
,
// in floats (pixels)
...
...
@@ -76,7 +76,7 @@ extern "C" __global__ void correlate2D(
float
scale1
,
// scale for B
float
scale2
,
// scale for G
float
fat_zero
,
// here - absolute
float
*
gpu_ftasks
,
// flattened tasks, 2
7 floats for quad EO, 99
floats for LWIR16
float
*
gpu_ftasks
,
// flattened tasks, 2
9 floats for quad EO, 101
floats for LWIR16
// struct tp_task * gpu_tasks, // array of per-tile tasks (now bits 4..9 - correlation pairs)
int
num_tiles
,
// number of tiles in task
int
tilesx
,
// number of tile rows
...
...
@@ -109,7 +109,7 @@ extern "C" __global__ void corr2D_combine(
extern
"C"
__global__
void
textures_nonoverlap
(
int
num_cams
,
// number of cameras
float
*
gpu_ftasks
,
// flattened tasks, 2
7 floats for quad EO, 99
floats
float
*
gpu_ftasks
,
// flattened tasks, 2
9 floats for quad EO, 101
floats
// struct tp_task * gpu_tasks,
int
num_tiles
,
// number of tiles in task list
// int num_tilesx, // number of tiles in a row
...
...
@@ -127,6 +127,7 @@ extern "C" __global__ void textures_nonoverlap(
// combining both non-overlap and overlap (each calculated if pointer is not null )
size_t
texture_stride
,
// in floats (now 256*4 = 1024) // may be 0 if not needed
float
*
gpu_texture_tiles
,
// (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
int
linescan_order
,
// 0 low-res tiles have tghe same order, as gpu_texture_indices, 1 - in linescan order
float
*
gpu_diff_rgb_combo
,
//); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
int
num_tilesx
);
...
...
@@ -141,6 +142,10 @@ __global__ void imclt_rbg_all(
int
woi_theight
,
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
__global__
void
erase8x8
(
float
*
gpu_top_left
,
const
size_t
dstride
);
extern
"C"
__global__
void
imclt_rbg
(
float
*
gpu_clt
,
// [TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
...
...
@@ -156,7 +161,7 @@ extern "C" __global__ void imclt_rbg(
extern
"C"
__global__
void
generate_RBGA
(
int
num_cams
,
// number of cameras used
// Parameters to generate texture tasks
float
*
gpu_ftasks
,
// flattened tasks, 2
7 floats for quad EO, 99
floats for LWIR16
float
*
gpu_ftasks
,
// flattened tasks, 2
9 floats for quad EO, 101
floats for LWIR16
// struct tp_task * gpu_tasks,
int
num_tiles
,
// number of tiles in task list
// declare arrays in device code?
...
...
src/test_tp.cu
View file @
53f7f2ae
...
...
@@ -31,8 +31,8 @@
*/
#define NOCORR
#define NOCORR_TD
#define NOTEXTURES_HOST
//
#define NOCORR_TD
//
#define NOTEXTURES_HOST
#define NOTEXTURES
#define NOTEXTURE_RGBA
#define SAVE_CLT
...
...
@@ -492,7 +492,9 @@ void generate_RBGA_host(
gpu_woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
ntt, // size_t num_texture_tiles, // number of texture tiles to process
gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
ti_offset, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
colors, // int colors, // number of colors (3/1)
is_lwir, // int is_lwir, // do not perform shot correction
...
...
@@ -1626,7 +1628,6 @@ int main(int argc, char **argv)
// FIXME: provide sel_pairs
correlate2D<<<1,1>>>( // output TD tiles, no normalization
num_cams, // int num_cams,
// 0, // int * sel_pairs, // unused bits should be 0
sel_pairs[0], // int sel_pairs0 // unused bits should be 0
sel_pairs[1], // int sel_pairs1, // unused bits should be 0
sel_pairs[2], // int sel_pairs2, // unused bits should be 0
...
...
@@ -1638,7 +1639,6 @@ int main(int argc, char **argv)
color_weights[2], // 0.5, // float scale2, // scale for G
30.0, // float fat_zero, // here - absolute
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles) // number of tiles in task
TILESX, // int tilesx, // number of tile rows
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
...
...
@@ -1795,6 +1795,7 @@ int main(int argc, char **argv)
(int *) 0, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
cpu_pnum_texture_tiles, // *pnum_texture_tiles, // size_t num_texture_tiles, // number of texture tiles to process
0, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
texture_colors, // int colors, // number of colors (3/1)
...
...
@@ -1949,7 +1950,7 @@ int main(int argc, char **argv)
// printf("grid_texture=(%d, %d, %d)\n",grid_texture.x,grid_texture.y,grid_texture.z);
StopWatchInterface *timerTEXTURE = 0;
sdkCreateTimer(&timerTEXTURE);
int linescan_order = 1; // output low-res in linescan order, 0 - in gpu_texture_indices order
for (int i = i0; i < numIterations; i++)
{
if (i == 0)
...
...
@@ -1986,7 +1987,8 @@ int main(int argc, char **argv)
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null )
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed\
linescan_order, // int linescan_order, // 0 low-res tiles have tghe same order, as gpu_texture_indices, 1 - in linescan order
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
TILESX);
getLastCudaError("Kernel failure");
...
...
src/tp_defines.h
View file @
53f7f2ae
...
...
@@ -134,7 +134,7 @@
#define DEBUG8 1
#define DEBUG9 1
*/
#define DEBUG8A 1
//#define DEBUG8A 1 // generate_RBGA_host
//textures
//#define DEBUG10 1
//#define DEBUG11 1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment