Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
44e87f14
Commit
44e87f14
authored
Aug 07, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
more TILESX
parent
bb7792f8
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
115 additions
and
91 deletions
+115
-91
TileProcessor.cuh
src/TileProcessor.cuh
+93
-81
TileProcessor.h
src/TileProcessor.h
+11
-9
test_tp.cu
src/test_tp.cu
+3
-1
tp_defines.h
src/tp_defines.h
+8
-0
No files found.
src/TileProcessor.cuh
View file @
44e87f14
This diff is collapsed.
Click to expand it.
src/TileProcessor.h
View file @
44e87f14
...
@@ -48,7 +48,7 @@ extern "C" __global__ void convert_direct( // called with a single block, single
...
@@ -48,7 +48,7 @@ extern "C" __global__ void convert_direct( // called with a single block, single
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
struct
tp_task
*
gpu_tasks
,
struct
tp_task
*
gpu_tasks
,
float
**
gpu_clt
,
// [NUM_CAMS][TILES
Y][TILES
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS][TILES
-Y][TILES-
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
dstride
,
// in floats (pixels)
size_t
dstride
,
// in floats (pixels)
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
int
lpf_mask
,
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
int
lpf_mask
,
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
...
@@ -57,10 +57,12 @@ extern "C" __global__ void convert_direct( // called with a single block, single
...
@@ -57,10 +57,12 @@ extern "C" __global__ void convert_direct( // called with a single block, single
int
kernels_hor
,
int
kernels_hor
,
int
kernels_vert
,
int
kernels_vert
,
int
*
gpu_active_tiles
,
// pointer to the calculated number of non-zero tiles
int
*
gpu_active_tiles
,
// pointer to the calculated number of non-zero tiles
int
*
pnum_active_tiles
);
// indices to gpu_tasks
int
*
pnum_active_tiles
,
// indices to gpu_tasks
int
tilesx
);
extern
"C"
__global__
void
correlate2D
(
extern
"C"
__global__
void
correlate2D
(
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILES
Y][TILES
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILES
-Y][TILES-
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
int
colors
,
// number of colors (3/1)
int
colors
,
// number of colors (3/1)
float
scale0
,
// scale for R
float
scale0
,
// scale for R
float
scale1
,
// scale for B
float
scale1
,
// scale for B
...
@@ -83,7 +85,7 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -83,7 +85,7 @@ extern "C" __global__ void textures_nonoverlap(
// declare arrays in device code?
// declare arrays in device code?
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
pnum_texture_tiles
,
// returns total number of elements in gpu_texture_indices array
int
*
pnum_texture_tiles
,
// returns total number of elements in gpu_texture_indices array
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILES
Y][TILES
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILES
-Y][TILES-
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
// TODO: use geometry_correction rXY !
struct
gc
*
gpu_geometry_correction
,
struct
gc
*
gpu_geometry_correction
,
int
colors
,
// number of colors (3/1)
int
colors
,
// number of colors (3/1)
...
@@ -99,7 +101,7 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -99,7 +101,7 @@ extern "C" __global__ void textures_nonoverlap(
extern
"C"
extern
"C"
__global__
void
imclt_rbg_all
(
__global__
void
imclt_rbg_all
(
float
**
gpu_clt
,
// [NUM_CAMS][TILES
Y][TILES
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS][TILES
-Y][TILES-
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_corr_images
,
// [NUM_CAMS][WIDTH, 3 * HEIGHT]
float
**
gpu_corr_images
,
// [NUM_CAMS][WIDTH, 3 * HEIGHT]
int
apply_lpf
,
int
apply_lpf
,
int
colors
,
int
colors
,
...
@@ -108,7 +110,7 @@ __global__ void imclt_rbg_all(
...
@@ -108,7 +110,7 @@ __global__ void imclt_rbg_all(
const
size_t
dstride
);
// in floats (pixels)
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
__global__
void
imclt_rbg
(
extern
"C"
__global__
void
imclt_rbg
(
float
*
gpu_clt
,
// [TILES
Y][TILES
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_clt
,
// [TILES
-Y][TILES-
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
int
apply_lpf
,
int
apply_lpf
,
int
mono
,
// defines lpf filter
int
mono
,
// defines lpf filter
...
@@ -127,10 +129,10 @@ extern "C" __global__ void generate_RBGA(
...
@@ -127,10 +129,10 @@ extern "C" __global__ void generate_RBGA(
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
num_texture_tiles
,
// number of texture tiles to process (8 separate elements for accumulation)
int
*
num_texture_tiles
,
// number of texture tiles to process (8 separate elements for accumulation)
int
*
woi
,
// x,y,width,height of the woi
int
*
woi
,
// x,y,width,height of the woi
int
width
,
// <= TILESX, use for faster processing of LWIR images (should be actual + 1)
int
width
,
// <= TILES
-
X, use for faster processing of LWIR images (should be actual + 1)
int
height
,
// <= TILESY, use for faster processing of LWIR images
int
height
,
// <= TILES
-
Y, use for faster processing of LWIR images
// Parameters for the texture generation
// Parameters for the texture generation
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILES
Y][TILES
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILES
-Y][TILES-
X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
// TODO: use geometry_correction rXY !
struct
gc
*
gpu_geometry_correction
,
struct
gc
*
gpu_geometry_correction
,
int
colors
,
// number of colors (3/1)
int
colors
,
// number of colors (3/1)
...
...
src/test_tp.cu
View file @
44e87f14
...
@@ -870,7 +870,9 @@ int main(int argc, char **argv)
...
@@ -870,7 +870,9 @@ int main(int argc, char **argv)
KERNELS_HOR, // int kernels_hor,
KERNELS_HOR, // int kernels_hor,
KERNELS_VERT, // int kernels_vert);
KERNELS_VERT, // int kernels_vert);
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
gpu_active_tiles, // int * gpu_active_tiles, // pointer to the calculated number of non-zero tiles
gpu_num_active); // int * pnum_active_tiles); // indices to gpu_tasks
gpu_num_active, //); // int * pnum_active_tiles); // indices to gpu_tasks
TILESX); // int tilesx)
getLastCudaError("Kernel execution failed");
getLastCudaError("Kernel execution failed");
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(cudaDeviceSynchronize());
...
...
src/tp_defines.h
View file @
44e87f14
...
@@ -77,6 +77,14 @@
...
@@ -77,6 +77,14 @@
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
// only used in C++ test
#define TILESX (IMG_WIDTH / DTT_SIZE)
//#define TILESY (IMG_HEIGHT / DTT_SIZE)
#define TILESYA ((TILESY +3) & (~3))
#define DEBUG_OOB1 1
#define DEBUG_OOB1 1
// Use CORR_OUT_RAD for the correlation output
// Use CORR_OUT_RAD for the correlation output
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment