Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
6c76931e
Commit
6c76931e
authored
Feb 26, 2022
by
Palani Johnson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ran formatter
parent
4648cb20
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
7432 additions
and
7679 deletions
+7432
-7679
TileProcessor.cuh
src/TileProcessor.cuh
+3697
-3819
TileProcessor.h
src/TileProcessor.h
+133
-128
dtt8x8.cu
src/dtt8x8.cu
+980
-1019
dtt8x8.h
src/dtt8x8.h
+29
-30
geometry_correction.cu
src/geometry_correction.cu
+825
-812
geometry_correction.h
src/geometry_correction.h
+103
-109
test_tp.cu
src/test_tp.cu
+1625
-1718
tp_defines.h
src/tp_defines.h
+40
-44
No files found.
src/TileProcessor.cuh
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/TileProcessor.h
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/dtt8x8.cu
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/dtt8x8.h
View file @
6c76931e
...
@@ -45,57 +45,56 @@
...
@@ -45,57 +45,56 @@
* with Nvidia Nsight, driver API when calling these kernels from Java
* with Nvidia Nsight, driver API when calling these kernels from Java
*/
*/
#ifndef JCUDA
#ifndef JCUDA
#define DTT_SIZE_LOG2
3
#define DTT_SIZE_LOG2 3
#endif
#endif
#pragma once
#pragma once
#define DTT_SIZE
(1 << DTT_SIZE_LOG2)
#define DTT_SIZE (1 << DTT_SIZE_LOG2)
#define DTT_SIZE1
(DTT_SIZE + 1)
#define DTT_SIZE1 (DTT_SIZE + 1)
#define DTT_SIZE2
(2 * DTT_SIZE)
#define DTT_SIZE2 (2 * DTT_SIZE)
#define DTT_SIZE21
(DTT_SIZE2 + 1)
#define DTT_SIZE21 (DTT_SIZE2 + 1)
#define DTT_SIZE4
(4 * DTT_SIZE)
#define DTT_SIZE4 (4 * DTT_SIZE)
#define DTT_SIZE2M1
(DTT_SIZE2 - 1)
#define DTT_SIZE2M1 (DTT_SIZE2 - 1)
#define BAYER_RED
0
#define BAYER_RED 0
#define BAYER_BLUE
1
#define BAYER_BLUE 1
#define BAYER_GREEN 2
#define BAYER_GREEN 2
// assuming GR/BG as now
// assuming GR/BG as now
#define BAYER_RED_ROW 0
#define BAYER_RED_ROW 0
#define BAYER_RED_COL 1
#define BAYER_RED_COL 1
#define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH + 1)
//extern __constant__ float idct_signs[4][4][4];
//extern __constant__ int imclt_indx9[16];
//extern __constant__ float HWINDOW2[];
// extern __constant__ float idct_signs[4][4][4];
// extern __constant__ int imclt_indx9[16];
// extern __constant__ float HWINDOW2[];
// kernels (not used so far)
// kernels (not used so far)
#if 0
#if 0
extern "C" __global__ void GPU_DTT24_DRV(float *dst, float *src, int src_stride, int dtt_mode);
extern "C" __global__ void GPU_DTT24_DRV(float *dst, float *src, int src_stride, int dtt_mode);
#endif// #if 0
#endif
// #if 0
//=========================== 2D functions ===============
//=========================== 2D functions ===============
extern
__device__
void
corrUnfoldTile
(
extern
__device__
void
corrUnfoldTile
(
int
corr_radius
,
int
corr_radius
,
float
*
qdata0
,
// [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data, rows extended to optimize shared ports
float
*
qdata0
,
// [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data, rows extended to optimize shared ports
float
*
rslt
);
// [DTT_SIZE2M1][DTT_SIZE2M1]) // 15x15
float
*
rslt
);
// [DTT_SIZE2M1][DTT_SIZE2M1]) // 15x15
extern
__device__
void
dttii_2d
(
extern
__device__
void
dttii_2d
(
float
*
clt_corr
);
// shared memory, [4][DTT_SIZE1][DTT_SIZE]
float
*
clt_corr
);
// shared memory, [4][DTT_SIZE1][DTT_SIZE]
extern
__device__
void
dttiv_color_2d
(
extern
__device__
void
dttiv_color_2d
(
float
*
clt_tile
,
float
*
clt_tile
,
int
color
);
int
color
);
extern
__device__
void
dttiv_mono_2d
(
extern
__device__
void
dttiv_mono_2d
(
float
*
clt_tile
);
float
*
clt_tile
);
extern
__device__
void
imclt
(
extern
__device__
void
imclt
(
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
mclt_tile
);
float
*
mclt_tile
);
extern
__device__
void
imclt8threads
(
extern
__device__
void
imclt8threads
(
int
do_acc
,
// 1 - add to previous value, 0 - overwrite
int
do_acc
,
// 1 - add to previous value, 0 - overwrite
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
mclt_tile
,
// [2* DTT_SIZE][DTT_SIZE1+ DTT_SIZE], // +1 to alternate column ports[16][17]
float
*
mclt_tile
,
// [2* DTT_SIZE][DTT_SIZE1+ DTT_SIZE], // +1 to alternate column ports[16][17]
int
debug
);
int
debug
);
src/geometry_correction.cu
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/geometry_correction.h
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/test_tp.cu
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/tp_defines.h
View file @
6c76931e
...
@@ -39,61 +39,61 @@
...
@@ -39,61 +39,61 @@
// Avoiding includes in jcuda, all source files will be merged
// Avoiding includes in jcuda, all source files will be merged
#pragma once
#pragma once
#ifndef JCUDA
#ifndef JCUDA
#define TEST_LWIR
1
#define TEST_LWIR 1
#include <stdio.h>
#include <stdio.h>
#define THREADSX
(DTT_SIZE)
#define THREADSX (DTT_SIZE)
#define NUM_CAMS
16
// now maximal number of cameras
#define NUM_CAMS
16
// now maximal number of cameras
//#define NUM_PAIRS 6
//#define NUM_PAIRS 6
//#define NUM_COLORS 1 //3
//#define NUM_COLORS 1 //3
// kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64]
// kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64]
#define KERNELS_LSTEP
4
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE
8
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK
4
#define TILES_PER_BLOCK 4
#define CORR_THREADS_PER_TILE
8
#define CORR_THREADS_PER_TILE 8
#define CORR_TILES_PER_BLOCK
4
#define CORR_TILES_PER_BLOCK 4
#define CORR_TILES_PER_BLOCK_NORMALIZE 4 // increase to 8?
#define CORR_TILES_PER_BLOCK_NORMALIZE 4
// increase to 8?
#define CORR_TILES_PER_BLOCK_COMBINE
4
// increase to 16?
#define CORR_TILES_PER_BLOCK_COMBINE
4
// increase to 16?
//#define TEXTURE_THREADS 32 //
//#define TEXTURE_THREADS 32 //
#define NUM_THREADS
32
#define NUM_THREADS 32
#define TEXTURE_THREADS_PER_TILE
8
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_TILES_PER_BLOCK
1
#define TEXTURE_TILES_PER_BLOCK 1
#define IMCLT_THREADS_PER_TILE
16
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK
4
#define IMCLT_TILES_PER_BLOCK 4
#define CORR_NTILE_SHIFT
8
// higher bits - number of a pair, other bits tile number
#define CORR_NTILE_SHIFT
8
// higher bits - number of a pair, other bits tile number
// only lower bit will be used to request correlations, correlation mask will be common for all the scene
// only lower bit will be used to request correlations, correlation mask will be common for all the scene
//#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
//#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT
7
// bit 7 used to request texture for the tile
#define CORR_TEXTURE_BIT
7
// bit 7 used to request texture for the tile
#define TASK_CORR_BITS
4
#define TASK_CORR_BITS 4
#define TASK_TEXTURE_N_BIT
0
// Texture with North neighbor
#define TASK_TEXTURE_N_BIT
0
// Texture with North neighbor
#define TASK_TEXTURE_E_BIT
1
// Texture with East neighbor
#define TASK_TEXTURE_E_BIT
1
// Texture with East neighbor
#define TASK_TEXTURE_S_BIT
2
// Texture with South neighbor
#define TASK_TEXTURE_S_BIT
2
// Texture with South neighbor
#define TASK_TEXTURE_W_BIT
3
// Texture with West neighbor
#define TASK_TEXTURE_W_BIT
3
// Texture with West neighbor
//#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
//#define TASK_TEXTURE_BIT 3 // bit to request texture calculation int task field of struct tp_task
#define LIST_TEXTURE_BIT
7
// bit to request texture calculation
#define LIST_TEXTURE_BIT
7
// bit to request texture calculation
//#define CORR_OUT_RAD 7 // full tile (15x15), was 4 (9x9)
//#define CORR_OUT_RAD 7 // full tile (15x15), was 4 (9x9)
#define FAT_ZERO_WEIGHT
0.0001
// add to port weights to avoid nan
#define FAT_ZERO_WEIGHT
0.0001
// add to port weights to avoid nan
#define THREADS_DYNAMIC_BITS
5
// treads in block for CDP creation of the texture list
#define THREADS_DYNAMIC_BITS
5
// treads in block for CDP creation of the texture list
#define RBYRDIST_LEN
5001
// for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_LEN
5001
// for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_STEP
0.0004
// for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define RBYRDIST_STEP
0.0004
// for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM
(32/NUM_CAMS)
// each tile has NUM_CAMS threads
#define TILES_PER_BLOCK_GEOM
(32 / NUM_CAMS)
// each tile has NUM_CAMS threads
#define DEBUG_ANY 1
#define DEBUG_ANY 1
#ifdef
DEBUG_ANY
#ifdef DEBUG_ANY
//#define DEBUG_OOB1 1
//#define DEBUG_OOB1 1
// Use CORR_OUT_RAD for the correlation output
// Use CORR_OUT_RAD for the correlation output
//#define DBG_TILE_X 40
//#define DBG_TILE_X 40
//#define DBG_TILE_Y 80
//#define DBG_TILE_Y 80
#if TEST_LWIR
#if TEST_LWIR
#define DBG_TILE_X 50
// 52 // 32 // 162 // 151 // 161 // 49
#define DBG_TILE_X 50
// 52 // 32 // 162 // 151 // 161 // 49
#define DBG_TILE_Y 19
// 5 // 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE_Y 19
// 5 // 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE
(DBG_TILE_Y * 80 + DBG_TILE_X)
#define DBG_TILE
(DBG_TILE_Y * 80 + DBG_TILE_X)
#else
#else
#define DBG_TILE_X 114
// 32 // 162 // 151 // 161 // 49
#define DBG_TILE_X 114
// 32 // 162 // 151 // 161 // 49
#define DBG_TILE_Y 51
// 52 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE_Y 51
// 52 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE
(DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE
(DBG_TILE_Y * 324 + DBG_TILE_X)
#endif
#endif
#undef DBG_MARK_DBG_TILE
#undef DBG_MARK_DBG_TILE
//#undef DBG_TILE
//#undef DBG_TILE
...
@@ -101,8 +101,7 @@
...
@@ -101,8 +101,7 @@
//#undef HAS_PRINTF
//#undef HAS_PRINTF
#define HAS_PRINTF
#define HAS_PRINTF
// 7
//7
//#define DEBUG1 1
//#define DEBUG1 1
//#define DEBUG2 1
//#define DEBUG2 1
//#define DEBUG3 1
//#define DEBUG3 1
...
@@ -118,7 +117,7 @@
...
@@ -118,7 +117,7 @@
#define DEBUG9 1
#define DEBUG9 1
*/
*/
//#define DEBUG8A 1 // generate_RBGA_host
//#define DEBUG8A 1 // generate_RBGA_host
//textures
//
textures
//#define DEBUG10 1
//#define DEBUG10 1
//#define DEBUG11 1
//#define DEBUG11 1
//#define DEBUG12 1
//#define DEBUG12 1
...
@@ -127,7 +126,6 @@
...
@@ -127,7 +126,6 @@
// geom
// geom
//#define DEBUG20 1
//#define DEBUG20 1
#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
//#define DEBUG20 1 // Geometry Correction
//#define DEBUG20 1 // Geometry Correction
//#define DEBUG21 1 // Geometry Correction
//#define DEBUG21 1 // Geometry Correction
...
@@ -136,10 +134,8 @@
...
@@ -136,10 +134,8 @@
//#define DEBUG22 1
//#define DEBUG22 1
//#define DEBUG23 1
//#define DEBUG23 1
#endif //#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
#endif //#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
#endif //#ifdef DEBUG_ANY
#endif
//#ifndef JCUDA
#endif
//#ifdef DEBUG_ANY
#endif //#ifndef JCUDA
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment