Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
6c76931e
Commit
6c76931e
authored
Feb 26, 2022
by
Palani Johnson
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ran formatter
parent
4648cb20
Changes
8
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
7432 additions
and
7679 deletions
+7432
-7679
TileProcessor.cuh
src/TileProcessor.cuh
+3697
-3819
TileProcessor.h
src/TileProcessor.h
+133
-128
dtt8x8.cu
src/dtt8x8.cu
+980
-1019
dtt8x8.h
src/dtt8x8.h
+29
-30
geometry_correction.cu
src/geometry_correction.cu
+825
-812
geometry_correction.h
src/geometry_correction.h
+103
-109
test_tp.cu
src/test_tp.cu
+1625
-1718
tp_defines.h
src/tp_defines.h
+40
-44
No files found.
src/TileProcessor.cuh
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/TileProcessor.h
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/dtt8x8.cu
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/dtt8x8.h
View file @
6c76931e
...
@@ -64,17 +64,16 @@
...
@@ -64,17 +64,16 @@
#define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH + 1)
//extern __constant__ float idct_signs[4][4][4];
//extern __constant__ int imclt_indx9[16];
//extern __constant__ float HWINDOW2[];
// extern __constant__ float idct_signs[4][4][4];
// extern __constant__ int imclt_indx9[16];
// extern __constant__ float HWINDOW2[];
// kernels (not used so far)
// kernels (not used so far)
#if 0
#if 0
extern "C" __global__ void GPU_DTT24_DRV(float *dst, float *src, int src_stride, int dtt_mode);
extern "C" __global__ void GPU_DTT24_DRV(float *dst, float *src, int src_stride, int dtt_mode);
#endif// #if 0
#endif
// #if 0
//=========================== 2D functions ===============
//=========================== 2D functions ===============
extern
__device__
void
corrUnfoldTile
(
extern
__device__
void
corrUnfoldTile
(
...
@@ -83,19 +82,19 @@ extern __device__ void corrUnfoldTile(
...
@@ -83,19 +82,19 @@ extern __device__ void corrUnfoldTile(
float
*
rslt
);
// [DTT_SIZE2M1][DTT_SIZE2M1]) // 15x15
float
*
rslt
);
// [DTT_SIZE2M1][DTT_SIZE2M1]) // 15x15
extern
__device__
void
dttii_2d
(
extern
__device__
void
dttii_2d
(
float
*
clt_corr
);
// shared memory, [4][DTT_SIZE1][DTT_SIZE]
float
*
clt_corr
);
// shared memory, [4][DTT_SIZE1][DTT_SIZE]
extern
__device__
void
dttiv_color_2d
(
extern
__device__
void
dttiv_color_2d
(
float
*
clt_tile
,
float
*
clt_tile
,
int
color
);
int
color
);
extern
__device__
void
dttiv_mono_2d
(
extern
__device__
void
dttiv_mono_2d
(
float
*
clt_tile
);
float
*
clt_tile
);
extern
__device__
void
imclt
(
extern
__device__
void
imclt
(
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
mclt_tile
);
float
*
mclt_tile
);
extern
__device__
void
imclt8threads
(
extern
__device__
void
imclt8threads
(
int
do_acc
,
// 1 - add to previous value, 0 - overwrite
int
do_acc
,
// 1 - add to previous value, 0 - overwrite
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
clt_tile
,
// [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float
*
mclt_tile
,
// [2* DTT_SIZE][DTT_SIZE1+ DTT_SIZE], // +1 to alternate column ports[16][17]
float
*
mclt_tile
,
// [2* DTT_SIZE][DTT_SIZE1+ DTT_SIZE], // +1 to alternate column ports[16][17]
int
debug
);
int
debug
);
src/geometry_correction.cu
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/geometry_correction.h
View file @
6c76931e
...
@@ -41,18 +41,16 @@
...
@@ -41,18 +41,16 @@
#include "tp_defines.h"
#include "tp_defines.h"
#endif
#endif
#define NVRTC_BUG 1
#define NVRTC_BUG 1
#ifndef M_PI
#ifndef M_PI
#define M_PI 3.14159265358979323846
/* pi */
#define M_PI 3.14159265358979323846
/* pi */
#endif
#endif
#ifndef offsetof
#ifndef offsetof
#define offsetof(st, m) \
#define offsetof(st, m) \
((size_t)
&
(((st *)0)->m))
((size_t)
&
(((st *)0)->m))
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
//#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
#endif
#define SCENE_UNITS_SCALE 0.001 // meters from mm
#define SCENE_UNITS_SCALE 0.001 // meters from mm
#define MIN_DISPARITY 0.01 // minimal disparity to try to convert to world coordinates
#define MIN_DISPARITY 0.01 // minimal disparity to try to convert to world coordinates
struct
tp_task
{
struct
tp_task
{
...
@@ -68,37 +66,37 @@ struct tp_task {
...
@@ -68,37 +66,37 @@ struct tp_task {
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
};
};
#define get_task_size(x) (sizeof(struct tp_task)
/
sizeof(float) - 6 * (NUM_CAMS - x))
#define get_task_size(x) (sizeof(struct tp_task)
/
sizeof(float) - 6 * (NUM_CAMS - x))
#define tp_task_xy_offset 5
#define tp_task_xy_offset 5
#define tp_task_centerXY_offset 3
#define tp_task_centerXY_offset 3
struct
corr_vector
{
struct
corr_vector
{
float
tilt
[
NUM_CAMS
-
1
];
// 0..2
float
tilt
[
NUM_CAMS
-
1
];
// 0..2
float
azimuth
[
NUM_CAMS
-
1
];
// 3..5
float
azimuth
[
NUM_CAMS
-
1
];
// 3..5
float
roll
[
NUM_CAMS
];
// 6..9
float
roll
[
NUM_CAMS
];
// 6..9
float
zoom
[
NUM_CAMS
-
1
];
// 10..12
float
zoom
[
NUM_CAMS
-
1
];
// 10..12
// for ERS correction:
// for ERS correction:
float
imu_rot
[
3
];
// d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
float
imu_rot
[
3
];
// d_tilt/dt (rad/s), d_az/dt, d_roll/dt 13..15
float
imu_move
[
3
];
// dx/dt, dy/dt, dz/dt 16..18
float
imu_move
[
3
];
// dx/dt, dy/dt, dz/dt 16..18
};
};
#ifdef NVRTC_BUG
#ifdef NVRTC_BUG
struct
trot_deriv
{
struct
trot_deriv
{
float
rots
[
NUM_CAMS
][
3
][
3
];
float
rots
[
NUM_CAMS
][
3
][
3
];
float
d_daz
[
NUM_CAMS
][
3
][
3
];
float
d_daz
[
NUM_CAMS
][
3
][
3
];
float
d_tilt
[
NUM_CAMS
][
3
][
3
];
float
d_tilt
[
NUM_CAMS
][
3
][
3
];
float
d_roll
[
NUM_CAMS
][
3
][
3
];
float
d_roll
[
NUM_CAMS
][
3
][
3
];
float
d_zoom
[
NUM_CAMS
][
3
][
3
];
float
d_zoom
[
NUM_CAMS
][
3
][
3
];
};
};
#else
#else
union
trot_deriv
{
union
trot_deriv
{
struct
{
struct
{
float
rots
[
NUM_CAMS
][
3
][
3
];
float
rots
[
NUM_CAMS
][
3
][
3
];
float
d_daz
[
NUM_CAMS
][
3
][
3
];
float
d_daz
[
NUM_CAMS
][
3
][
3
];
float
d_tilt
[
NUM_CAMS
][
3
][
3
];
float
d_tilt
[
NUM_CAMS
][
3
][
3
];
float
d_roll
[
NUM_CAMS
][
3
][
3
];
float
d_roll
[
NUM_CAMS
][
3
][
3
];
float
d_zoom
[
NUM_CAMS
][
3
][
3
];
float
d_zoom
[
NUM_CAMS
][
3
][
3
];
};
};
float
matrices
[
5
][
NUM_CAMS
][
3
][
3
];
float
matrices
[
5
][
NUM_CAMS
][
3
][
3
];
};
};
#endif
#endif
...
@@ -116,72 +114,68 @@ struct gc {
...
@@ -116,72 +114,68 @@ struct gc {
float
distortionC
;
// r^2
float
distortionC
;
// r^2
float
distortionB
;
// r^3
float
distortionB
;
// r^3
float
distortionA
;
// r^4 (normalized to focal length or to sensor half width?)
float
distortionA
;
// r^4 (normalized to focal length or to sensor half width?)
float
distortionA5
;
//
r^5 (normalized to focal length or to sensor half width?)
float
distortionA5
;
//
r^5 (normalized to focal length or to sensor half width?)
float
distortionA6
;
//
r^6 (normalized to focal length or to sensor half width?)
float
distortionA6
;
//
r^6 (normalized to focal length or to sensor half width?)
float
distortionA7
;
//
r^7 (normalized to focal length or to sensor half width?)
float
distortionA7
;
//
r^7 (normalized to focal length or to sensor half width?)
float
distortionA8
;
//
r^8 (normalized to focal length or to sensor half width?)
float
distortionA8
;
//
r^8 (normalized to focal length or to sensor half width?)
#ifndef NVRTC_BUG
#ifndef NVRTC_BUG
};
};
float
rad_coeff
[
7
];
float
rad_coeff
[
7
];
};
};
#endif
#endif
// parameters, common for all sensors
// parameters, common for all sensors
float
elevation
;
// degrees, up - positive;
float
elevation
;
// degrees, up - positive;
float
heading
;
// degrees, CW (from top) - positive
float
heading
;
// degrees, CW (from top) - positive
float
forward
[
NUM_CAMS
];
float
forward
[
NUM_CAMS
];
float
right
[
NUM_CAMS
];
float
right
[
NUM_CAMS
];
float
height
[
NUM_CAMS
];
float
height
[
NUM_CAMS
];
float
roll
[
NUM_CAMS
];
// degrees, CW (to target) - positive
float
roll
[
NUM_CAMS
];
// degrees, CW (to target) - positive
float
pXY0
[
NUM_CAMS
][
2
];
float
pXY0
[
NUM_CAMS
][
2
];
float
common_right
;
// mm right, camera center
float
common_right
;
// mm right, camera center
float
common_forward
;
// mm forward (to target), camera center
float
common_forward
;
// mm forward (to target), camera center
float
common_height
;
// mm up, camera center
float
common_height
;
// mm up, camera center
float
common_roll
;
// degrees CW (to target) camera as a whole
float
common_roll
;
// degrees CW (to target) camera as a whole
// float [][] XYZ_he; // all cameras coordinates transformed to eliminate heading and elevation (rolls preserved)
// float [][] XYZ_he; // all cameras coordinates transformed to eliminate heading and elevation (rolls preserved)
// float [][] XYZ_her = null; // XYZ of the lenses in a corrected CCS (adjusted for elevation, heading, common_roll)
// float [][] XYZ_her = null; // XYZ of the lenses in a corrected CCS (adjusted for elevation, heading, common_roll)
float
rXY
[
NUM_CAMS
][
2
];
// XY pairs of the in a normal plane, relative to disparityRadius
float
rXY
[
NUM_CAMS
][
2
];
// XY pairs of the in a normal plane, relative to disparityRadius
// float [][] rXY_ideal = {{-0.5, -0.5}, {0.5,-0.5}, {-0.5, 0.5}, {0.5,0.5}};
// float [][] rXY_ideal = {{-0.5, -0.5}, {0.5,-0.5}, {-0.5, 0.5}, {0.5,0.5}};
// only used for the multi-quad systems
// only used for the multi-quad systems
float
cameraRadius
;
// =0; // average distance from the "mass center" of the sensors to the sensors
float
cameraRadius
;
// =0; // average distance from the "mass center" of the sensors to the sensors
float
disparityRadius
;
// =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
float
disparityRadius
;
// =150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
float
woi_tops
[
NUM_CAMS
];
// used to calculate scanline timing
float
woi_tops
[
NUM_CAMS
];
// used to calculate scanline timing
};
};
#define RAD_COEFF_LEN 7
#define RAD_COEFF_LEN 7
extern
"C"
__global__
void
get_tiles_offsets
(
extern
"C"
__global__
void
get_tiles_offsets
(
int
uniform_grid
,
//==0: use provided centers (as for interscene) , !=0 calculate uniform grid
int
uniform_grid
,
//==0: use provided centers (as for interscene) , !=0 calculate uniform grid
int
num_cams
,
int
num_cams
,
// struct tp_task * gpu_tasks,
// struct tp_task * gpu_tasks,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
struct
gc
*
gpu_geometry_correction
,
struct
gc
*
gpu_geometry_correction
,
struct
corr_vector
*
gpu_correction_vector
,
struct
corr_vector
*
gpu_correction_vector
,
float
*
gpu_rByRDist
,
// length should match RBYRDIST_LEN
float
*
gpu_rByRDist
,
// length should match RBYRDIST_LEN
trot_deriv
*
gpu_rot_deriv
);
trot_deriv
*
gpu_rot_deriv
);
extern
"C"
__global__
void
calculate_tiles_offsets
(
extern
"C"
__global__
void
calculate_tiles_offsets
(
int
uniform_grid
,
//==0: use provided centers (as for interscene) , !=0 calculate uniform grid
int
uniform_grid
,
//==0: use provided centers (as for interscene) , !=0 calculate uniform grid
int
num_cams
,
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// struct tp_task * gpu_tasks,
// struct tp_task * gpu_tasks,
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
struct
gc
*
gpu_geometry_correction
,
struct
gc
*
gpu_geometry_correction
,
struct
corr_vector
*
gpu_correction_vector
,
struct
corr_vector
*
gpu_correction_vector
,
float
*
gpu_rByRDist
,
// length should match RBYRDIST_LEN
float
*
gpu_rByRDist
,
// length should match RBYRDIST_LEN
trot_deriv
*
gpu_rot_deriv
);
trot_deriv
*
gpu_rot_deriv
);
// uses NUM_CAMS blocks, (3,3,3) threads
// uses NUM_CAMS blocks, (3,3,3) threads
extern
"C"
__global__
void
calc_rot_deriv
(
extern
"C"
__global__
void
calc_rot_deriv
(
int
num_cams
,
int
num_cams
,
struct
corr_vector
*
gpu_correction_vector
,
struct
corr_vector
*
gpu_correction_vector
,
trot_deriv
*
gpu_rot_deriv
);
trot_deriv
*
gpu_rot_deriv
);
#define CALC_REVERSE_TABLE_BLOCK_THREADS (NUM_CAMS * 3 * 3 * 3) // fixed blockDim
#define CALC_REVERSE_TABLE_BLOCK_THREADS (NUM_CAMS * 3 * 3 * 3) // fixed blockDim
// Use same blocks/threads as with calc_rot_deriv() - NUM_CAMS blocks, (3,3,3) threads
// Use same blocks/threads as with calc_rot_deriv() - NUM_CAMS blocks, (3,3,3) threads
extern
"C"
__global__
void
calcReverseDistortionTable
(
extern
"C"
__global__
void
calcReverseDistortionTable
(
struct
gc
*
geometry_correction
,
struct
gc
*
geometry_correction
,
float
*
rByRDist
);
float
*
rByRDist
);
src/test_tp.cu
View file @
6c76931e
This diff is collapsed.
Click to expand it.
src/tp_defines.h
View file @
6c76931e
...
@@ -77,7 +77,7 @@
...
@@ -77,7 +77,7 @@
#define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_LEN 5001 // for doubles 10001 - floats // length of rByRDist to allocate shared memory
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM
(32/NUM_CAMS)
// each tile has NUM_CAMS threads
#define TILES_PER_BLOCK_GEOM
(32 / NUM_CAMS)
// each tile has NUM_CAMS threads
#define DEBUG_ANY 1
#define DEBUG_ANY 1
...
@@ -87,13 +87,13 @@
...
@@ -87,13 +87,13 @@
//#define DBG_TILE_X 40
//#define DBG_TILE_X 40
//#define DBG_TILE_Y 80
//#define DBG_TILE_Y 80
#if TEST_LWIR
#if TEST_LWIR
#define DBG_TILE_X 50
// 52 // 32 // 162 // 151 // 161 // 49
#define DBG_TILE_X 50
// 52 // 32 // 162 // 151 // 161 // 49
#define DBG_TILE_Y 19
// 5 // 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE_Y 19
// 5 // 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE
(DBG_TILE_Y * 80 + DBG_TILE_X)
#define DBG_TILE
(DBG_TILE_Y * 80 + DBG_TILE_X)
#else
#else
#define DBG_TILE_X 114
// 32 // 162 // 151 // 161 // 49
#define DBG_TILE_X 114
// 32 // 162 // 151 // 161 // 49
#define DBG_TILE_Y 51
// 52 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE_Y 51
// 52 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE
(DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE
(DBG_TILE_Y * 324 + DBG_TILE_X)
#endif
#endif
#undef DBG_MARK_DBG_TILE
#undef DBG_MARK_DBG_TILE
//#undef DBG_TILE
//#undef DBG_TILE
...
@@ -101,8 +101,7 @@
...
@@ -101,8 +101,7 @@
//#undef HAS_PRINTF
//#undef HAS_PRINTF
#define HAS_PRINTF
#define HAS_PRINTF
// 7
//7
//#define DEBUG1 1
//#define DEBUG1 1
//#define DEBUG2 1
//#define DEBUG2 1
//#define DEBUG3 1
//#define DEBUG3 1
...
@@ -118,7 +117,7 @@
...
@@ -118,7 +117,7 @@
#define DEBUG9 1
#define DEBUG9 1
*/
*/
//#define DEBUG8A 1 // generate_RBGA_host
//#define DEBUG8A 1 // generate_RBGA_host
//textures
//
textures
//#define DEBUG10 1
//#define DEBUG10 1
//#define DEBUG11 1
//#define DEBUG11 1
//#define DEBUG12 1
//#define DEBUG12 1
...
@@ -127,7 +126,6 @@
...
@@ -127,7 +126,6 @@
// geom
// geom
//#define DEBUG20 1
//#define DEBUG20 1
#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
//#define DEBUG20 1 // Geometry Correction
//#define DEBUG20 1 // Geometry Correction
//#define DEBUG21 1 // Geometry Correction
//#define DEBUG21 1 // Geometry Correction
...
@@ -140,6 +138,4 @@
...
@@ -140,6 +138,4 @@
#endif //#ifdef DEBUG_ANY
#endif //#ifdef DEBUG_ANY
#endif //#ifndef JCUDA
#endif //#ifndef JCUDA
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment