Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
a51d6a77
Commit
a51d6a77
authored
Nov 25, 2021
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
More editing to make dynamic number of cameras
parent
ee0cfc3b
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
231 additions
and
120 deletions
+231
-120
TileProcessor.cuh
src/TileProcessor.cuh
+128
-82
TileProcessor.h
src/TileProcessor.h
+5
-3
geometry_correction.cu
src/geometry_correction.cu
+75
-28
geometry_correction.h
src/geometry_correction.h
+12
-3
test_tp.cu
src/test_tp.cu
+9
-3
tp_defines.h
src/tp_defines.h
+2
-1
No files found.
src/TileProcessor.cuh
View file @
a51d6a77
This diff is collapsed.
Click to expand it.
src/TileProcessor.h
View file @
a51d6a77
...
@@ -104,8 +104,9 @@ extern "C" __global__ void corr2D_combine(
...
@@ -104,8 +104,9 @@ extern "C" __global__ void corr2D_combine(
float
*
gpu_corrs_combo
);
// combined correlation output (one per tile)
float
*
gpu_corrs_combo
);
// combined correlation output (one per tile)
extern
"C"
__global__
void
textures_nonoverlap
(
extern
"C"
__global__
void
textures_nonoverlap
(
int
num_cams
,
// number of cameras used
int
num_cams
,
// number of cameras
struct
tp_task
*
gpu_tasks
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// struct tp_task * gpu_tasks,
int
num_tiles
,
// number of tiles in task list
int
num_tiles
,
// number of tiles in task list
// int num_tilesx, // number of tiles in a row
// int num_tilesx, // number of tiles in a row
// declare arrays in device code?
// declare arrays in device code?
...
@@ -151,7 +152,8 @@ extern "C" __global__ void imclt_rbg(
...
@@ -151,7 +152,8 @@ extern "C" __global__ void imclt_rbg(
extern
"C"
__global__
void
generate_RBGA
(
extern
"C"
__global__
void
generate_RBGA
(
int
num_cams
,
// number of cameras used
int
num_cams
,
// number of cameras used
// Parameters to generate texture tasks
// Parameters to generate texture tasks
struct
tp_task
*
gpu_tasks
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// struct tp_task * gpu_tasks,
int
num_tiles
,
// number of tiles in task list
int
num_tiles
,
// number of tiles in task list
// declare arrays in device code?
// declare arrays in device code?
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
int
*
gpu_texture_indices
,
// packed tile + bits (now only (1 << 7)
...
...
src/geometry_correction.cu
View file @
a51d6a77
This diff is collapsed.
Click to expand it.
src/geometry_correction.h
View file @
a51d6a77
...
@@ -41,6 +41,8 @@
...
@@ -41,6 +41,8 @@
#include "tp_defines.h"
#include "tp_defines.h"
#endif
#endif
/* Number of floats in one flattened task for x cameras: the full struct tp_task
 * size in floats, less 6 floats (xy[2] + disp_dist[4]) for each of the
 * (NUM_CAMS - x) unused cameras. The argument is parenthesized so that
 * expression arguments such as get_task_size(a + b) expand correctly. */
#define get_task_size(x) (sizeof(struct tp_task)/sizeof(float) - 6 * (NUM_CAMS - (x)))
#define NVRTC_BUG 1
#define NVRTC_BUG 1
#ifndef M_PI
#ifndef M_PI
#define M_PI 3.14159265358979323846
/* pi */
#define M_PI 3.14159265358979323846
/* pi */
...
@@ -60,8 +62,9 @@ struct tp_task {
...
@@ -60,8 +62,9 @@ struct tp_task {
int
txy
;
int
txy
;
unsigned
short
sxy
[
2
];
unsigned
short
sxy
[
2
];
};
};
float
xy
[
NUM_CAMS
][
2
];
float
target_disparity
;
float
target_disparity
;
float
xy
[
NUM_CAMS
][
2
];
// float target_disparity;
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
float
disp_dist
[
NUM_CAMS
][
4
];
// calculated with getPortsCoordinates()
};
};
...
@@ -142,7 +145,9 @@ struct gc {
...
@@ -142,7 +145,9 @@ struct gc {
};
};
#define RAD_COEFF_LEN 7
#define RAD_COEFF_LEN 7
extern
"C"
__global__
void
get_tiles_offsets
(
extern
"C"
__global__
void
get_tiles_offsets
(
struct
tp_task
*
gpu_tasks
,
int
num_cams
,
// struct tp_task * gpu_tasks,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
struct
gc
*
gpu_geometry_correction
,
struct
gc
*
gpu_geometry_correction
,
struct
corr_vector
*
gpu_correction_vector
,
struct
corr_vector
*
gpu_correction_vector
,
...
@@ -150,7 +155,9 @@ extern "C" __global__ void get_tiles_offsets(
...
@@ -150,7 +155,9 @@ extern "C" __global__ void get_tiles_offsets(
trot_deriv
*
gpu_rot_deriv
);
trot_deriv
*
gpu_rot_deriv
);
extern
"C"
__global__
void
calculate_tiles_offsets
(
extern
"C"
__global__
void
calculate_tiles_offsets
(
struct
tp_task
*
gpu_tasks
,
int
num_cams
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// struct tp_task * gpu_tasks,
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
struct
gc
*
gpu_geometry_correction
,
struct
gc
*
gpu_geometry_correction
,
struct
corr_vector
*
gpu_correction_vector
,
struct
corr_vector
*
gpu_correction_vector
,
...
@@ -160,6 +167,7 @@ extern "C" __global__ void calculate_tiles_offsets(
...
@@ -160,6 +167,7 @@ extern "C" __global__ void calculate_tiles_offsets(
// uses NUM_CAMS blocks, (3,3,3) threads
// uses NUM_CAMS blocks, (3,3,3) threads
extern
"C"
__global__
void
calc_rot_deriv
(
extern
"C"
__global__
void
calc_rot_deriv
(
int
num_cams
,
struct
corr_vector
*
gpu_correction_vector
,
struct
corr_vector
*
gpu_correction_vector
,
trot_deriv
*
gpu_rot_deriv
);
trot_deriv
*
gpu_rot_deriv
);
...
@@ -170,3 +178,4 @@ extern "C" __global__ void calcReverseDistortionTable(
...
@@ -170,3 +178,4 @@ extern "C" __global__ void calcReverseDistortionTable(
float
*
rByRDist
);
float
*
rByRDist
);
src/test_tp.cu
View file @
a51d6a77
...
@@ -715,6 +715,7 @@ int main(int argc, char **argv)
...
@@ -715,6 +715,7 @@ int main(int argc, char **argv)
}
}
calc_rot_deriv<<<grid_rot,threads_rot>>> (
calc_rot_deriv<<<grid_rot,threads_rot>>> (
num_cams, // int num_cams,
gpu_correction_vector , // struct corr_vector * gpu_correction_vector,
gpu_correction_vector , // struct corr_vector * gpu_correction_vector,
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
...
@@ -821,6 +822,7 @@ int main(int argc, char **argv)
...
@@ -821,6 +822,7 @@ int main(int argc, char **argv)
}
}
/*
/*
get_tiles_offsets<<<grid_geom,threads_geom>>> (
get_tiles_offsets<<<grid_geom,threads_geom>>> (
num_cams, // int num_cams,
gpu_tasks, // struct tp_task * gpu_tasks,
gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list
tp_task_size, // int num_tiles, // number of tiles in task list
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
...
@@ -829,7 +831,9 @@ int main(int argc, char **argv)
...
@@ -829,7 +831,9 @@ int main(int argc, char **argv)
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
*/
*/
calculate_tiles_offsets<<<1,1>>> (
calculate_tiles_offsets<<<1,1>>> (
gpu_tasks, // struct tp_task * gpu_tasks,
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list
tp_task_size, // int num_tiles, // number of tiles in task list
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_correction_vector, // struct corr_vector * gpu_correction_vector,
gpu_correction_vector, // struct corr_vector * gpu_correction_vector,
...
@@ -1273,7 +1277,8 @@ int main(int argc, char **argv)
...
@@ -1273,7 +1277,8 @@ int main(int argc, char **argv)
cudaFuncSetAttribute(textures_nonoverlap, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
cudaFuncSetAttribute(textures_nonoverlap, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
textures_nonoverlap<<<1,1>>> (
textures_nonoverlap<<<1,1>>> (
num_cams, // int num_cams, // number of cameras used
num_cams, // int num_cams, // number of cameras used
gpu_tasks, // struct tp_task * gpu_tasks,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats
// gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list
tp_task_size, // int num_tiles, // number of tiles in task list
// declare arrays in device code?
// declare arrays in device code?
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
...
@@ -1365,7 +1370,8 @@ int main(int argc, char **argv)
...
@@ -1365,7 +1370,8 @@ int main(int argc, char **argv)
generate_RBGA<<<1,1>>> (
generate_RBGA<<<1,1>>> (
num_cams, // int num_cams, // number of cameras used
num_cams, // int num_cams, // number of cameras used
// Parameters to generate texture tasks
// Parameters to generate texture tasks
gpu_tasks, // struct tp_task * gpu_tasks,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list
tp_task_size, // int num_tiles, // number of tiles in task list
// Does not require gpu_texture_indices to be initialized - just allocated, will generate.
// Does not require gpu_texture_indices to be initialized - just allocated, will generate.
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
...
...
src/tp_defines.h
View file @
a51d6a77
...
@@ -56,7 +56,8 @@
...
@@ -56,7 +56,8 @@
#define CORR_TILES_PER_BLOCK 4
#define CORR_TILES_PER_BLOCK 4
#define CORR_TILES_PER_BLOCK_NORMALIZE 4 // increase to 8?
#define CORR_TILES_PER_BLOCK_NORMALIZE 4 // increase to 8?
#define CORR_TILES_PER_BLOCK_COMBINE 4 // increase to 16?
#define CORR_TILES_PER_BLOCK_COMBINE 4 // increase to 16?
#define TEXTURE_THREADS 32 //
//#define TEXTURE_THREADS 32 //
#define NUM_THREADS 32
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_TILES_PER_BLOCK 1
#define TEXTURE_TILES_PER_BLOCK 1
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_THREADS_PER_TILE 16
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment