Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
ee0cfc3b
Commit
ee0cfc3b
authored
Nov 24, 2021
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
converting to variable num_cams
parent
18d8e56b
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
935 additions
and
435 deletions
+935
-435
TileProcessor.cuh
src/TileProcessor.cuh
+725
-362
TileProcessor.h
src/TileProcessor.h
+11
-3
test_tp.cu
src/test_tp.cu
+194
-68
tp_defines.h
src/tp_defines.h
+5
-2
No files found.
src/TileProcessor.cuh
View file @
ee0cfc3b
This source diff could not be displayed because it is too large. You can
view the blob
instead.
src/TileProcessor.h
View file @
ee0cfc3b
...
@@ -44,10 +44,13 @@
...
@@ -44,10 +44,13 @@
extern
"C"
__global__
void
convert_direct
(
// called with a single block, single thread
extern
"C"
__global__
void
convert_direct
(
// called with a single block, single thread
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
int
num_cams
,
// actual number of cameras
int
num_colors
,
// actual number of colors: 3 for RGB, 1 for LWIR/mono
float
**
gpu_kernel_offsets
,
// [NUM_CAMS],
float
**
gpu_kernel_offsets
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
struct
tp_task
*
gpu_tasks
,
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// struct tp_task * gpu_tasks,
float
**
gpu_clt
,
// [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
dstride
,
// in floats (pixels)
size_t
dstride
,
// in floats (pixels)
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
...
@@ -60,15 +63,17 @@ extern "C" __global__ void convert_direct( // called with a single block, single
...
@@ -60,15 +63,17 @@ extern "C" __global__ void convert_direct( // called with a single block, single
int
*
pnum_active_tiles
,
// indices to gpu_tasks
int
*
pnum_active_tiles
,
// indices to gpu_tasks
int
tilesx
);
int
tilesx
);
extern
"C"
__global__
void
correlate2D
(
extern
"C"
__global__
void
correlate2D
(
int
num_cams
,
int
*
sel_pairs
,
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
int
colors
,
// number of colors (3/1)
int
colors
,
// number of colors (3/1)
float
scale0
,
// scale for R
float
scale0
,
// scale for R
float
scale1
,
// scale for B
float
scale1
,
// scale for B
float
scale2
,
// scale for G
float
scale2
,
// scale for G
float
fat_zero
,
// here - absolute
float
fat_zero
,
// here - absolute
struct
tp_task
*
gpu_tasks
,
// array of per-tile tasks (now bits 4..9 - correlation pairs)
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// struct tp_task * gpu_tasks, // array of per-tile tasks (now bits 4..9 - correlation pairs)
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
int
tilesx
,
// number of tile rows
int
tilesx
,
// number of tile rows
int
*
gpu_corr_indices
,
// packed tile+pair
int
*
gpu_corr_indices
,
// packed tile+pair
...
@@ -99,6 +104,7 @@ extern "C" __global__ void corr2D_combine(
...
@@ -99,6 +104,7 @@ extern "C" __global__ void corr2D_combine(
float
*
gpu_corrs_combo
);
// combined correlation output (one per tile)
float
*
gpu_corrs_combo
);
// combined correlation output (one per tile)
extern
"C"
__global__
void
textures_nonoverlap
(
extern
"C"
__global__
void
textures_nonoverlap
(
int
num_cams
,
// number of cameras used
struct
tp_task
*
gpu_tasks
,
struct
tp_task
*
gpu_tasks
,
int
num_tiles
,
// number of tiles in task list
int
num_tiles
,
// number of tiles in task list
// int num_tilesx, // number of tiles in a row
// int num_tilesx, // number of tiles in a row
...
@@ -121,6 +127,7 @@ extern "C" __global__ void textures_nonoverlap(
...
@@ -121,6 +127,7 @@ extern "C" __global__ void textures_nonoverlap(
extern
"C"
extern
"C"
__global__
void
imclt_rbg_all
(
__global__
void
imclt_rbg_all
(
int
num_cams
,
float
**
gpu_clt
,
// [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_corr_images
,
// [NUM_CAMS][WIDTH, 3 * HEIGHT]
float
**
gpu_corr_images
,
// [NUM_CAMS][WIDTH, 3 * HEIGHT]
int
apply_lpf
,
int
apply_lpf
,
...
@@ -142,6 +149,7 @@ extern "C" __global__ void imclt_rbg(
...
@@ -142,6 +149,7 @@ extern "C" __global__ void imclt_rbg(
const
size_t
dstride
);
// in floats (pixels)
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
__global__
void
generate_RBGA
(
extern
"C"
__global__
void
generate_RBGA
(
int
num_cams
,
// number of cameras used
// Parameters to generate texture tasks
// Parameters to generate texture tasks
struct
tp_task
*
gpu_tasks
,
struct
tp_task
*
gpu_tasks
,
int
num_tiles
,
// number of tiles in task list
int
num_tiles
,
// number of tiles in task list
...
...
src/test_tp.cu
View file @
ee0cfc3b
This diff is collapsed.
Click to expand it.
src/tp_defines.h
View file @
ee0cfc3b
...
@@ -41,9 +41,10 @@
...
@@ -41,9 +41,10 @@
#ifndef JCUDA
#ifndef JCUDA
#include <stdio.h>
#include <stdio.h>
#define THREADSX (DTT_SIZE)
#define THREADSX (DTT_SIZE)
#define NUM_CAMS 4
#define TEST_LWIR 1
#define NUM_CAMS 16 // now maximal number of cameras
#define NUM_PAIRS 6
#define NUM_PAIRS 6
#define NUM_COLORS 3
#define NUM_COLORS
1 //
3
#define IMG_WIDTH 2592
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define IMG_HEIGHT 1936
#define KERNELS_HOR 164
#define KERNELS_HOR 164
...
@@ -55,11 +56,13 @@
...
@@ -55,11 +56,13 @@
#define CORR_TILES_PER_BLOCK 4
#define CORR_TILES_PER_BLOCK 4
#define CORR_TILES_PER_BLOCK_NORMALIZE 4 // increase to 8?
#define CORR_TILES_PER_BLOCK_NORMALIZE 4 // increase to 8?
#define CORR_TILES_PER_BLOCK_COMBINE 4 // increase to 16?
#define CORR_TILES_PER_BLOCK_COMBINE 4 // increase to 16?
#define TEXTURE_THREADS 32 //
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_THREADS_PER_TILE 8
#define TEXTURE_TILES_PER_BLOCK 1
#define TEXTURE_TILES_PER_BLOCK 1
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#define IMCLT_TILES_PER_BLOCK 4
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
// only lower bit will be used to request correlations, correlation mask will be common for all the scene
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define TASK_CORR_BITS 4
#define TASK_CORR_BITS 4
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment