Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
7d01b009
Commit
7d01b009
authored
Jun 16, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
addition to the previous commit
parent
81a46af5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
69 additions
and
3 deletions
+69
-3
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+53
-1
TileProcessor.h
src/main/resources/kernels/TileProcessor.h
+16
-2
No files found.
src/main/resources/kernels/TileProcessor.cuh
View file @
7d01b009
...
@@ -2965,7 +2965,7 @@ extern "C" __global__ void convert_direct( // called with a single block, singl
...
@@ -2965,7 +2965,7 @@ extern "C" __global__ void convert_direct( // called with a single block, singl
float
**
gpu_kernels
,
// [num_cams],
float
**
gpu_kernels
,
// [num_cams],
float
**
gpu_images
,
// [num_cams],
float
**
gpu_images
,
// [num_cams],
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
float
*
gpu_ftasks
,
// flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
float
**
gpu_clt
,
// [num_cams][TILE
-S
Y][TILES-X][num_colors][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [num_cams][TILE
S-
Y][TILES-X][num_colors][DTT_SIZE*DTT_SIZE]
size_t
dstride
,
// in floats (pixels)
size_t
dstride
,
// in floats (pixels)
int
num_tiles
,
// number of tiles in task
int
num_tiles
,
// number of tiles in task
int
lpf_mask
,
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
int
lpf_mask
,
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green. Now - always 0 !
...
@@ -3013,6 +3013,58 @@ extern "C" __global__ void convert_direct( // called with a single block, singl
...
@@ -3013,6 +3013,58 @@ extern "C" __global__ void convert_direct( // called with a single block, singl
}
}
}
}
/**
 * Fill all CLT (transform-domain) tiles of all cameras with a constant value.
 *
 * Intended to be run before generating corrected images when only a subset of the tiles is converted:
 * IMCLT for full images processes every CLT tile, so tiles that were skipped would otherwise keep stale
 * transform-domain data that shows up in the output. Erasing is not required before correlations or
 * texture generation.
 *
 * This kernel does no filling itself - thread 0 launches erase_clt_tiles_inner() with a
 * (tiles_x, tiles_y, num_cams) grid of NUM_THREADS-wide blocks, i.e. one block per tile per camera.
 *
 * @param num_cams   number of subcameras <= NUM_CAMS. 4 for RGB, 16 for lwir in LWIR16
 * @param num_colors number of colors <= NUM_COLORS. 3 for RGB, 1 for lwir/mono
 * @param tiles_x    number of tiles in a row
 * @param tiles_y    number of tile rows
 * @param gpu_clt    array of per-camera aberration-corrected transform-domain image representations,
 *                   [num_cams][tiles_y][tiles_x][num_colors][4*DTT_SIZE*DTT_SIZE]
 * @param fill_data  data to write (normally 0.0f, may be NaN?)
 */
extern "C" __global__ void erase_clt_tiles(
        int                num_cams,      // actual number of cameras
        int                num_colors,    // actual number of colors: 3 for RGB, 1 for LWIR/mono
        int                tiles_x,
        int                tiles_y,
        float           ** gpu_clt,       // [num_cams][tiles_y][tiles_x][num_colors][4*DTT_SIZE*DTT_SIZE]
        float              fill_data)
{
    // Only thread 0 issues the child launch (the kernel is expected to be started as 1,1,1 anyway).
    if (threadIdx.x != 0) {
        return;
    }
    const dim3 block_dims(NUM_THREADS, 1, 1);          // original note: (32,1,1)
    const dim3 grid_dims(tiles_x, tiles_y, num_cams);  // x - tile column, y - tile row, z - camera
    erase_clt_tiles_inner<<<grid_dims, block_dims>>>(num_colors, tiles_x, gpu_clt, fill_data);
}
/**
 * Helper kernel for erase_clt_tiles() - performs the actual fill.
 *
 * Each block handles one tile of one camera: blockIdx.x is the tile column, blockIdx.y is the tile row
 * and blockIdx.z is the camera index. Thread threadIdx.x starts at offset threadIdx.x inside the tile
 * and stores fill_data at every NUM_THREADS-th float, so the block is expected to be NUM_THREADS wide
 * (this is how erase_clt_tiles() launches it).
 *
 * @param num_colors actual number of colors: 3 for RGB, 1 for LWIR/mono
 * @param tiles_x    number of tiles in a row (used as the row pitch, in tiles)
 * @param gpu_clt    per-camera CLT data, [num_cams][tiles_y][tiles_x][num_colors][4*DTT_SIZE*DTT_SIZE]
 * @param fill_data  value to store into every float of the tile
 */
extern "C" __global__ void erase_clt_tiles_inner(
        int                num_colors,    // actual number of colors: 3 for RGB, 1 for LWIR/mono
        int                tiles_x,
        float           ** gpu_clt,       // [num_cams][tiles_y][tiles_x][num_colors][4*DTT_SIZE*DTT_SIZE]
        float              fill_data)
{
    const int lane_stride       = NUM_THREADS;                               // distance between stores of one thread
    const int stores_per_thread = (4 * DTT_SIZE * DTT_SIZE / NUM_THREADS);   // stores per thread per color
    const int floats_per_tile   = num_colors * (4 * DTT_SIZE * DTT_SIZE);

    // The row pitch comes from the tiles_x argument rather than gridDim.x:
    // using gridDim here resulted in cuda.CudaException: CUDA_ERROR_INVALID_PTX
    float * tile_lane = gpu_clt[blockIdx.z]
            + floats_per_tile * (blockIdx.x + blockIdx.y * tiles_x)
            + threadIdx.x;

    for (int ncol = 0; ncol < num_colors; ncol++) {
        // Start of this thread's stores for the current color
        float * color_lane = tile_lane + ncol * stores_per_thread * lane_stride;
#pragma unroll
        for (int i = 0; i < stores_per_thread; i++) {
            color_lane[i * lane_stride] = fill_data;
        }
    }
}
/**
/**
* Helper kernel for convert_direct() - perform actual conversion.
* Helper kernel for convert_direct() - perform actual conversion.
*
*
...
...
src/main/resources/kernels/TileProcessor.h
View file @
7d01b009
...
@@ -163,8 +163,22 @@ __global__ void imclt_rbg_all(
...
@@ -163,8 +163,22 @@ __global__ void imclt_rbg_all(
const
size_t
dstride
);
// in floats (pixels)
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
__global__
void
erase8x8
(
extern
"C"
__global__
void
erase8x8
(
float
*
gpu_top_left
,
float
*
gpu_top_left
,
const
size_t
dstride
);
const
size_t
dstride
);
// Declaration of erase_clt_tiles(): fills the CLT tiles of all cameras with fill_data.
// The definition launches erase_clt_tiles_inner() from thread 0 with a (tiles_x, tiles_y, num_cams) grid.
extern
"C"
__global__
void
erase_clt_tiles
(
int
num_cams
,
// actual number of cameras
int
num_colors
,
// actual number of colors: 3 for RGB, 1 for LWIR/mono
int
tiles_x
,
// number of tiles in a row
int
tiles_y
,
// number of tile rows
float
**
gpu_clt
,
// [num_cams][tiles_y][tiles_x][num_colors][4*DTT_SIZE*DTT_SIZE]
float
fill_data
// data to write into the tiles (normally 0.0f)
);
// Declaration of erase_clt_tiles_inner(): helper kernel launched by erase_clt_tiles().
// Each block fills one tile: blockIdx.x - tile column, blockIdx.y - tile row, blockIdx.z - camera.
extern
"C"
__global__
void
erase_clt_tiles_inner
(
int
num_colors
,
// actual number of colors: 3 for RGB, 1 for LWIR/mono
int
tiles_x
,
// number of tiles in a row (used as the row pitch instead of gridDim.x)
float
**
gpu_clt
,
// [num_cams][tiles_y][tiles_x][num_colors][4*DTT_SIZE*DTT_SIZE]
float
fill_data
// value stored into every float of the tile
);
extern
"C"
__global__
void
imclt_rbg
(
extern
"C"
__global__
void
imclt_rbg
(
float
*
gpu_clt
,
// [TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_clt
,
// [TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment