Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
3f61a6c8
Commit
3f61a6c8
authored
May 13, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Removed the KERNELS_LSTEP define, as the kernel step is now calculated dynamically.
It is different for LWIR (8) and RGB (16).
parent
4125cbb1
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
30 additions
and
9 deletions
+30
-9
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+2
-2
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+28
-7
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
3f61a6c8
...
@@ -116,7 +116,7 @@ public class GPUTileProcessor {
...
@@ -116,7 +116,7 @@ public class GPUTileProcessor {
// public static int IMG_HEIGHT = 1936;
// public static int IMG_HEIGHT = 1936;
static
int
KERNELS_HOR
=
164
;
static
int
KERNELS_HOR
=
164
;
static
int
KERNELS_VERT
=
123
;
static
int
KERNELS_VERT
=
123
;
static
int
KERNELS_LSTEP
=
3
;
// 4;// FIXME: Make it dynamic: 3 for LWIR, 4 - for RGB?)
///
static int KERNELS_LSTEP = 3; // 4;// FIXME: Make it dynamic: 3 for LWIR, 4 - for RGB?)
static
int
THREADS_PER_TILE
=
8
;
static
int
THREADS_PER_TILE
=
8
;
static
int
TILES_PER_BLOCK
=
4
;
// 8 - slower
static
int
TILES_PER_BLOCK
=
4
;
// 8 - slower
static
int
CORR_THREADS_PER_TILE
=
8
;
static
int
CORR_THREADS_PER_TILE
=
8
;
...
@@ -208,7 +208,7 @@ public class GPUTileProcessor {
...
@@ -208,7 +208,7 @@ public class GPUTileProcessor {
"#define NUM_CAMS "
+
MAX_NUM_CAMS
+
"\n"
+
"#define NUM_CAMS "
+
MAX_NUM_CAMS
+
"\n"
+
// "#define NUM_PAIRS " + NUM_PAIRS+"\n"+
// "#define NUM_PAIRS " + NUM_PAIRS+"\n"+
// "#define NUM_COLORS " + NUM_COLORS+"\n"+
// "#define NUM_COLORS " + NUM_COLORS+"\n"+
"#define KERNELS_LSTEP "
+
KERNELS_LSTEP
+
"\n"
+
///
"#define KERNELS_LSTEP " + KERNELS_LSTEP+"\n"+
"#define THREADS_PER_TILE "
+
THREADS_PER_TILE
+
"\n"
+
"#define THREADS_PER_TILE "
+
THREADS_PER_TILE
+
"\n"
+
"#define TILES_PER_BLOCK "
+
TILES_PER_BLOCK
+
"\n"
+
"#define TILES_PER_BLOCK "
+
TILES_PER_BLOCK
+
"\n"
+
"#define CORR_THREADS_PER_TILE "
+
CORR_THREADS_PER_TILE
+
"\n"
+
"#define CORR_THREADS_PER_TILE "
+
CORR_THREADS_PER_TILE
+
"\n"
+
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
3f61a6c8
...
@@ -100,7 +100,7 @@ GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.
...
@@ -100,7 +100,7 @@ GPU run time =523.451927ms, (direct conversion: 24.080189999999998ms, imclt: 17.
//#define IMCLT_TILES_PER_BLOCK 4
//#define IMCLT_TILES_PER_BLOCK 4
#define KERNELS_STEP (1 << KERNELS_LSTEP)
///
#define KERNELS_STEP (1 << KERNELS_LSTEP)
//#define TILES-X (IMG-WIDTH / DTT_SIZE)
//#define TILES-X (IMG-WIDTH / DTT_SIZE)
//#define TILES-Y (IMG-HEIGHT / DTT_SIZE)
//#define TILES-Y (IMG-HEIGHT / DTT_SIZE)
#define CONVERT_DIRECT_INDEXING_THREADS_LOG2 5
#define CONVERT_DIRECT_INDEXING_THREADS_LOG2 5
...
@@ -2810,7 +2810,7 @@ __global__ void convert_correct_tiles(
...
@@ -2810,7 +2810,7 @@ __global__ void convert_correct_tiles(
* @param dust_remove do not reduce average weight when only one image differs much from the average (true)
* @param dust_remove do not reduce average weight when only one image differs much from the average (true)
* @param texture_stride output stride in floats (now 256*4 = 1024)
* @param texture_stride output stride in floats (now 256*4 = 1024)
* @param gpu_texture_tiles output array (number of colors +1 + ?)*16*16 rgba texture tiles) float values. Will not be calculated if null
* @param gpu_texture_tiles output array (number of colors +1 + ?)*16*16 rgba texture tiles) float values. Will not be calculated if null
* @param inescan_order 0 low-res tiles have tghe same order, as gpu_texture_indices, 1 - in linescan order
* @param inescan_order 0 low-res tiles have the same order, as gpu_texture_indices, 1 - in linescan order
* @param gpu_diff_rgb_combo low-resolution output, with per-camera mismatch and each color average. Will not be calculated if null
* @param gpu_diff_rgb_combo low-resolution output, with per-camera mismatch and each color average. Will not be calculated if null
* @param num_tilesx number of tiles in a row
* @param num_tilesx number of tiles in a row
*/
*/
...
@@ -4106,10 +4106,31 @@ __device__ void convertCorrectTile(
...
@@ -4106,10 +4106,31 @@ __device__ void convertCorrectTile(
int
kernel_index
;
// common for all colors
int
kernel_index
;
// common for all colors
float
kdx
,
kdy
;
float
kdx
,
kdy
;
if
(
threadIdx
.
x
==
0
){
if
(
threadIdx
.
x
==
0
){
ktileX
=
min
(
max_kernel_hor
,
max
(
0
,
((
int
)
lrintf
(
centerX
*
(
1.0
/
KERNELS_STEP
)
+
1
))));
// ktileX = min(max_kernel_hor, max(0, ((int) lrintf(centerX * (1.0/KERNELS_STEP)+1))));
ktileY
=
min
(
max_kernel_vert
,
max
(
0
,
((
int
)
lrintf
(
centerY
*
(
1.0
/
KERNELS_STEP
)
+
1
))));
// ktileY = min(max_kernel_vert, max(0, ((int) lrintf(centerY * (1.0/KERNELS_STEP)+1))));
kdx
=
centerX
-
(
ktileX
<<
KERNELS_LSTEP
)
+
(
1
<<
(
KERNELS_LSTEP
-
1
));
// difference in pixel
// kdx = centerX - (ktileX << KERNELS_LSTEP) + (1 << (KERNELS_LSTEP -1)); // difference in pixel
kdy
=
centerY
-
(
ktileY
<<
KERNELS_LSTEP
)
+
(
1
<<
(
KERNELS_LSTEP
-
1
));
// difference in pixel
// kdy = centerY - (ktileY << KERNELS_LSTEP) + (1 << (KERNELS_LSTEP -1)); // difference in pixel
// From ImageDttCPU.java: extract_correct_tile() (modified 2022/05/12):
// int kernel_pitch = width/(clt_kernels[chn_kernel][0].length - 2);
// 1. find closest kernel
// ktileX = (int) Math.round(centerX/kernel_pitch) + 1;
// ktileY = (int) Math.round(centerY/kernel_pitch) + 1;
// if (ktileY < 0) ktileY = 0;
// else if (ktileY >= clt_kernels[chn_kernel].length) ktileY = clt_kernels[chn_kernel].length-1;
// if (ktileX < 0) ktileX = 0;
// else if (ktileX >= clt_kernels[chn_kernel][ktileY].length) ktileX = clt_kernels[chn_kernel][ktileY].length-1;
// extract center offset data stored with each kernel tile
// CltExtra ce = new CltExtra (clt_kernels[chn_kernel][ktileY][ktileX][4]);
// 2. calculate correction for center of the kernel offset
// double kdx = centerX - (ktileX -1 +0.5) * kernel_pitch; // difference in pixel
// double kdy = centerY - (ktileY -1 +0.5) * kernel_pitch;
int
kernel_pitch
=
woi_width
/
(
kernels_hor
-
2
);
ktileX
=
min
(
max_kernel_hor
,
max
(
0
,
((
int
)
lrintf
(
centerX
/
kernel_pitch
+
1
))));
ktileY
=
min
(
max_kernel_vert
,
max
(
0
,
((
int
)
lrintf
(
centerY
/
kernel_pitch
+
1
))));
kdx
=
centerX
-
(
ktileX
-
0.5
)
*
kernel_pitch
;
// difference in pixel
kdy
=
centerY
-
(
ktileY
-
0.5
)
*
kernel_pitch
;
//
kernel_index
=
(
ktileX
+
ktileY
*
kernels_hor
)
*
num_colors
;
kernel_index
=
(
ktileX
+
ktileY
*
kernels_hor
)
*
num_colors
;
}
}
// broadcast kernel_index
// broadcast kernel_index
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment