Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
94aa6562
Commit
94aa6562
authored
Apr 08, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
moved LPF to 4-image generation to match correlations and textures
parent
3d5ddc28
Changes
4
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
90 additions
and
1156 deletions
+90
-1156
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+6
-1
TwoQuadCLT.java
...main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+17
-17
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+67
-41
test_tp.cu
src/main/resources/kernels/test_tp.cu
+0
-1097
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
94aa6562
...
@@ -979,12 +979,15 @@ public class GPUTileProcessor {
...
@@ -979,12 +979,15 @@ public class GPUTileProcessor {
cuCtxSynchronize
();
// remove later
cuCtxSynchronize
();
// remove later
}
}
public
void
execImcltRbg
()
{
public
void
execImcltRbg
(
boolean
is_mono
)
{
if
(
GPU_IMCLT_RBG_kernel
==
null
)
if
(
GPU_IMCLT_RBG_kernel
==
null
)
{
{
IJ
.
showMessage
(
"Error"
,
"No GPU kernel: GPU_IMCLT_RBG_kernel"
);
IJ
.
showMessage
(
"Error"
,
"No GPU kernel: GPU_IMCLT_RBG_kernel"
);
return
;
return
;
}
}
int
apply_lpf
=
1
;
int
tilesX
=
IMG_WIDTH
/
DTT_SIZE
;
int
tilesX
=
IMG_WIDTH
/
DTT_SIZE
;
int
tilesY
=
IMG_HEIGHT
/
DTT_SIZE
;
int
tilesY
=
IMG_HEIGHT
/
DTT_SIZE
;
int
[]
ThreadsFullWarps
=
{
IMCLT_THREADS_PER_TILE
,
IMCLT_TILES_PER_BLOCK
,
1
};
int
[]
ThreadsFullWarps
=
{
IMCLT_THREADS_PER_TILE
,
IMCLT_TILES_PER_BLOCK
,
1
};
...
@@ -999,6 +1002,8 @@ public class GPUTileProcessor {
...
@@ -999,6 +1002,8 @@ public class GPUTileProcessor {
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
.
to
(
gpu_clt_h
[
ncam
]),
Pointer
.
to
(
gpu_clt_h
[
ncam
]),
Pointer
.
to
(
gpu_corr_images_h
[
ncam
]),
Pointer
.
to
(
gpu_corr_images_h
[
ncam
]),
Pointer
.
to
(
new
int
[]
{
apply_lpf
}),
Pointer
.
to
(
new
int
[]
{
is_mono
?
1
:
0
}),
Pointer
.
to
(
new
int
[]
{
color
}),
Pointer
.
to
(
new
int
[]
{
color
}),
Pointer
.
to
(
new
int
[]
{
v_offs
}),
Pointer
.
to
(
new
int
[]
{
v_offs
}),
Pointer
.
to
(
new
int
[]
{
h_offs
}),
Pointer
.
to
(
new
int
[]
{
h_offs
}),
...
...
src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
View file @
94aa6562
...
@@ -2088,7 +2088,7 @@ public class TwoQuadCLT {
...
@@ -2088,7 +2088,7 @@ public class TwoQuadCLT {
// run imclt;
// run imclt;
long
startIMCLT
=
System
.
nanoTime
();
long
startIMCLT
=
System
.
nanoTime
();
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NREPEAT
;
i
++
)
{
gPUTileProcessor
.
execImcltRbg
();
gPUTileProcessor
.
execImcltRbg
(
quadCLT_main
.
isMonochrome
()
);
}
}
long
endImcltTime
=
System
.
nanoTime
();
long
endImcltTime
=
System
.
nanoTime
();
// run correlation
// run correlation
...
@@ -2177,7 +2177,7 @@ public class TwoQuadCLT {
...
@@ -2177,7 +2177,7 @@ public class TwoQuadCLT {
if
(
clt_parameters
.
show_corr
)
{
if
(
clt_parameters
.
show_corr
)
{
float
[][]
corr2D
=
gPUTileProcessor
.
getCorr2D
(
float
[][]
corr2D
=
gPUTileProcessor
.
getCorr2D
(
clt_parameters
.
gpu_corr_rad
);
// int corr_rad);
clt_parameters
.
gpu_corr_rad
);
// int corr_rad);
// convert to 6-layer image using tasks
// convert to 6-layer image using tasks
double
[][]
dbg_corr
=
GPUTileProcessor
.
getCorr2DView
(
double
[][]
dbg_corr
=
GPUTileProcessor
.
getCorr2DView
(
tilesX
,
tilesX
,
tilesY
,
tilesY
,
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
94aa6562
...
@@ -106,6 +106,8 @@
...
@@ -106,6 +106,8 @@
#define DBG_TILE_Y 111 // 66
#define DBG_TILE_Y 111 // 66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#undef DBG_MARK_DBG_TILE 1
//56494
//56494
// struct tp_task
// struct tp_task
//#define TASK_SIZE 12
//#define TASK_SIZE 12
...
@@ -879,6 +881,18 @@ extern "C" __global__ void textures_accumulate(
...
@@ -879,6 +881,18 @@ extern "C" __global__ void textures_accumulate(
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
size_t
texture_stride
,
// in floats (now 256*4 = 1024)
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
float
*
gpu_texture_tiles
);
// (number of colors +1 + ?)*16*16 rgba texture tiles
extern
"C"
__global__
void
imclt_rbg
(
float
*
gpu_clt
,
// [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
int
apply_lpf
,
int
mono
,
// defines lpf filter
int
color
,
// defines location of clt data
int
v_offset
,
int
h_offset
,
const
size_t
dstride
);
// in floats (pixels)
//===========================
extern
"C"
extern
"C"
__global__
void
correlate2D
(
__global__
void
correlate2D
(
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
**
gpu_clt
,
// [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
...
@@ -2274,16 +2288,12 @@ __global__ void textures_accumulate(
...
@@ -2274,16 +2288,12 @@ __global__ void textures_accumulate(
}
// textures_accumulate()
}
// textures_accumulate()
extern
"C"
extern
"C"
__global__
void
imclt_rbg
(
__global__
void
imclt_rbg
(
float
*
gpu_clt
,
// [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_clt
,
// [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
float
*
gpu_rbg
,
// WIDTH, 3 * HEIGHT
int
apply_lpf
,
int
mono
,
int
color
,
int
color
,
int
v_offset
,
int
v_offset
,
int
h_offset
,
int
h_offset
,
...
@@ -2334,13 +2344,27 @@ __global__ void imclt_rbg(
...
@@ -2334,13 +2344,27 @@ __global__ void imclt_rbg(
clt_tile
+=
column
+
thr3
;
// first 2 rows
clt_tile
+=
column
+
thr3
;
// first 2 rows
gpu_tile
+=
column
;
// first 2 rows
gpu_tile
+=
column
;
// first 2 rows
if
(
apply_lpf
)
{
// lpf - covers 2 rows, as there are 16 threads
float
*
lpf0
=
lpf_data
[
mono
?
3
:
color
]
+
threadIdx
.
x
;
// lpf_data[3] - mono
#pragma unroll
for
(
int
q
=
0
;
q
<
4
;
q
++
){
float
*
lpf
=
lpf0
;
for
(
int
i
=
0
;
i
<
DTT_SIZE
/
2
;
i
++
){
*
clt_tile
=
*
gpu_tile
*
(
*
lpf
);
clt_tile
+=
(
2
*
DTT_SIZE1
);
gpu_tile
+=
(
2
*
DTT_SIZE
);
lpf
+=
(
2
*
DTT_SIZE
);
}
}
}
else
{
#pragma unroll
#pragma unroll
for
(
int
i
=
0
;
i
<
DTT_SIZE2
;
i
++
){
for
(
int
i
=
0
;
i
<
DTT_SIZE2
;
i
++
){
*
clt_tile
=
*
gpu_tile
;
*
clt_tile
=
*
gpu_tile
;
clt_tile
+=
(
2
*
DTT_SIZE1
);
clt_tile
+=
(
2
*
DTT_SIZE1
);
gpu_tile
+=
(
2
*
DTT_SIZE
);
gpu_tile
+=
(
2
*
DTT_SIZE
);
}
}
}
float
*
mclt_top
=
((
float
*
)
mclt_tiles
)
+
tile_in_block
*
(
DTT_SIZE2
*
DTT_SIZE21
)
+
column
;
float
*
mclt_top
=
((
float
*
)
mclt_tiles
)
+
tile_in_block
*
(
DTT_SIZE2
*
DTT_SIZE21
)
+
column
;
float
*
rbg_top
=
color_plane
+
(
tileY
*
DTT_SIZE
)
*
dstride
+
(
tileX
*
DTT_SIZE
)
+
column
;
float
*
rbg_top
=
color_plane
+
(
tileY
*
DTT_SIZE
)
*
dstride
+
(
tileX
*
DTT_SIZE
)
+
column
;
float
*
mclt_tile
=
mclt_top
;
float
*
mclt_tile
=
mclt_top
;
...
@@ -2377,7 +2401,7 @@ __global__ void imclt_rbg(
...
@@ -2377,7 +2401,7 @@ __global__ void imclt_rbg(
// save result (back)
// save result (back)
float
*
rbg_p
=
rbg_top
;
float
*
rbg_p
=
rbg_top
;
mclt_tile
=
mclt_top
;
mclt_tile
=
mclt_top
;
if
((
tileX
==
0
)
&&
(
tileY
==
0
)){
if
((
tileX
==
0
)
&&
(
tileY
==
0
)){
...
@@ -2387,6 +2411,7 @@ __global__ void imclt_rbg(
...
@@ -2387,6 +2411,7 @@ __global__ void imclt_rbg(
mclt_tile
+=
DTT_SIZE21
;
mclt_tile
+=
DTT_SIZE21
;
rbg_p
+=
dstride
;
// DTT_SIZE2; // FIXME
rbg_p
+=
dstride
;
// DTT_SIZE2; // FIXME
}
}
#ifdef DBG_MARK_DBG_TILE
}
else
if
((
tileX
==
DBG_TILE_X
)
&&
(
tileY
==
DBG_TILE_Y
)){
}
else
if
((
tileX
==
DBG_TILE_X
)
&&
(
tileY
==
DBG_TILE_Y
)){
#pragma unroll
#pragma unroll
for
(
int
i
=
0
;
i
<
DTT_SIZE2
;
i
++
){
for
(
int
i
=
0
;
i
<
DTT_SIZE2
;
i
++
){
...
@@ -2394,6 +2419,7 @@ __global__ void imclt_rbg(
...
@@ -2394,6 +2419,7 @@ __global__ void imclt_rbg(
mclt_tile
+=
DTT_SIZE21
;
mclt_tile
+=
DTT_SIZE21
;
rbg_p
+=
dstride
;
// DTT_SIZE2; // FIXME
rbg_p
+=
dstride
;
// DTT_SIZE2; // FIXME
}
}
#endif
}
else
{
}
else
{
#pragma unroll
#pragma unroll
for
(
int
i
=
0
;
i
<
DTT_SIZE2
;
i
++
){
for
(
int
i
=
0
;
i
<
DTT_SIZE2
;
i
++
){
...
...
src/main/resources/kernels/test_tp.cu
deleted
100644 → 0
View file @
3d5ddc28
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment