Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
a744e6be
Commit
a744e6be
authored
Oct 06, 2018
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added debug output for LPF filter to compare with GPU
parent
ddc33b02
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
79 additions
and
6 deletions
+79
-6
ImageDtt.java
src/main/java/ImageDtt.java
+14
-1
TileProcessor.cuh
src/main/resources/TileProcessor.cuh
+65
-5
No files found.
src/main/java/ImageDtt.java
View file @
a744e6be
...
...
@@ -4216,7 +4216,20 @@ public class ImageDtt {
final
double
[]
dbg_filter
=
dtt
.
dttt_ii
(
filter
);
for
(
int
i
=
0
;
i
<
filter
.
length
;
i
++)
filter
[
i
]
*=
2
*
dct_size
;
if
(
globalDebugLevel
>
1
)
{
if
(
globalDebugLevel
>
2
)
{
System
.
out
.
print
(
"__constant__ float lpf_data[64]={"
);
for
(
int
i
=
0
;
i
<
filter
.
length
;
i
++){
System
.
out
.
print
(
String
.
format
(
"%5.8ff"
,
filter
[
i
]));
if
(
i
==
63
)
{
System
.
out
.
println
(
"};"
);
}
else
{
System
.
out
.
print
(
", "
);
if
((
i
%
8
)
==
7
)
{
System
.
out
.
print
(
"\n "
);
}
}
}
}
else
if
(
globalDebugLevel
>
1
)
{
for
(
int
i
=
0
;
i
<
filter
.
length
;
i
++){
System
.
out
.
println
(
"dct_lpf_psf() "
+
i
+
": "
+
filter
[
i
]);
}
...
...
src/main/resources/TileProcessor.cuh
View file @
a744e6be
...
...
@@ -254,7 +254,36 @@ __constant__ float idct_signs[4][4][4] ={
{
1
,
1
,
1
,
-
1
},
{
-
1
,
-
1
,
-
1
,
1
}
}};
// LPF for sigma 0.9 each color (modify through cudaMemcpyToSymbol() or similar in Driver API
__constant__
float
lpf_data
[
3
][
64
]
=
{
{
1.00000000
f
,
0.87041007
f
,
0.65943687
f
,
0.43487258
f
,
0.24970076
f
,
0.12518080
f
,
0.05616371
f
,
0.02728573
f
,
0.87041007
f
,
0.75761368
f
,
0.57398049
f
,
0.37851747
f
,
0.21734206
f
,
0.10895863
f
,
0.04888546
f
,
0.02374977
f
,
0.65943687
f
,
0.57398049
f
,
0.43485698
f
,
0.28677101
f
,
0.16466189
f
,
0.08254883
f
,
0.03703642
f
,
0.01799322
f
,
0.43487258
f
,
0.37851747
f
,
0.28677101
f
,
0.18911416
f
,
0.10858801
f
,
0.05443770
f
,
0.02442406
f
,
0.01186582
f
,
0.24970076
f
,
0.21734206
f
,
0.16466189
f
,
0.10858801
f
,
0.06235047
f
,
0.03125774
f
,
0.01402412
f
,
0.00681327
f
,
0.12518080
f
,
0.10895863
f
,
0.08254883
f
,
0.05443770
f
,
0.03125774
f
,
0.01567023
f
,
0.00703062
f
,
0.00341565
f
,
0.05616371
f
,
0.04888546
f
,
0.03703642
f
,
0.02442406
f
,
0.01402412
f
,
0.00703062
f
,
0.00315436
f
,
0.00153247
f
,
0.02728573
f
,
0.02374977
f
,
0.01799322
f
,
0.01186582
f
,
0.00681327
f
,
0.00341565
f
,
0.00153247
f
,
0.00074451
f
},{
1.00000000
f
,
0.87041007
f
,
0.65943687
f
,
0.43487258
f
,
0.24970076
f
,
0.12518080
f
,
0.05616371
f
,
0.02728573
f
,
0.87041007
f
,
0.75761368
f
,
0.57398049
f
,
0.37851747
f
,
0.21734206
f
,
0.10895863
f
,
0.04888546
f
,
0.02374977
f
,
0.65943687
f
,
0.57398049
f
,
0.43485698
f
,
0.28677101
f
,
0.16466189
f
,
0.08254883
f
,
0.03703642
f
,
0.01799322
f
,
0.43487258
f
,
0.37851747
f
,
0.28677101
f
,
0.18911416
f
,
0.10858801
f
,
0.05443770
f
,
0.02442406
f
,
0.01186582
f
,
0.24970076
f
,
0.21734206
f
,
0.16466189
f
,
0.10858801
f
,
0.06235047
f
,
0.03125774
f
,
0.01402412
f
,
0.00681327
f
,
0.12518080
f
,
0.10895863
f
,
0.08254883
f
,
0.05443770
f
,
0.03125774
f
,
0.01567023
f
,
0.00703062
f
,
0.00341565
f
,
0.05616371
f
,
0.04888546
f
,
0.03703642
f
,
0.02442406
f
,
0.01402412
f
,
0.00703062
f
,
0.00315436
f
,
0.00153247
f
,
0.02728573
f
,
0.02374977
f
,
0.01799322
f
,
0.01186582
f
,
0.00681327
f
,
0.00341565
f
,
0.00153247
f
,
0.00074451
f
},{
1.00000000
f
,
0.87041007
f
,
0.65943687
f
,
0.43487258
f
,
0.24970076
f
,
0.12518080
f
,
0.05616371
f
,
0.02728573
f
,
0.87041007
f
,
0.75761368
f
,
0.57398049
f
,
0.37851747
f
,
0.21734206
f
,
0.10895863
f
,
0.04888546
f
,
0.02374977
f
,
0.65943687
f
,
0.57398049
f
,
0.43485698
f
,
0.28677101
f
,
0.16466189
f
,
0.08254883
f
,
0.03703642
f
,
0.01799322
f
,
0.43487258
f
,
0.37851747
f
,
0.28677101
f
,
0.18911416
f
,
0.10858801
f
,
0.05443770
f
,
0.02442406
f
,
0.01186582
f
,
0.24970076
f
,
0.21734206
f
,
0.16466189
f
,
0.10858801
f
,
0.06235047
f
,
0.03125774
f
,
0.01402412
f
,
0.00681327
f
,
0.12518080
f
,
0.10895863
f
,
0.08254883
f
,
0.05443770
f
,
0.03125774
f
,
0.01567023
f
,
0.00703062
f
,
0.00341565
f
,
0.05616371
f
,
0.04888546
f
,
0.03703642
f
,
0.02442406
f
,
0.01402412
f
,
0.00703062
f
,
0.00315436
f
,
0.00153247
f
,
0.02728573
f
,
0.02374977
f
,
0.01799322
f
,
0.01186582
f
,
0.00681327
f
,
0.00341565
f
,
0.00153247
f
,
0.00074451
f
}};
__device__
void
convertCorrectTile
(
struct
CltExtra
*
gpu_kernel_offsets
,
// [tileY][tileX][color]
...
...
@@ -262,6 +291,7 @@ __device__ void convertCorrectTile(
float
*
gpu_images
,
float
*
gpu_clt
,
const
int
color
,
const
int
lpf_mask
,
const
float
centerX
,
const
float
centerY
,
const
short
tx
,
...
...
@@ -300,8 +330,9 @@ __global__ void tileProcessor(
float
**
gpu_images
,
// [NUM_CAMS],
struct
tp_task
*
gpu_tasks
,
float
**
gpu_clt
,
// [NUM_CAMS][TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t
dstride
,
// // in floats (pixels)
int
num_tiles
)
// number of tiles in task
size_t
dstride
,
// in floats (pixels)
int
num_tiles
,
// number of tiles in task
int
lpf_mask
)
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
{
dim3
t
=
threadIdx
;
...
...
@@ -356,6 +387,7 @@ __global__ void tileProcessor(
gpu_images
[
ncam
],
// float * gpu_images,
gpu_clt
[
ncam
],
// float * gpu_clt,
color
,
// const int color,
lpf_mask
,
// const int lpf_mask,
tt
[
tile_in_block
].
xy
[
ncam
][
0
],
// const float centerX,
tt
[
tile_in_block
].
xy
[
ncam
][
1
],
// const float centerY,
tt
[
tile_in_block
].
tx
,
// const short tx,
...
...
@@ -529,6 +561,7 @@ __device__ void convertCorrectTile(
float
*
gpu_images
,
float
*
gpu_clt
,
const
int
color
,
const
int
lpf_mask
,
const
float
centerX
,
const
float
centerY
,
const
short
tx
,
...
...
@@ -902,7 +935,7 @@ __device__ void convertCorrectTile(
#ifdef DBG_TILE
#ifdef DEBUG
1
#ifdef DEBUG
3
if
((
threadIdx
.
x
)
==
0
){
printf
(
"
\n
DTT Tiles after vertical shift, color = %d
\n
"
,
color
);
debug_print_clt1
(
clt_tile
,
color
,
0xf
);
// only 1 quadrant for R,B and 2 - for G
...
...
@@ -911,6 +944,33 @@ __device__ void convertCorrectTile(
__syncthreads
();
// __syncwarp();
#endif
#endif
// optionally apply LF
if
((
lpf_mask
>>
color
)
&
1
){
float
*
clt
=
clt_tile
+
threadIdx
.
x
;
#pragma unroll
for
(
int
q
=
0
;
q
<
4
;
q
++
)
{
float
*
lpf
=
lpf_data
[
color
]
+
threadIdx
.
x
;
#pragma unroll
for
(
int
i
=
0
;
i
<
8
;
i
++
){
(
*
clt
)
*=
(
*
lpf
);
clt
+=
DTT_SIZE1
;
lpf
+=
DTT_SIZE
;
}
}
__syncthreads
();
// __syncwarp();
#ifdef DBG_TILE
#ifdef DEBUG3
if
((
threadIdx
.
x
)
==
0
){
printf
(
"
\n
DTT Tiles after LPF, color = %d
\n
"
,
color
);
debug_print_clt1
(
clt_tile
,
color
,
0xf
);
// only 1 quadrant for R,B and 2 - for G
printf
(
"
\n
DTT All done
\n
"
);
}
__syncthreads
();
// __syncwarp();
#endif
#endif
}
int
offset_src
=
threadIdx
.
x
;
...
...
@@ -919,7 +979,7 @@ __device__ void convertCorrectTile(
float
*
clt_dst
=
gpu_clt
+
offset_dst
;
// ((ty * TILESX + tx)*NUM_COLORS + color)* ( 4 * DTT_SIZE * DTT_SIZE1) + threadIdx.x; // gpu_kernels + kernel_full_index* (DTT_SIZE * DTT_SIZE * 4);
#ifdef DBG_TILE
#ifdef DEBUG
1
#ifdef DEBUG
3
if
((
threadIdx
.
x
)
==
0
){
printf
(
"clt_src = 0x%lx
\n
"
,
clt_src
);
printf
(
"clt_dst = 0x%lx
\n
"
,
clt_dst
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment