Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
10c327ae
Commit
10c327ae
authored
Apr 05, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
debugging GPU OOB with large disparities, fixed in the kernel code
parent
b5bfb231
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
121 additions
and
42 deletions
+121
-42
Eyesis_Correction.java
.../java/com/elphel/imagej/correction/Eyesis_Correction.java
+1
-0
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+1
-1
TwoQuadCLT.java
...main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+90
-34
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+29
-7
No files found.
src/main/java/com/elphel/imagej/correction/Eyesis_Correction.java
View file @
10c327ae
...
...
@@ -5812,6 +5812,7 @@ private Panel panel1,
QUAD_CLT
,
// QuadCLT quadCLT_main,
QUAD_CLT_AUX
,
// QuadCLT quadCLT_aux,
CLT_PARAMETERS
,
// EyesisCorrectionParameters.DCTParameters dct_parameters,
CORRECTION_PARAMETERS
,
// EyesisCorrectionParameters ecp,
DEBAYER_PARAMETERS
,
//EyesisCorrectionParameters.DebayerParameters debayerParameters,
COLOR_PROC_PARAMETERS
,
//EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
COLOR_PROC_PARAMETERS_AUX
,
//EyesisCorrectionParameters.ColorProcParameters colorProcParameters_aux,
...
...
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
10c327ae
...
...
@@ -1359,7 +1359,7 @@ public class GPUTileProcessor {
public
void
getTileSubcamOffsets
(
final
TpTask
[]
tp_tasks
,
// will use // modify to have offsets for 8 cameras
final
GeometryCorrection
geometryCorrection_main
,
final
GeometryCorrection
geometryCorrection_aux
,
// if null, will only calculate offsets fro the main camera
final
GeometryCorrection
geometryCorrection_aux
,
// if null, will only calculate offsets fro
m
the main camera
final
double
[][][]
ers_delay
,
// if not null - fill with tile center acquisition delay
final
int
threadsMax
,
// maximal number of threads to launch
final
int
debugLevel
)
...
...
src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
View file @
10c327ae
...
...
@@ -452,17 +452,18 @@ public class TwoQuadCLT {
}
public
void
processCLTQuadCorrPairsGpu
(
GPUTileProcessor
gPUTileProcessor
,
QuadCLT
quadCLT_main
,
QuadCLT
quadCLT_aux
,
GPUTileProcessor
gPUTileProcessor
,
QuadCLT
quadCLT_main
,
QuadCLT
quadCLT_aux
,
CLTParameters
clt_parameters
,
EyesisCorrectionParameters
.
DebayerParameters
debayerParameters
,
ColorProcParameters
colorProcParameters
,
ColorProcParameters
colorProcParameters_aux
,
EyesisCorrectionParameters
.
RGBParameters
rgbParameters
,
final
int
threadsMax
,
// maximal number of threads to launch
final
boolean
updateStatus
,
final
int
debugLevel
)
throws
Exception
EyesisCorrectionParameters
.
CorrectionParameters
ecp
,
EyesisCorrectionParameters
.
DebayerParameters
debayerParameters
,
ColorProcParameters
colorProcParameters
,
ColorProcParameters
colorProcParameters_aux
,
EyesisCorrectionParameters
.
RGBParameters
rgbParameters
,
final
int
threadsMax
,
// maximal number of threads to launch
final
boolean
updateStatus
,
final
int
debugLevel
)
throws
Exception
{
this
.
startTime
=
System
.
nanoTime
();
...
...
@@ -523,6 +524,7 @@ public class TwoQuadCLT {
saturation_imp_main
,
// boolean [][] saturation_main, // (near) saturated pixels or null
saturation_imp_aux
,
// boolean [][] saturation_aux, // (near) saturated pixels or null
clt_parameters
,
// EyesisCorrectionParameters.CLTParameters clt_parameters,
ecp
,
// EyesisCorrectionParameters.CorrectionParameters ecp,
debayerParameters
,
// EyesisCorrectionParameters.DebayerParameters debayerParameters,
colorProcParameters
,
// EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
colorProcParameters_aux
,
// EyesisCorrectionParameters.ColorProcParameters colorProcParameters_aux,
...
...
@@ -1191,8 +1193,6 @@ public class TwoQuadCLT {
bb
.
order
(
ByteOrder
.
LITTLE_ENDIAN
);
bb
.
clear
();
for
(
int
i
=
0
;
i
<
port_xy
.
length
;
i
++)
{
// dos.writeFloat((float) (port_xy[i][chn][0])); // x-offset
// dos.writeFloat((float) (port_xy[i][chn][1])); // y-offset
bb
.
putFloat
((
float
)
(
port_xy
[
i
][
chn
][
0
]));
// x-offset
bb
.
putFloat
((
float
)
(
port_xy
[
i
][
chn
][
1
]));
// y-offset
}
...
...
@@ -1521,7 +1521,7 @@ public class TwoQuadCLT {
String
[]
rgba_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
};
String
[]
rgba_weights_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
,
"port0"
,
"port1"
,
"port2"
,
"port3"
,
"r-rms"
,
"b-rms"
,
"g-rms"
,
"w-rms"
};
if
((
texture_tiles_main
!=
null
)
&&
(
texture_tiles_aux
!=
null
)){
if
((
debugLevel
>
-
2
)
&&
(
clt_parameters
.
tileX
>=
0
)
&&
(
clt_parameters
.
tileY
>=
0
)
&&
(
clt_parameters
.
tileX
<
tilesX
)
&&
(
clt_parameters
.
tileY
<
tilesY
))
{
if
((
debugLevel
>
-
1
)
&&
(
clt_parameters
.
tileX
>=
0
)
&&
(
clt_parameters
.
tileY
>=
0
)
&&
(
clt_parameters
.
tileX
<
tilesX
)
&&
(
clt_parameters
.
tileY
<
tilesY
))
{
double
[][]
texture_tile
=
texture_tiles_main
[
clt_parameters
.
tileY
][
clt_parameters
.
tileX
];
int
tile
=
+
clt_parameters
.
tileY
*
tilesX
+
clt_parameters
.
tileX
;
System
.
out
.
println
(
"=== tileX= "
+
clt_parameters
.
tileX
+
" tileY= "
+
clt_parameters
.
tileY
+
" tile="
+
tile
+
" ==="
);
...
...
@@ -1895,22 +1895,23 @@ public class TwoQuadCLT {
}
public
ImagePlus
[]
processCLTQuadCorrPairGpu
(
GPUTileProcessor
gPUTileProcessor
,
QuadCLT
quadCLT_main
,
QuadCLT
quadCLT_aux
,
ImagePlus
[]
imp_quad_main
,
ImagePlus
[]
imp_quad_aux
,
boolean
[][]
saturation_main
,
// (near) saturated pixels or null
boolean
[][]
saturation_aux
,
// (near) saturated pixels or null
CLTParameters
clt_parameters
,
EyesisCorrectionParameters
.
DebayerParameters
debayerParameters
,
ColorProcParameters
colorProcParameters
,
ColorProcParameters
colorProcParameters_aux
,
EyesisCorrectionParameters
.
RGBParameters
rgbParameters
,
double
[]
scaleExposures_main
,
// probably not needed here - restores brightness of the final image
double
[]
scaleExposures_aux
,
// probably not needed here - restores brightness of the final image
boolean
notch_mode
,
// use pole-detection mode for inter-camera correlation
final
int
lt_rad
,
// low texture mode - inter-correlation is averaged between the neighbors before argmax-ing, using
GPUTileProcessor
gPUTileProcessor
,
QuadCLT
quadCLT_main
,
QuadCLT
quadCLT_aux
,
ImagePlus
[]
imp_quad_main
,
ImagePlus
[]
imp_quad_aux
,
boolean
[][]
saturation_main
,
// (near) saturated pixels or null
boolean
[][]
saturation_aux
,
// (near) saturated pixels or null
CLTParameters
clt_parameters
,
EyesisCorrectionParameters
.
CorrectionParameters
ecp
,
EyesisCorrectionParameters
.
DebayerParameters
debayerParameters
,
ColorProcParameters
colorProcParameters
,
ColorProcParameters
colorProcParameters_aux
,
EyesisCorrectionParameters
.
RGBParameters
rgbParameters
,
double
[]
scaleExposures_main
,
// probably not needed here - restores brightness of the final image
double
[]
scaleExposures_aux
,
// probably not needed here - restores brightness of the final image
boolean
notch_mode
,
// use pole-detection mode for inter-camera correlation
final
int
lt_rad
,
// low texture mode - inter-correlation is averaged between the neighbors before argmax-ing, using
final
int
threadsMax
,
// maximal number of threads to launch
final
boolean
updateStatus
,
final
int
debugLevel
){
...
...
@@ -1997,7 +1998,7 @@ public class TwoQuadCLT {
// Set task clt_parameters.disparity
GPUTileProcessor
.
TpTask
[]
tp_tasks
=
gPUTileProcessor
.
setFullFrameImages
(
(
float
)
clt_parameters
.
disparity
,
// float target_disparity, // apply same disparity to all tiles
0xf
,
// int out_image, // from which tiles to generate image (currently 0/1)
0xf
,
// int
out_image, // from which tiles to generate image (currently 0/1)
0x3f
,
// int corr_mask, // which correlation pairs to generate (maybe later - reduce size from 15x15)
!
use_aux
,
// boolean use_master,
use_aux
,
// boolean use_aux,
...
...
@@ -2007,6 +2008,47 @@ public class TwoQuadCLT {
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
debugLevel
);
// final int debugLevel)
// Optionally save offsets here?
// EyesisCorrectionParameters.CorrectionParameters ecp,
boolean
save_ports_xy
=
false
;
// true; Same files as saved with the kernels
if
((
ecp
.
tile_processor_gpu
!=
null
)
&&
!
ecp
.
tile_processor_gpu
.
isEmpty
()
&&
save_ports_xy
)
{
int
quad
=
4
;
String
file_prefix
=
ecp
.
tile_processor_gpu
+
"clt/main"
;
for
(
int
chn
=
0
;
chn
<
quad
;
chn
++)
{
String
img_path
=
file_prefix
+
"_chn"
+
chn
+
".portsxy"
;
FileOutputStream
fos
;
try
{
fos
=
new
FileOutputStream
(
img_path
);
}
catch
(
FileNotFoundException
e
)
{
// TODO Auto-generated catch block
System
.
out
.
println
(
"Could not write to "
+
img_path
+
" (file not found) port offsets"
);
break
;
}
DataOutputStream
dos
=
new
DataOutputStream
(
fos
);
WritableByteChannel
channel
=
Channels
.
newChannel
(
dos
);
ByteBuffer
bb
=
ByteBuffer
.
allocate
(
tp_tasks
.
length
*
2
*
4
);
bb
.
order
(
ByteOrder
.
LITTLE_ENDIAN
);
bb
.
clear
();
for
(
int
i
=
0
;
i
<
tp_tasks
.
length
;
i
++)
{
bb
.
putFloat
((
tp_tasks
[
i
].
xy
[
chn
][
0
]));
// x-offset
bb
.
putFloat
((
tp_tasks
[
i
].
xy
[
chn
][
1
]));
// y-offset
}
bb
.
flip
();
try
{
channel
.
write
(
bb
);
}
catch
(
IOException
e
)
{
System
.
out
.
println
(
"Could not write to "
+
img_path
+
" port offsets"
);
break
;
}
try
{
dos
.
close
();
}
catch
(
IOException
e
)
{
System
.
out
.
println
(
"Could not close DataOutputStream for "
+
img_path
+
" port offsets"
);
}
System
.
out
.
println
(
"Wrote port offsets to "
+
img_path
+
"."
);
}
}
gPUTileProcessor
.
setTasks
(
tp_tasks
,
// TpTask [] tile_tasks,
use_aux
);
// boolean use_aux)
...
...
@@ -2236,11 +2278,25 @@ public class TwoQuadCLT {
texture_stack
.
addSlice
(
"main"
,
imp_rgba_main
.
getProcessor
().
getPixels
());
// single slice
ImagePlus
imp_texture_stack
=
new
ImagePlus
(
name
+
"-RGBA-D"
+
clt_parameters
.
disparity
,
texture_stack
);
imp_texture_stack
.
getProcessor
().
resetMinAndMax
();
imp_texture_stack
.
show
();
// imp_texture_stack.show();
String
results_path
=
quadCLT_main
.
correctionsParameters
.
selectResultsDirectory
(
// selectX3dDirectory(
// name, // quad timestamp. Will be ignored if correctionsParameters.use_x3d_subdirs is false
// quadCLT_main.correctionsParameters.x3dModelVersion,
true
,
// smart,
true
);
//newAllowed, // save
quadCLT_main
.
eyesisCorrections
.
saveAndShow
(
imp_texture_stack
,
results_path
,
true
,
// quadCLT_main.correctionsParameters.png && !clt_parameters.black_back,
true
,
// !batch_mode && clt_parameters.show_textures,
0
,
// quadCLT_main.correctionsParameters.JPEG_quality, // jpegQuality); // jpegQuality){// <0 - keep current, 0 - force Tiff, >0 use for JPEG
(
debugLevel
>
0
)
?
debugLevel
:
1
);
// int debugLevel (print what it saves)
}
// convert textures to RGBA in Java
if
(
clt_parameters
.
show_rgba_color
)
{
if
(
clt_parameters
.
show_rgba_color
&&
(
debugLevel
>
0
))
{
// disabling
int
numcol
=
quadCLT_main
.
isMonochrome
()?
1
:
3
;
int
ports
=
imp_quad_main
.
length
;
int
num_src_slices
=
numcol
+
1
+
(
clt_parameters
.
keep_weights
?(
ports
+
numcol
+
1
):
0
);
// 12 ; // calculate
...
...
@@ -2253,7 +2309,7 @@ public class TwoQuadCLT {
int
texture_slice_size
=
(
2
*
GPUTileProcessor
.
DTT_SIZE
)*
(
2
*
GPUTileProcessor
.
DTT_SIZE
);
int
texture_tile_size
=
texture_slice_size
*
num_src_slices
;
if
(
debugLevel
>
-
2
)
{
if
(
debugLevel
>
-
1
)
{
for
(
int
indx
=
0
;
indx
<
texture_indices
.
length
;
indx
++)
if
((
texture_indices
[
indx
]
&
(
1
<<
GPUTileProcessor
.
LIST_TEXTURE_BIT
))
!=
0
){
int
tile
=
texture_indices
[
indx
]
>>
GPUTileProcessor
.
CORR_NTILE_SHIFT
;
int
tileX
=
tile
%
tilesX
;
...
...
@@ -2284,7 +2340,7 @@ public class TwoQuadCLT {
num_src_slices
// int num_src_slices
);
if
((
debugLevel
>
-
2
)
&&
(
clt_parameters
.
tileX
>=
0
)
&&
(
clt_parameters
.
tileY
>=
0
)
&&
(
clt_parameters
.
tileX
<
tilesX
)
&&
(
clt_parameters
.
tileY
<
tilesY
))
{
if
((
debugLevel
>
-
1
)
&&
(
clt_parameters
.
tileX
>=
0
)
&&
(
clt_parameters
.
tileY
>=
0
)
&&
(
clt_parameters
.
tileX
<
tilesX
)
&&
(
clt_parameters
.
tileY
<
tilesY
))
{
String
[]
rgba_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
};
String
[]
rgba_weights_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
,
"port0"
,
"port1"
,
"port2"
,
"port3"
,
"r-rms"
,
"b-rms"
,
"g-rms"
,
"w-rms"
};
double
[][]
texture_tile
=
texture_tiles
[
clt_parameters
.
tileY
][
clt_parameters
.
tileX
];
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
10c327ae
...
...
@@ -90,7 +90,7 @@
#define DEBUG11 1
#define DEBUG12 1
//#define USE_textures_gen
#define DEBUG_OOB1 1
#endif //#ifndef JCUDA
#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
...
...
@@ -169,8 +169,8 @@
//#define BAYER_BLUE_COL (1 - BAYER_RED_COL)
//#define DBG_TILE_X 40
//#define DBG_TILE_Y 80
#define DBG_TILE_X 49
#define DBG_TILE_Y 66
#define DBG_TILE_X
161 //
49
#define DBG_TILE_Y
111 //
66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
//56494
...
...
@@ -1562,8 +1562,8 @@ __global__ void generate_RBGA(
#ifdef DEBUG12
printf
(
"
\n
generate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d
\n
"
,
pass
,
border_tile
,
ti_offset
,
ntt
);
printf
(
"
\n
generate_RBGA() gpu_texture_indices=
0x%x, gpu_texture_indices + ti_offset=0x%x
\n
"
,
(
int
)
gpu_texture_indices
,
(
int
)
(
gpu_texture_indices
+
ti_offset
));
printf
(
"
\n
generate_RBGA() gpu_texture_indices=
%p, gpu_texture_indices + ti_offset= %p
\n
"
,
(
void
*
)
gpu_texture_indices
,
(
void
*
)
(
gpu_texture_indices
+
ti_offset
));
printf
(
"
\n
generate_RBGA() grid_texture={%d, %d, %d)
\n
"
,
grid_texture
.
x
,
grid_texture
.
y
,
grid_texture
.
z
);
printf
(
"
\n
generate_RBGA() threads_texture={%d, %d, %d)
\n
"
,
...
...
@@ -1833,8 +1833,8 @@ __global__ void gen_texture_list(
if
((
x
==
DBG_TILE_X
)
&&
(
y
==
DBG_TILE_Y
)){
printf
(
"
\n
gen_texture_list() buff_head=%d, buf_offset = %d, num_offset= %d, is_border=%d
\n
"
,
buff_head
,
buf_offset
,
num_offset
,
is_border
);
printf
(
"
\n
gen_texture_list() gpu_texture_indices =
0x%x, gpu_texture_indices + buf_offset = 0x%x
\n
"
,
(
int
)
gpu_texture_indices
,
(
int
)
(
gpu_texture_indices
+
buf_offset
));
printf
(
"
\n
gen_texture_list() gpu_texture_indices =
%p, gpu_texture_indices + buf_offset = %p
\n
"
,
(
void
*
)
gpu_texture_indices
,
(
void
*
)
(
gpu_texture_indices
+
buf_offset
));
}
__syncthreads
();
// __syncwarp();
#endif // DEBUG12
...
...
@@ -3170,6 +3170,12 @@ __device__ void convertCorrectTile(
px
=
centerX
-
DTT_SIZE
-
(
clt_extra
->
data_x
+
clt_extra
->
dxc_dx
*
kdx
+
clt_extra
->
dxc_dy
*
kdy
)
;
// fractional left corner
int
itlx
=
(
int
)
floorf
(
px
+
0.5
f
);
if
(
itlx
<
0
){
itlx
&=
1
;
// for color - extend by pairs
}
if
(
itlx
>=
(
IMG_WIDTH
-
DTT_SIZE
)){
itlx
=
itlx
&
1
+
(
IMG_WIDTH
-
DTT_SIZE
-
2
);
// for color - extend by pairs
}
int_topleft
[
0
]
=
itlx
;
float
shift_hor
=
itlx
-
px
;
residual_shift
[
0
]
=
shift_hor
;
...
...
@@ -3205,8 +3211,24 @@ __device__ void convertCorrectTile(
py
=
centerY
-
DTT_SIZE
-
(
clt_extra
->
data_y
+
clt_extra
->
dyc_dx
*
kdx
+
clt_extra
->
dyc_dy
*
kdy
)
;
// fractional top corner
int
itly
=
(
int
)
floorf
(
py
+
0.5
f
);
if
(
itly
<
0
){
itly
&=
1
;
// for color - extend by pairs
}
if
(
itly
>=
(
IMG_HEIGHT
-
DTT_SIZE
)){
itly
=
(
itly
&
1
)
+
(
IMG_HEIGHT
-
DTT_SIZE
-
2
);
// for color - extend by pairs
}
int_topleft
[
1
]
=
itly
;
#ifdef DEBUG_OOB1
if
((
int_topleft
[
0
]
<
0
)
||
(
int_topleft
[
1
]
<
0
)
||
(
int_topleft
[
0
]
>=
(
IMG_WIDTH
-
DTT_SIZE
))
||
(
int_topleft
[
1
]
>=
IMG_HEIGHT
-
DTT_SIZE
)){
printf
(
"Source data OOB, left=%d, top=%d
\n
"
,
int_topleft
[
0
],
int_topleft
[
1
]);
printf
(
"
\n
"
);
printf
(
"
\n
"
);
__syncthreads
();
// __syncwarp();
}
#endif // DEBUG_OOB1
float
shift_vert
=
itly
-
py
;
residual_shift
[
1
]
=
shift_vert
;
x
=
shift_vert
*
(
1.0
f
/
16
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment