Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
10c327ae
Commit
10c327ae
authored
Apr 05, 2020
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
debugging GPU OOB with large disparities, fixed in the kernel code
parent
b5bfb231
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
121 additions
and
42 deletions
+121
-42
Eyesis_Correction.java
.../java/com/elphel/imagej/correction/Eyesis_Correction.java
+1
-0
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+1
-1
TwoQuadCLT.java
...main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+90
-34
TileProcessor.cuh
src/main/resources/kernels/TileProcessor.cuh
+29
-7
No files found.
src/main/java/com/elphel/imagej/correction/Eyesis_Correction.java
View file @
10c327ae
...
@@ -5812,6 +5812,7 @@ private Panel panel1,
...
@@ -5812,6 +5812,7 @@ private Panel panel1,
QUAD_CLT
,
// QuadCLT quadCLT_main,
QUAD_CLT
,
// QuadCLT quadCLT_main,
QUAD_CLT_AUX
,
// QuadCLT quadCLT_aux,
QUAD_CLT_AUX
,
// QuadCLT quadCLT_aux,
CLT_PARAMETERS
,
// EyesisCorrectionParameters.DCTParameters dct_parameters,
CLT_PARAMETERS
,
// EyesisCorrectionParameters.DCTParameters dct_parameters,
CORRECTION_PARAMETERS
,
// EyesisCorrectionParameters ecp,
DEBAYER_PARAMETERS
,
//EyesisCorrectionParameters.DebayerParameters debayerParameters,
DEBAYER_PARAMETERS
,
//EyesisCorrectionParameters.DebayerParameters debayerParameters,
COLOR_PROC_PARAMETERS
,
//EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
COLOR_PROC_PARAMETERS
,
//EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
COLOR_PROC_PARAMETERS_AUX
,
//EyesisCorrectionParameters.ColorProcParameters colorProcParameters_aux,
COLOR_PROC_PARAMETERS_AUX
,
//EyesisCorrectionParameters.ColorProcParameters colorProcParameters_aux,
...
...
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
10c327ae
...
@@ -1359,7 +1359,7 @@ public class GPUTileProcessor {
...
@@ -1359,7 +1359,7 @@ public class GPUTileProcessor {
public
void
getTileSubcamOffsets
(
public
void
getTileSubcamOffsets
(
final
TpTask
[]
tp_tasks
,
// will use // modify to have offsets for 8 cameras
final
TpTask
[]
tp_tasks
,
// will use // modify to have offsets for 8 cameras
final
GeometryCorrection
geometryCorrection_main
,
final
GeometryCorrection
geometryCorrection_main
,
final
GeometryCorrection
geometryCorrection_aux
,
// if null, will only calculate offsets fro the main camera
final
GeometryCorrection
geometryCorrection_aux
,
// if null, will only calculate offsets fro
m
the main camera
final
double
[][][]
ers_delay
,
// if not null - fill with tile center acquisition delay
final
double
[][][]
ers_delay
,
// if not null - fill with tile center acquisition delay
final
int
threadsMax
,
// maximal number of threads to launch
final
int
threadsMax
,
// maximal number of threads to launch
final
int
debugLevel
)
final
int
debugLevel
)
...
...
src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
View file @
10c327ae
...
@@ -456,6 +456,7 @@ public class TwoQuadCLT {
...
@@ -456,6 +456,7 @@ public class TwoQuadCLT {
QuadCLT
quadCLT_main
,
QuadCLT
quadCLT_main
,
QuadCLT
quadCLT_aux
,
QuadCLT
quadCLT_aux
,
CLTParameters
clt_parameters
,
CLTParameters
clt_parameters
,
EyesisCorrectionParameters
.
CorrectionParameters
ecp
,
EyesisCorrectionParameters
.
DebayerParameters
debayerParameters
,
EyesisCorrectionParameters
.
DebayerParameters
debayerParameters
,
ColorProcParameters
colorProcParameters
,
ColorProcParameters
colorProcParameters
,
ColorProcParameters
colorProcParameters_aux
,
ColorProcParameters
colorProcParameters_aux
,
...
@@ -523,6 +524,7 @@ public class TwoQuadCLT {
...
@@ -523,6 +524,7 @@ public class TwoQuadCLT {
saturation_imp_main
,
// boolean [][] saturation_main, // (near) saturated pixels or null
saturation_imp_main
,
// boolean [][] saturation_main, // (near) saturated pixels or null
saturation_imp_aux
,
// boolean [][] saturation_aux, // (near) saturated pixels or null
saturation_imp_aux
,
// boolean [][] saturation_aux, // (near) saturated pixels or null
clt_parameters
,
// EyesisCorrectionParameters.CLTParameters clt_parameters,
clt_parameters
,
// EyesisCorrectionParameters.CLTParameters clt_parameters,
ecp
,
// EyesisCorrectionParameters.CorrectionParameters ecp,
debayerParameters
,
// EyesisCorrectionParameters.DebayerParameters debayerParameters,
debayerParameters
,
// EyesisCorrectionParameters.DebayerParameters debayerParameters,
colorProcParameters
,
// EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
colorProcParameters
,
// EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
colorProcParameters_aux
,
// EyesisCorrectionParameters.ColorProcParameters colorProcParameters_aux,
colorProcParameters_aux
,
// EyesisCorrectionParameters.ColorProcParameters colorProcParameters_aux,
...
@@ -1191,8 +1193,6 @@ public class TwoQuadCLT {
...
@@ -1191,8 +1193,6 @@ public class TwoQuadCLT {
bb
.
order
(
ByteOrder
.
LITTLE_ENDIAN
);
bb
.
order
(
ByteOrder
.
LITTLE_ENDIAN
);
bb
.
clear
();
bb
.
clear
();
for
(
int
i
=
0
;
i
<
port_xy
.
length
;
i
++)
{
for
(
int
i
=
0
;
i
<
port_xy
.
length
;
i
++)
{
// dos.writeFloat((float) (port_xy[i][chn][0])); // x-offset
// dos.writeFloat((float) (port_xy[i][chn][1])); // y-offset
bb
.
putFloat
((
float
)
(
port_xy
[
i
][
chn
][
0
]));
// x-offset
bb
.
putFloat
((
float
)
(
port_xy
[
i
][
chn
][
0
]));
// x-offset
bb
.
putFloat
((
float
)
(
port_xy
[
i
][
chn
][
1
]));
// y-offset
bb
.
putFloat
((
float
)
(
port_xy
[
i
][
chn
][
1
]));
// y-offset
}
}
...
@@ -1521,7 +1521,7 @@ public class TwoQuadCLT {
...
@@ -1521,7 +1521,7 @@ public class TwoQuadCLT {
String
[]
rgba_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
};
String
[]
rgba_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
};
String
[]
rgba_weights_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
,
"port0"
,
"port1"
,
"port2"
,
"port3"
,
"r-rms"
,
"b-rms"
,
"g-rms"
,
"w-rms"
};
String
[]
rgba_weights_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
,
"port0"
,
"port1"
,
"port2"
,
"port3"
,
"r-rms"
,
"b-rms"
,
"g-rms"
,
"w-rms"
};
if
((
texture_tiles_main
!=
null
)
&&
(
texture_tiles_aux
!=
null
)){
if
((
texture_tiles_main
!=
null
)
&&
(
texture_tiles_aux
!=
null
)){
if
((
debugLevel
>
-
2
)
&&
(
clt_parameters
.
tileX
>=
0
)
&&
(
clt_parameters
.
tileY
>=
0
)
&&
(
clt_parameters
.
tileX
<
tilesX
)
&&
(
clt_parameters
.
tileY
<
tilesY
))
{
if
((
debugLevel
>
-
1
)
&&
(
clt_parameters
.
tileX
>=
0
)
&&
(
clt_parameters
.
tileY
>=
0
)
&&
(
clt_parameters
.
tileX
<
tilesX
)
&&
(
clt_parameters
.
tileY
<
tilesY
))
{
double
[][]
texture_tile
=
texture_tiles_main
[
clt_parameters
.
tileY
][
clt_parameters
.
tileX
];
double
[][]
texture_tile
=
texture_tiles_main
[
clt_parameters
.
tileY
][
clt_parameters
.
tileX
];
int
tile
=
+
clt_parameters
.
tileY
*
tilesX
+
clt_parameters
.
tileX
;
int
tile
=
+
clt_parameters
.
tileY
*
tilesX
+
clt_parameters
.
tileX
;
System
.
out
.
println
(
"=== tileX= "
+
clt_parameters
.
tileX
+
" tileY= "
+
clt_parameters
.
tileY
+
" tile="
+
tile
+
" ==="
);
System
.
out
.
println
(
"=== tileX= "
+
clt_parameters
.
tileX
+
" tileY= "
+
clt_parameters
.
tileY
+
" tile="
+
tile
+
" ==="
);
...
@@ -1903,6 +1903,7 @@ public class TwoQuadCLT {
...
@@ -1903,6 +1903,7 @@ public class TwoQuadCLT {
boolean
[][]
saturation_main
,
// (near) saturated pixels or null
boolean
[][]
saturation_main
,
// (near) saturated pixels or null
boolean
[][]
saturation_aux
,
// (near) saturated pixels or null
boolean
[][]
saturation_aux
,
// (near) saturated pixels or null
CLTParameters
clt_parameters
,
CLTParameters
clt_parameters
,
EyesisCorrectionParameters
.
CorrectionParameters
ecp
,
EyesisCorrectionParameters
.
DebayerParameters
debayerParameters
,
EyesisCorrectionParameters
.
DebayerParameters
debayerParameters
,
ColorProcParameters
colorProcParameters
,
ColorProcParameters
colorProcParameters
,
ColorProcParameters
colorProcParameters_aux
,
ColorProcParameters
colorProcParameters_aux
,
...
@@ -2007,6 +2008,47 @@ public class TwoQuadCLT {
...
@@ -2007,6 +2008,47 @@ public class TwoQuadCLT {
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
debugLevel
);
// final int debugLevel)
debugLevel
);
// final int debugLevel)
// Optionally save offsets here?
// EyesisCorrectionParameters.CorrectionParameters ecp,
boolean
save_ports_xy
=
false
;
// true; Same files as saved with the kernels
if
((
ecp
.
tile_processor_gpu
!=
null
)
&&
!
ecp
.
tile_processor_gpu
.
isEmpty
()
&&
save_ports_xy
)
{
int
quad
=
4
;
String
file_prefix
=
ecp
.
tile_processor_gpu
+
"clt/main"
;
for
(
int
chn
=
0
;
chn
<
quad
;
chn
++)
{
String
img_path
=
file_prefix
+
"_chn"
+
chn
+
".portsxy"
;
FileOutputStream
fos
;
try
{
fos
=
new
FileOutputStream
(
img_path
);
}
catch
(
FileNotFoundException
e
)
{
// TODO Auto-generated catch block
System
.
out
.
println
(
"Could not write to "
+
img_path
+
" (file not found) port offsets"
);
break
;
}
DataOutputStream
dos
=
new
DataOutputStream
(
fos
);
WritableByteChannel
channel
=
Channels
.
newChannel
(
dos
);
ByteBuffer
bb
=
ByteBuffer
.
allocate
(
tp_tasks
.
length
*
2
*
4
);
bb
.
order
(
ByteOrder
.
LITTLE_ENDIAN
);
bb
.
clear
();
for
(
int
i
=
0
;
i
<
tp_tasks
.
length
;
i
++)
{
bb
.
putFloat
((
tp_tasks
[
i
].
xy
[
chn
][
0
]));
// x-offset
bb
.
putFloat
((
tp_tasks
[
i
].
xy
[
chn
][
1
]));
// y-offset
}
bb
.
flip
();
try
{
channel
.
write
(
bb
);
}
catch
(
IOException
e
)
{
System
.
out
.
println
(
"Could not write to "
+
img_path
+
" port offsets"
);
break
;
}
try
{
dos
.
close
();
}
catch
(
IOException
e
)
{
System
.
out
.
println
(
"Could not close DataOutputStream for "
+
img_path
+
" port offsets"
);
}
System
.
out
.
println
(
"Wrote port offsets to "
+
img_path
+
"."
);
}
}
gPUTileProcessor
.
setTasks
(
gPUTileProcessor
.
setTasks
(
tp_tasks
,
// TpTask [] tile_tasks,
tp_tasks
,
// TpTask [] tile_tasks,
use_aux
);
// boolean use_aux)
use_aux
);
// boolean use_aux)
...
@@ -2236,11 +2278,25 @@ public class TwoQuadCLT {
...
@@ -2236,11 +2278,25 @@ public class TwoQuadCLT {
texture_stack
.
addSlice
(
"main"
,
imp_rgba_main
.
getProcessor
().
getPixels
());
// single slice
texture_stack
.
addSlice
(
"main"
,
imp_rgba_main
.
getProcessor
().
getPixels
());
// single slice
ImagePlus
imp_texture_stack
=
new
ImagePlus
(
name
+
"-RGBA-D"
+
clt_parameters
.
disparity
,
texture_stack
);
ImagePlus
imp_texture_stack
=
new
ImagePlus
(
name
+
"-RGBA-D"
+
clt_parameters
.
disparity
,
texture_stack
);
imp_texture_stack
.
getProcessor
().
resetMinAndMax
();
imp_texture_stack
.
getProcessor
().
resetMinAndMax
();
imp_texture_stack
.
show
();
// imp_texture_stack.show();
String
results_path
=
quadCLT_main
.
correctionsParameters
.
selectResultsDirectory
(
// selectX3dDirectory(
// name, // quad timestamp. Will be ignored if correctionsParameters.use_x3d_subdirs is false
// quadCLT_main.correctionsParameters.x3dModelVersion,
true
,
// smart,
true
);
//newAllowed, // save
quadCLT_main
.
eyesisCorrections
.
saveAndShow
(
imp_texture_stack
,
results_path
,
true
,
// quadCLT_main.correctionsParameters.png && !clt_parameters.black_back,
true
,
// !batch_mode && clt_parameters.show_textures,
0
,
// quadCLT_main.correctionsParameters.JPEG_quality, // jpegQuality); // jpegQuality){// <0 - keep current, 0 - force Tiff, >0 use for JPEG
(
debugLevel
>
0
)
?
debugLevel
:
1
);
// int debugLevel (print what it saves)
}
}
// convert textures to RGBA in Java
// convert textures to RGBA in Java
if
(
clt_parameters
.
show_rgba_color
)
{
if
(
clt_parameters
.
show_rgba_color
&&
(
debugLevel
>
0
))
{
// disabling
int
numcol
=
quadCLT_main
.
isMonochrome
()?
1
:
3
;
int
numcol
=
quadCLT_main
.
isMonochrome
()?
1
:
3
;
int
ports
=
imp_quad_main
.
length
;
int
ports
=
imp_quad_main
.
length
;
int
num_src_slices
=
numcol
+
1
+
(
clt_parameters
.
keep_weights
?(
ports
+
numcol
+
1
):
0
);
// 12 ; // calculate
int
num_src_slices
=
numcol
+
1
+
(
clt_parameters
.
keep_weights
?(
ports
+
numcol
+
1
):
0
);
// 12 ; // calculate
...
@@ -2253,7 +2309,7 @@ public class TwoQuadCLT {
...
@@ -2253,7 +2309,7 @@ public class TwoQuadCLT {
int
texture_slice_size
=
(
2
*
GPUTileProcessor
.
DTT_SIZE
)*
(
2
*
GPUTileProcessor
.
DTT_SIZE
);
int
texture_slice_size
=
(
2
*
GPUTileProcessor
.
DTT_SIZE
)*
(
2
*
GPUTileProcessor
.
DTT_SIZE
);
int
texture_tile_size
=
texture_slice_size
*
num_src_slices
;
int
texture_tile_size
=
texture_slice_size
*
num_src_slices
;
if
(
debugLevel
>
-
2
)
{
if
(
debugLevel
>
-
1
)
{
for
(
int
indx
=
0
;
indx
<
texture_indices
.
length
;
indx
++)
if
((
texture_indices
[
indx
]
&
(
1
<<
GPUTileProcessor
.
LIST_TEXTURE_BIT
))
!=
0
){
for
(
int
indx
=
0
;
indx
<
texture_indices
.
length
;
indx
++)
if
((
texture_indices
[
indx
]
&
(
1
<<
GPUTileProcessor
.
LIST_TEXTURE_BIT
))
!=
0
){
int
tile
=
texture_indices
[
indx
]
>>
GPUTileProcessor
.
CORR_NTILE_SHIFT
;
int
tile
=
texture_indices
[
indx
]
>>
GPUTileProcessor
.
CORR_NTILE_SHIFT
;
int
tileX
=
tile
%
tilesX
;
int
tileX
=
tile
%
tilesX
;
...
@@ -2284,7 +2340,7 @@ public class TwoQuadCLT {
...
@@ -2284,7 +2340,7 @@ public class TwoQuadCLT {
num_src_slices
// int num_src_slices
num_src_slices
// int num_src_slices
);
);
if
((
debugLevel
>
-
2
)
&&
(
clt_parameters
.
tileX
>=
0
)
&&
(
clt_parameters
.
tileY
>=
0
)
&&
(
clt_parameters
.
tileX
<
tilesX
)
&&
(
clt_parameters
.
tileY
<
tilesY
))
{
if
((
debugLevel
>
-
1
)
&&
(
clt_parameters
.
tileX
>=
0
)
&&
(
clt_parameters
.
tileY
>=
0
)
&&
(
clt_parameters
.
tileX
<
tilesX
)
&&
(
clt_parameters
.
tileY
<
tilesY
))
{
String
[]
rgba_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
};
String
[]
rgba_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
};
String
[]
rgba_weights_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
,
"port0"
,
"port1"
,
"port2"
,
"port3"
,
"r-rms"
,
"b-rms"
,
"g-rms"
,
"w-rms"
};
String
[]
rgba_weights_titles
=
{
"red"
,
"blue"
,
"green"
,
"alpha"
,
"port0"
,
"port1"
,
"port2"
,
"port3"
,
"r-rms"
,
"b-rms"
,
"g-rms"
,
"w-rms"
};
double
[][]
texture_tile
=
texture_tiles
[
clt_parameters
.
tileY
][
clt_parameters
.
tileX
];
double
[][]
texture_tile
=
texture_tiles
[
clt_parameters
.
tileY
][
clt_parameters
.
tileX
];
...
...
src/main/resources/kernels/TileProcessor.cuh
View file @
10c327ae
...
@@ -90,7 +90,7 @@
...
@@ -90,7 +90,7 @@
#define DEBUG11 1
#define DEBUG11 1
#define DEBUG12 1
#define DEBUG12 1
//#define USE_textures_gen
//#define USE_textures_gen
#define DEBUG_OOB1 1
#endif //#ifndef JCUDA
#endif //#ifndef JCUDA
#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
...
@@ -169,8 +169,8 @@
...
@@ -169,8 +169,8 @@
//#define BAYER_BLUE_COL (1 - BAYER_RED_COL)
//#define BAYER_BLUE_COL (1 - BAYER_RED_COL)
//#define DBG_TILE_X 40
//#define DBG_TILE_X 40
//#define DBG_TILE_Y 80
//#define DBG_TILE_Y 80
#define DBG_TILE_X 49
#define DBG_TILE_X
161 //
49
#define DBG_TILE_Y 66
#define DBG_TILE_Y
111 //
66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
//56494
//56494
...
@@ -1562,8 +1562,8 @@ __global__ void generate_RBGA(
...
@@ -1562,8 +1562,8 @@ __global__ void generate_RBGA(
#ifdef DEBUG12
#ifdef DEBUG12
printf
(
"
\n
generate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d
\n
"
,
printf
(
"
\n
generate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d
\n
"
,
pass
,
border_tile
,
ti_offset
,
ntt
);
pass
,
border_tile
,
ti_offset
,
ntt
);
printf
(
"
\n
generate_RBGA() gpu_texture_indices=
0x%x, gpu_texture_indices + ti_offset=0x%x
\n
"
,
printf
(
"
\n
generate_RBGA() gpu_texture_indices=
%p, gpu_texture_indices + ti_offset= %p
\n
"
,
(
int
)
gpu_texture_indices
,
(
int
)
(
gpu_texture_indices
+
ti_offset
));
(
void
*
)
gpu_texture_indices
,
(
void
*
)
(
gpu_texture_indices
+
ti_offset
));
printf
(
"
\n
generate_RBGA() grid_texture={%d, %d, %d)
\n
"
,
printf
(
"
\n
generate_RBGA() grid_texture={%d, %d, %d)
\n
"
,
grid_texture
.
x
,
grid_texture
.
y
,
grid_texture
.
z
);
grid_texture
.
x
,
grid_texture
.
y
,
grid_texture
.
z
);
printf
(
"
\n
generate_RBGA() threads_texture={%d, %d, %d)
\n
"
,
printf
(
"
\n
generate_RBGA() threads_texture={%d, %d, %d)
\n
"
,
...
@@ -1833,8 +1833,8 @@ __global__ void gen_texture_list(
...
@@ -1833,8 +1833,8 @@ __global__ void gen_texture_list(
if
((
x
==
DBG_TILE_X
)
&&
(
y
==
DBG_TILE_Y
)){
if
((
x
==
DBG_TILE_X
)
&&
(
y
==
DBG_TILE_Y
)){
printf
(
"
\n
gen_texture_list() buff_head=%d, buf_offset = %d, num_offset= %d, is_border=%d
\n
"
,
printf
(
"
\n
gen_texture_list() buff_head=%d, buf_offset = %d, num_offset= %d, is_border=%d
\n
"
,
buff_head
,
buf_offset
,
num_offset
,
is_border
);
buff_head
,
buf_offset
,
num_offset
,
is_border
);
printf
(
"
\n
gen_texture_list() gpu_texture_indices =
0x%x, gpu_texture_indices + buf_offset = 0x%x
\n
"
,
printf
(
"
\n
gen_texture_list() gpu_texture_indices =
%p, gpu_texture_indices + buf_offset = %p
\n
"
,
(
int
)
gpu_texture_indices
,
(
int
)
(
gpu_texture_indices
+
buf_offset
));
(
void
*
)
gpu_texture_indices
,
(
void
*
)
(
gpu_texture_indices
+
buf_offset
));
}
}
__syncthreads
();
// __syncwarp();
__syncthreads
();
// __syncwarp();
#endif // DEBUG12
#endif // DEBUG12
...
@@ -3170,6 +3170,12 @@ __device__ void convertCorrectTile(
...
@@ -3170,6 +3170,12 @@ __device__ void convertCorrectTile(
px
=
centerX
-
DTT_SIZE
-
(
clt_extra
->
data_x
+
clt_extra
->
dxc_dx
*
kdx
+
clt_extra
->
dxc_dy
*
kdy
)
;
// fractional left corner
px
=
centerX
-
DTT_SIZE
-
(
clt_extra
->
data_x
+
clt_extra
->
dxc_dx
*
kdx
+
clt_extra
->
dxc_dy
*
kdy
)
;
// fractional left corner
int
itlx
=
(
int
)
floorf
(
px
+
0.5
f
);
int
itlx
=
(
int
)
floorf
(
px
+
0.5
f
);
if
(
itlx
<
0
){
itlx
&=
1
;
// for color - extend by pairs
}
if
(
itlx
>=
(
IMG_WIDTH
-
DTT_SIZE
)){
itlx
=
itlx
&
1
+
(
IMG_WIDTH
-
DTT_SIZE
-
2
);
// for color - extend by pairs
}
int_topleft
[
0
]
=
itlx
;
int_topleft
[
0
]
=
itlx
;
float
shift_hor
=
itlx
-
px
;
float
shift_hor
=
itlx
-
px
;
residual_shift
[
0
]
=
shift_hor
;
residual_shift
[
0
]
=
shift_hor
;
...
@@ -3205,8 +3211,24 @@ __device__ void convertCorrectTile(
...
@@ -3205,8 +3211,24 @@ __device__ void convertCorrectTile(
py
=
centerY
-
DTT_SIZE
-
(
clt_extra
->
data_y
+
clt_extra
->
dyc_dx
*
kdx
+
clt_extra
->
dyc_dy
*
kdy
)
;
// fractional top corner
py
=
centerY
-
DTT_SIZE
-
(
clt_extra
->
data_y
+
clt_extra
->
dyc_dx
*
kdx
+
clt_extra
->
dyc_dy
*
kdy
)
;
// fractional top corner
int
itly
=
(
int
)
floorf
(
py
+
0.5
f
);
int
itly
=
(
int
)
floorf
(
py
+
0.5
f
);
if
(
itly
<
0
){
itly
&=
1
;
// for color - extend by pairs
}
if
(
itly
>=
(
IMG_HEIGHT
-
DTT_SIZE
)){
itly
=
(
itly
&
1
)
+
(
IMG_HEIGHT
-
DTT_SIZE
-
2
);
// for color - extend by pairs
}
int_topleft
[
1
]
=
itly
;
int_topleft
[
1
]
=
itly
;
#ifdef DEBUG_OOB1
if
((
int_topleft
[
0
]
<
0
)
||
(
int_topleft
[
1
]
<
0
)
||
(
int_topleft
[
0
]
>=
(
IMG_WIDTH
-
DTT_SIZE
))
||
(
int_topleft
[
1
]
>=
IMG_HEIGHT
-
DTT_SIZE
)){
printf
(
"Source data OOB, left=%d, top=%d
\n
"
,
int_topleft
[
0
],
int_topleft
[
1
]);
printf
(
"
\n
"
);
printf
(
"
\n
"
);
__syncthreads
();
// __syncwarp();
}
#endif // DEBUG_OOB1
float
shift_vert
=
itly
-
py
;
float
shift_vert
=
itly
-
py
;
residual_shift
[
1
]
=
shift_vert
;
residual_shift
[
1
]
=
shift_vert
;
x
=
shift_vert
*
(
1.0
f
/
16
);
x
=
shift_vert
*
(
1.0
f
/
16
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment