Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
72b6bdce
Commit
72b6bdce
authored
Oct 09, 2018
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Got 4 images converted, corrected, and converted back with JCUDA!
parent
184a23d0
Changes
7
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1338 additions
and
81 deletions
+1338
-81
Eyesis_Correction.java
src/main/java/Eyesis_Correction.java
+56
-1
GPUTileProcessor.java
src/main/java/GPUTileProcessor.java
+829
-43
QuadCLT.java
src/main/java/QuadCLT.java
+102
-9
TwoQuadCLT.java
src/main/java/TwoQuadCLT.java
+290
-3
showDoubleFloatArrays.java
src/main/java/showDoubleFloatArrays.java
+24
-0
TileProcessor.cuh
src/main/resources/TileProcessor.cuh
+34
-23
dtt8x8.cuh
src/main/resources/dtt8x8.cuh
+3
-2
No files found.
src/main/java/Eyesis_Correction.java
View file @
72b6bdce
...
@@ -640,6 +640,7 @@ private Panel panel1,
...
@@ -640,6 +640,7 @@ private Panel panel1,
panelClt_GPU
.
setLayout
(
new
GridLayout
(
1
,
0
,
5
,
5
));
// rows, columns, vgap, hgap
panelClt_GPU
.
setLayout
(
new
GridLayout
(
1
,
0
,
5
,
5
));
// rows, columns, vgap, hgap
addButton
(
"JCUDA TEST"
,
panelClt_GPU
);
addButton
(
"JCUDA TEST"
,
panelClt_GPU
);
addButton
(
"TF TEST"
,
panelClt_GPU
);
addButton
(
"TF TEST"
,
panelClt_GPU
);
addButton
(
"GPU files"
,
panelClt_GPU
,
color_conf_process
);
addButton
(
"Rig8 gpu"
,
panelClt_GPU
,
color_conf_process
);
addButton
(
"Rig8 gpu"
,
panelClt_GPU
,
color_conf_process
);
addButton
(
"ShowGPU"
,
panelClt_GPU
,
color_conf_process
);
addButton
(
"ShowGPU"
,
panelClt_GPU
,
color_conf_process
);
add
(
panelClt_GPU
);
add
(
panelClt_GPU
);
...
@@ -4579,12 +4580,19 @@ private Panel panel1,
...
@@ -4579,12 +4580,19 @@ private Panel panel1,
EYESIS_CORRECTIONS
.
setDebug
(
DEBUG_LEVEL
);
EYESIS_CORRECTIONS
.
setDebug
(
DEBUG_LEVEL
);
getPairImages2
();
getPairImages2
();
return
;
return
;
/* ======================================================================== */
}
else
if
(
label
.
equals
(
"GPU files"
))
{
DEBUG_LEVEL
=
MASTER_DEBUG_LEVEL
;
EYESIS_CORRECTIONS
.
setDebug
(
DEBUG_LEVEL
);
generateGPUDebugFiles
();
return
;
/* ======================================================================== */
/* ======================================================================== */
}
else
if
(
label
.
equals
(
"Rig8 gpu"
))
{
}
else
if
(
label
.
equals
(
"Rig8 gpu"
))
{
DEBUG_LEVEL
=
MASTER_DEBUG_LEVEL
;
DEBUG_LEVEL
=
MASTER_DEBUG_LEVEL
;
EYESIS_CORRECTIONS
.
setDebug
(
DEBUG_LEVEL
);
EYESIS_CORRECTIONS
.
setDebug
(
DEBUG_LEVEL
);
getPairImages2Gpu
();
getPairImages2Gpu
();
return
;
return
;
/* ======================================================================== */
/* ======================================================================== */
}
else
if
(
label
.
equals
(
"ShowGPU"
))
{
}
else
if
(
label
.
equals
(
"ShowGPU"
))
{
DEBUG_LEVEL
=
MASTER_DEBUG_LEVEL
;
DEBUG_LEVEL
=
MASTER_DEBUG_LEVEL
;
...
@@ -5143,6 +5151,52 @@ private Panel panel1,
...
@@ -5143,6 +5151,52 @@ private Panel panel1,
return
true
;
return
true
;
}
}
/**
 * Prepares the rig images and has TWO_QUAD_CLT.prepareFilesForGPUDebug() process them,
 * then saves the timestamped configuration again.
 *
 * The clt_3d_passes lists of both QUAD_CLT.tp and QUAD_CLT_AUX.tp are cleared before and
 * after the call (each only when the corresponding tp exists).
 *
 * @return false if prepareRigImages() fails, if the configuration path is "ABORT",
 *         or if prepareFilesForGPUDebug() throws; true otherwise
 */
public boolean generateGPUDebugFiles() {
    if (!prepareRigImages()) return false;
    String configPath = getSaveCongigPath();
    // Constant-first comparison: configPath may be null (it is null-checked before saving below),
    // so configPath.equals(...) could throw a NullPointerException here.
    if ("ABORT".equals(configPath)) return false;
    if (DEBUG_LEVEL > -2) {
        System.out.println("++++++++++++++ Calculating combined correlations ++++++++++++++");
    }
    // reset if ran after 3d model to save memory
    if (QUAD_CLT.tp != null) {
        QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    if (QUAD_CLT_AUX.tp != null) {
        QUAD_CLT_AUX.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    boolean prepared = true;
    try {
        TWO_QUAD_CLT.prepareFilesForGPUDebug(
                QUAD_CLT,              // QuadCLT quadCLT_main,
                QUAD_CLT_AUX,          // QuadCLT quadCLT_aux,
                CLT_PARAMETERS,        // EyesisCorrectionParameters.DCTParameters dct_parameters,
                DEBAYER_PARAMETERS,    // EyesisCorrectionParameters.DebayerParameters debayerParameters,
                COLOR_PROC_PARAMETERS, // EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
                RGB_PARAMETERS,        // EyesisCorrectionParameters.RGBParameters rgbParameters,
                THREADS_MAX,           // final int threadsMax, // maximal number of threads to launch
                UPDATE_STATUS,         // final boolean updateStatus,
                DEBUG_LEVEL);          // final int debugLevel);
    } catch (Exception e) {
        e.printStackTrace();
        // Report the failure to the caller instead of returning true (same as getPairImages2Gpu()).
        prepared = false;
    }
    // Clear the passes again (so running "Ground truth" after would be OK); guarded the same way
    // as the reset above so a missing tile processor cannot cause a NullPointerException.
    if (QUAD_CLT.tp != null) {
        QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    if (QUAD_CLT_AUX.tp != null) {
        QUAD_CLT_AUX.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    if (!prepared) {
        return false;
    }
    if (configPath != null) {
        saveTimestampedProperties( // save config again
                configPath, // full path or null
                null,       // use as default directory if path==null
                true,
                PROPERTIES);
    }
    return true;
}
public
boolean
getPairImages2Gpu
()
{
public
boolean
getPairImages2Gpu
()
{
if
(!
prepareRigImages
())
return
false
;
if
(!
prepareRigImages
())
return
false
;
String
configPath
=
getSaveCongigPath
();
String
configPath
=
getSaveCongigPath
();
...
@@ -5165,6 +5219,7 @@ private Panel panel1,
...
@@ -5165,6 +5219,7 @@ private Panel panel1,
System
.
out
.
println
(
"Failed to initialize GPU class"
);
System
.
out
.
println
(
"Failed to initialize GPU class"
);
// TODO Auto-generated catch block
// TODO Auto-generated catch block
e
.
printStackTrace
();
e
.
printStackTrace
();
return
false
;
}
//final int debugLevel);
}
//final int debugLevel);
}
}
...
@@ -5186,6 +5241,7 @@ private Panel panel1,
...
@@ -5186,6 +5241,7 @@ private Panel panel1,
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
// TODO Auto-generated catch block
// TODO Auto-generated catch block
e
.
printStackTrace
();
e
.
printStackTrace
();
return
false
;
}
//final int debugLevel);
}
//final int debugLevel);
QUAD_CLT
.
tp
.
clt_3d_passes
=
null
;
// resetCLTPasses(); // so running "Ground truth" after would be OK
QUAD_CLT
.
tp
.
clt_3d_passes
=
null
;
// resetCLTPasses(); // so running "Ground truth" after would be OK
QUAD_CLT_AUX
.
tp
.
clt_3d_passes
=
null
;
//.resetCLTPasses();
QUAD_CLT_AUX
.
tp
.
clt_3d_passes
=
null
;
//.resetCLTPasses();
...
@@ -5203,7 +5259,6 @@ private Panel panel1,
...
@@ -5203,7 +5259,6 @@ private Panel panel1,
public
boolean
rigPlanes
()
{
public
boolean
rigPlanes
()
{
if
((
QUAD_CLT
==
null
)
||
(
QUAD_CLT
.
tp
==
null
)
||
(
QUAD_CLT
.
tp
.
clt_3d_passes
==
null
)
||
(
QUAD_CLT
.
tp
.
clt_3d_passes
.
size
()
==
0
))
{
if
((
QUAD_CLT
==
null
)
||
(
QUAD_CLT
.
tp
==
null
)
||
(
QUAD_CLT
.
tp
.
clt_3d_passes
==
null
)
||
(
QUAD_CLT
.
tp
.
clt_3d_passes
.
size
()
==
0
))
{
String
msg
=
"DSI data is not available. Please run \"CLT 3D\" first"
;
String
msg
=
"DSI data is not available. Please run \"CLT 3D\" first"
;
...
...
src/main/java/GPUTileProcessor.java
View file @
72b6bdce
This diff is collapsed.
Click to expand it.
src/main/java/QuadCLT.java
View file @
72b6bdce
...
@@ -4489,6 +4489,56 @@ public class QuadCLT {
...
@@ -4489,6 +4489,56 @@ public class QuadCLT {
return
rslt
;
return
rslt
;
}
}
// float
/**
 * Float-array variant of linearStackToColor(): wraps the first three planes of
 * {@code iclt_data} into an ImageStack and delegates to the ImageStack-based overload.
 *
 * Planes 0, 1 and 2 become slices titled "red", "blue" and "green" (in that order);
 * NaN values in them are replaced with 0.0 by makeStack(). If a fourth plane exists
 * it is handed to the overload as the alpha pixels, otherwise null is passed.
 *
 * @param iclt_data per-plane pixel arrays; at least three planes are read
 * @param width     stack width passed to makeStack() and to the overload
 * @param height    stack height passed to makeStack() and to the overload
 * @return whatever the ImageStack-based overload returns
 */
public ImagePlus linearStackToColor(
        EyesisCorrectionParameters.CLTParameters       clt_parameters,
        EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
        EyesisCorrectionParameters.RGBParameters       rgbParameters,
        String  name,
        String  suffix,               // such as disparity=...
        boolean toRGB,
        boolean bpp16,                // 16-bit per channel color mode for result
        boolean saveShowIntermediate, // save/show if set globally
        boolean saveShowFinal,        // save/show result (color image?)
        float [][] iclt_data,
        int     width,                // int tilesX,
        int     height,               // int tilesY,
        double  scaleExposure,
        int     debugLevel)
{
    showDoubleFloatArrays stackBuilder = new showDoubleFloatArrays();
    // Only the first three planes go into the stack
    float [][] colorPlanes = new float [][] {iclt_data[0], iclt_data[1], iclt_data[2]};
    // Optional fourth plane is alpha (0..1.0)
    float [] alphaPlane = (iclt_data.length > 3) ? iclt_data[3] : null;
    String [] channelTitles = new String [] {"red", "blue", "green"};
    ImageStack colorStack = stackBuilder.makeStack(
            colorPlanes,
            width,
            height,
            channelTitles, // or use null to get chn-nn slice names
            true);         // replace NaN with 0.0
    return linearStackToColor(
            clt_parameters,
            colorProcParameters,
            rgbParameters,
            name,
            suffix,
            toRGB,
            bpp16,
            saveShowIntermediate,
            saveShowFinal,
            colorStack,    // ImageStack stack,
            alphaPlane,    // float [] alpha_pixels,
            width,
            height,
            scaleExposure,
            debugLevel);
}
// double data
public
ImagePlus
linearStackToColor
(
public
ImagePlus
linearStackToColor
(
EyesisCorrectionParameters
.
CLTParameters
clt_parameters
,
EyesisCorrectionParameters
.
CLTParameters
clt_parameters
,
EyesisCorrectionParameters
.
ColorProcParameters
colorProcParameters
,
EyesisCorrectionParameters
.
ColorProcParameters
colorProcParameters
,
...
@@ -4511,19 +4561,64 @@ public class QuadCLT {
...
@@ -4511,19 +4561,64 @@ public class QuadCLT {
String
[]
sliceNames
=
{
"red"
,
"blue"
,
"green"
};
String
[]
sliceNames
=
{
"red"
,
"blue"
,
"green"
};
double
[]
alpha
=
null
;
// (0..1.0)
double
[]
alpha
=
null
;
// (0..1.0)
double
[][]
rgb_in
=
{
iclt_data
[
0
],
iclt_data
[
1
],
iclt_data
[
2
]};
double
[][]
rgb_in
=
{
iclt_data
[
0
],
iclt_data
[
1
],
iclt_data
[
2
]};
if
(
iclt_data
.
length
>
3
)
alpha
=
iclt_data
[
3
];
float
[]
alpha_pixels
=
null
;
if
(
iclt_data
.
length
>
3
)
{
alpha
=
iclt_data
[
3
];
if
(
alpha
!=
null
){
alpha_pixels
=
new
float
[
alpha
.
length
];
for
(
int
i
=
0
;
i
<
alpha
.
length
;
i
++){
alpha_pixels
[
i
]
=
(
float
)
alpha
[
i
];
}
}
}
ImageStack
stack
=
sdfa_instance
.
makeStack
(
ImageStack
stack
=
sdfa_instance
.
makeStack
(
rgb_in
,
// iclt_data,
rgb_in
,
// iclt_data,
width
,
// (tilesX + 0) * clt_parameters.transform_size,
width
,
// (tilesX + 0) * clt_parameters.transform_size,
height
,
// (tilesY + 0) * clt_parameters.transform_size,
height
,
// (tilesY + 0) * clt_parameters.transform_size,
sliceNames
,
// or use null to get chn-nn slice names
sliceNames
,
// or use null to get chn-nn slice names
true
);
// replace NaN with 0.0
true
);
// replace NaN with 0.0
return
linearStackToColor
(
clt_parameters
,
// EyesisCorrectionParameters.CLTParameters clt_parameters,
colorProcParameters
,
// EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
rgbParameters
,
// EyesisCorrectionParameters.RGBParameters rgbParameters,
name
,
// String name,
suffix
,
// String suffix, // such as disparity=...
toRGB
,
// boolean toRGB,
bpp16
,
// boolean bpp16, // 16-bit per channel color mode for result
saveShowIntermediate
,
// boolean saveShowIntermediate, // save/show if set globally
saveShowFinal
,
// boolean saveShowFinal, // save/show result (color image?)
stack
,
// ImageStack stack,
alpha_pixels
,
// float [] alpha_pixels,
width
,
// int width, // int tilesX,
height
,
// int height, // int tilesY,
scaleExposure
,
// double scaleExposure,
debugLevel
);
//int debugLevel
}
public
ImagePlus
linearStackToColor
(
EyesisCorrectionParameters
.
CLTParameters
clt_parameters
,
EyesisCorrectionParameters
.
ColorProcParameters
colorProcParameters
,
EyesisCorrectionParameters
.
RGBParameters
rgbParameters
,
String
name
,
String
suffix
,
// such as disparity=...
boolean
toRGB
,
boolean
bpp16
,
// 16-bit per channel color mode for result
boolean
saveShowIntermediate
,
// save/show if set globally
boolean
saveShowFinal
,
// save/show result (color image?)
ImageStack
stack
,
float
[]
alpha_pixels
,
int
width
,
// int tilesX,
int
height
,
// int tilesY,
double
scaleExposure
,
int
debugLevel
)
{
// showDoubleFloatArrays sdfa_instance = new showDoubleFloatArrays(); // just for debugging?
if
(
debugLevel
>
-
1
)
{
// 0){
if
(
debugLevel
>
-
1
)
{
// 0){
double
[]
chn_avg
=
{
0.0
,
0.0
,
0.0
};
double
[]
chn_avg
=
{
0.0
,
0.0
,
0.0
};
float
[]
pixels
;
float
[]
pixels
;
// int width = stack.getWidth();
// int height = stack.getHeight();
for
(
int
c
=
0
;
c
<
3
;
c
++){
for
(
int
c
=
0
;
c
<
3
;
c
++){
pixels
=
(
float
[])
stack
.
getPixels
(
c
+
1
);
pixels
=
(
float
[])
stack
.
getPixels
(
c
+
1
);
for
(
int
i
=
0
;
i
<
pixels
.
length
;
i
++){
for
(
int
i
=
0
;
i
<
pixels
.
length
;
i
++){
...
@@ -4610,11 +4705,7 @@ public class QuadCLT {
...
@@ -4610,11 +4705,7 @@ public class QuadCLT {
titleFull
=
name
+
"-YPrPb"
+
suffix
;
titleFull
=
name
+
"-YPrPb"
+
suffix
;
if
(
debugLevel
>
1
)
System
.
out
.
println
(
"Using full stack, including YPbPr"
);
if
(
debugLevel
>
1
)
System
.
out
.
println
(
"Using full stack, including YPbPr"
);
}
}
if
(
alpha
!=
null
){
if
(
alpha_pixels
!=
null
){
float
[]
alpha_pixels
=
new
float
[
alpha
.
length
];
for
(
int
i
=
0
;
i
<
alpha
.
length
;
i
++){
alpha_pixels
[
i
]
=
(
float
)
alpha
[
i
];
}
stack
.
addSlice
(
"alpha"
,
alpha_pixels
);
stack
.
addSlice
(
"alpha"
,
alpha_pixels
);
}
}
...
@@ -4674,6 +4765,8 @@ public class QuadCLT {
...
@@ -4674,6 +4765,8 @@ public class QuadCLT {
}
}
public
void
apply_fine_corr
(
public
void
apply_fine_corr
(
double
[][][]
corr
,
double
[][][]
corr
,
int
debugLevel
)
int
debugLevel
)
...
...
src/main/java/TwoQuadCLT.java
View file @
72b6bdce
This diff is collapsed.
Click to expand it.
src/main/java/showDoubleFloatArrays.java
View file @
72b6bdce
...
@@ -213,6 +213,30 @@ import ij.process.ImageProcessor;
...
@@ -213,6 +213,30 @@ import ij.process.ImageProcessor;
return
array_stack
;
return
array_stack
;
}
}
public
ImageStack
makeStack
(
float
[][]
pixels
,
int
width
,
int
height
,
String
[]
titles
,
boolean
noNaN
)
{
float
[]
fpixels
;
ImageStack
array_stack
=
new
ImageStack
(
width
,
height
);
for
(
int
i
=
0
;
i
<
pixels
.
length
;
i
++)
if
(
pixels
[
i
]!=
null
)
{
if
(
pixels
[
i
].
length
!=(
width
*
height
)){
System
.
out
.
println
(
"showArrays(): pixels["
+
i
+
"].length="
+
pixels
[
i
].
length
+
" != width (+"
+
width
+
") * height("
+
height
+
")="
+(
width
*
height
));
return
null
;
}
if
(
noNaN
){
fpixels
=
new
float
[
pixels
[
i
].
length
];
for
(
int
j
=
0
;
j
<
fpixels
.
length
;
j
++)
fpixels
[
j
]=
Float
.
isNaN
(
pixels
[
i
][
j
])?
0.0
F:
((
float
)
pixels
[
i
][
j
]);
}
else
{
fpixels
=
pixels
[
i
];
}
if
(
titles
!=
null
){
array_stack
.
addSlice
(
titles
[
i
],
fpixels
);
}
else
{
array_stack
.
addSlice
(
"chn-"
+
i
,
fpixels
);
}
}
return
array_stack
;
}
public
ImagePlus
[]
makeArrays
(
double
[][]
pixels
,
int
width
,
int
height
,
String
title
)
{
public
ImagePlus
[]
makeArrays
(
double
[][]
pixels
,
int
width
,
int
height
,
String
title
)
{
int
i
,
j
;
int
i
,
j
;
float
[]
fpixels
;
float
[]
fpixels
;
...
...
src/main/resources/TileProcessor.cuh
View file @
72b6bdce
...
@@ -36,9 +36,24 @@
...
@@ -36,9 +36,24 @@
* \brief Top level of the Tile Processor for frequency domain
* \brief Top level of the Tile Processor for frequency domain
*/
*/
// Avoiding includes in jcuda, all source files will be merged
#ifndef JCUDA
#pragma once
#pragma once
#include "dtt8x8.cuh"
#include "dtt8x8.cuh"
#define THREADSX (DTT_SIZE)
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define NUM_CAMS 4
#define NUM_COLORS 3
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK 4
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#endif
//#define IMCLT14
//#define IMCLT14
//#define NOICLT 1
//#define NOICLT 1
//#define TEST_IMCLT
//#define TEST_IMCLT
...
@@ -70,7 +85,7 @@
...
@@ -70,7 +85,7 @@
// Removed rest of NOICLT : Average run time =943.456177 ms
// Removed rest of NOICLT : Average run time =943.456177 ms
// Added lpf: Average run time =1046.101318 ms (0.1 sec, 10%) - can be combined with the PSF kernel
// Added lpf: Average run time =1046.101318 ms (0.1 sec, 10%) - can be combined with the PSF kernel
//#define USE_UMUL24
//#define USE_UMUL24
#define TILES_PER_BLOCK 4
////
#define TILES_PER_BLOCK 4
//Average run time =5155.922852 ms
//Average run time =5155.922852 ms
//Average run time =1166.388306 ms
//Average run time =1166.388306 ms
//Average run time =988.750977 ms
//Average run time =988.750977 ms
...
@@ -78,25 +93,16 @@
...
@@ -78,25 +93,16 @@
//Average run time =9656.743164 ms
//Average run time =9656.743164 ms
// Average run time =9422.057617 ms (reducing divergence)
// Average run time =9422.057617 ms (reducing divergence)
//#define TILES_PER_BLOCK 1
//#define TILES_PER_BLOCK 1
#define THREADS_PER_TILE 8
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define NUM_CAMS 4
#define NUM_COLORS 3
#define KERNELS_LSTEP 4
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define IMAGE_TILE_SIDE 18
#define IMCLT_THREADS_PER_TILE 16
//#define THREADS_PER_TILE 8
#define IMCLT_TILES_PER_BLOCK 4
//#define IMCLT_THREADS_PER_TILE 16
//#define IMCLT_TILES_PER_BLOCK 4
#define KERNELS_STEP (1 << KERNELS_LSTEP)
#define KERNELS_STEP (1 << KERNELS_LSTEP)
#define TILESX (IMG_WIDTH / DTT_SIZE)
#define TILESX (IMG_WIDTH / DTT_SIZE)
#define TILESY (IMG_HEIGHT / DTT_SIZE)
#define TILESY (IMG_HEIGHT / DTT_SIZE)
// increase row length by 1 so vertical passes will use different ports
// increase row length by 1 so vertical passes will use different ports
#define THREADSX (DTT_SIZE)
#define DTT_SIZE1 (DTT_SIZE + 1)
#define DTT_SIZE1 (DTT_SIZE + 1)
#define DTT_SIZE2 (2 * DTT_SIZE)
#define DTT_SIZE2 (2 * DTT_SIZE)
#define DTT_SIZE21 (DTT_SIZE2 + 1)
#define DTT_SIZE21 (DTT_SIZE2 + 1)
...
@@ -124,9 +130,10 @@
...
@@ -124,9 +130,10 @@
// struct tp_task
// struct tp_task
//#define TASK_SIZE 12
//#define TASK_SIZE 12
struct
tp_task
{
struct
tp_task
{
long
task
;
int
task
;
short
ty
;
int
txy
;
short
tx
;
// short ty;
// short tx;
float
xy
[
NUM_CAMS
][
2
];
float
xy
[
NUM_CAMS
][
2
];
};
};
struct
CltExtra
{
struct
CltExtra
{
...
@@ -350,8 +357,9 @@ __device__ void imclt_plane(
...
@@ -350,8 +357,9 @@ __device__ void imclt_plane(
const
size_t
dstride
);
// in floats (pixels)
const
size_t
dstride
);
// in floats (pixels)
extern
"C"
extern
"C"
__global__
void
tileProcessor
(
__global__
void
convert_correct_tiles
(
struct
CltExtra
**
gpu_kernel_offsets
,
// [NUM_CAMS],
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
float
**
gpu_kernel_offsets
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_kernels
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
float
**
gpu_images
,
// [NUM_CAMS],
struct
tp_task
*
gpu_tasks
,
struct
tp_task
*
gpu_tasks
,
...
@@ -361,6 +369,7 @@ __global__ void tileProcessor(
...
@@ -361,6 +369,7 @@ __global__ void tileProcessor(
int
lpf_mask
)
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
int
lpf_mask
)
// apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
{
{
// struct CltExtra* gpu_kernel_offsets = (struct CltExtra*) vgpu_kernel_offsets;
dim3
t
=
threadIdx
;
dim3
t
=
threadIdx
;
int
tile_in_block
=
threadIdx
.
y
;
int
tile_in_block
=
threadIdx
.
y
;
int
task_num
=
blockIdx
.
x
*
TILES_PER_BLOCK
+
tile_in_block
;
int
task_num
=
blockIdx
.
x
*
TILES_PER_BLOCK
+
tile_in_block
;
...
@@ -370,8 +379,9 @@ __global__ void tileProcessor(
...
@@ -370,8 +379,9 @@ __global__ void tileProcessor(
__shared__
struct
tp_task
tt
[
TILES_PER_BLOCK
];
__shared__
struct
tp_task
tt
[
TILES_PER_BLOCK
];
// Copy task data to shared memory
// Copy task data to shared memory
tt
[
tile_in_block
].
task
=
gpu_task
->
task
;
tt
[
tile_in_block
].
task
=
gpu_task
->
task
;
tt
[
tile_in_block
].
tx
=
gpu_task
->
tx
;
// tt[tile_in_block].tx = gpu_task -> tx;
tt
[
tile_in_block
].
ty
=
gpu_task
->
ty
;
// tt[tile_in_block].ty = gpu_task -> ty;
tt
[
tile_in_block
].
txy
=
gpu_task
->
txy
;
int
thread0
=
threadIdx
.
x
&
1
;
int
thread0
=
threadIdx
.
x
&
1
;
int
thread12
=
threadIdx
.
x
>>
1
;
int
thread12
=
threadIdx
.
x
>>
1
;
if
(
thread12
<
NUM_CAMS
)
{
if
(
thread12
<
NUM_CAMS
)
{
...
@@ -408,7 +418,7 @@ __global__ void tileProcessor(
...
@@ -408,7 +418,7 @@ __global__ void tileProcessor(
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++
){
for
(
int
ncam
=
0
;
ncam
<
NUM_CAMS
;
ncam
++
){
for
(
int
color
=
0
;
color
<
NUM_COLORS
;
color
++
){
for
(
int
color
=
0
;
color
<
NUM_COLORS
;
color
++
){
convertCorrectTile
(
convertCorrectTile
(
gpu_kernel_offsets
[
ncam
],
// float
* gpu_kernel_offsets,
(
struct
CltExtra
*
)(
gpu_kernel_offsets
[
ncam
]),
// struct CltExtra
* gpu_kernel_offsets,
gpu_kernels
[
ncam
],
// float * gpu_kernels,
gpu_kernels
[
ncam
],
// float * gpu_kernels,
gpu_images
[
ncam
],
// float * gpu_images,
gpu_images
[
ncam
],
// float * gpu_images,
gpu_clt
[
ncam
],
// float * gpu_clt,
gpu_clt
[
ncam
],
// float * gpu_clt,
...
@@ -416,7 +426,8 @@ __global__ void tileProcessor(
...
@@ -416,7 +426,8 @@ __global__ void tileProcessor(
lpf_mask
,
// const int lpf_mask,
lpf_mask
,
// const int lpf_mask,
tt
[
tile_in_block
].
xy
[
ncam
][
0
],
// const float centerX,
tt
[
tile_in_block
].
xy
[
ncam
][
0
],
// const float centerX,
tt
[
tile_in_block
].
xy
[
ncam
][
1
],
// const float centerY,
tt
[
tile_in_block
].
xy
[
ncam
][
1
],
// const float centerY,
tt
[
tile_in_block
].
tx
|
(
tt
[
tile_in_block
].
ty
<<
16
),
// const int txy,
// tt[tile_in_block].tx | (tt[tile_in_block].ty <<16), // const int txy,
tt
[
tile_in_block
].
txy
,
// const int txy,
dstride
,
// size_t dstride, // in floats (pixels)
dstride
,
// size_t dstride, // in floats (pixels)
(
float
*
)(
clt_tile
[
tile_in_block
]),
// float clt_tile [TILES_PER_BLOCK][NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE])
(
float
*
)(
clt_tile
[
tile_in_block
]),
// float clt_tile [TILES_PER_BLOCK][NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE])
(
float
*
)(
clt_kernels
[
tile_in_block
]),
// float clt_tile [NUM_COLORS][4][DTT_SIZE][DTT_SIZE],
(
float
*
)(
clt_kernels
[
tile_in_block
]),
// float clt_tile [NUM_COLORS][4][DTT_SIZE][DTT_SIZE],
...
...
src/main/resources/dtt8x8.cuh
View file @
72b6bdce
...
@@ -44,12 +44,13 @@
...
@@ -44,12 +44,13 @@
* This file is compatible with both runtime and driver API, runtime is used for development
* This file is compatible with both runtime and driver API, runtime is used for development
* with Nvidia Nsight, driver API when calling these kernels from Java
* with Nvidia Nsight, driver API when calling these kernels from Java
*/
*/
#ifndef JCUDA
#define DTT_SIZE 8
#endif
#pragma once
#pragma once
#define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
#define DTT_SIZE 8
//#define CUDART_INF_F __int_as_float(0x7f800000)
//#define CUDART_INF_F __int_as_float(0x7f800000)
/*
/*
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment