Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
f04bcc82
Commit
f04bcc82
authored
Sep 20, 2018
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
working on GPU code
parent
cd9b6096
Changes
5
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
913 additions
and
17 deletions
+913
-17
.gitignore
.gitignore
+2
-1
GPUTileProcessor.java
src/main/java/GPUTileProcessor.java
+9
-10
ImageDtt.java
src/main/java/ImageDtt.java
+786
-1
TwoQuadCLT.java
src/main/java/TwoQuadCLT.java
+115
-5
dtt8x8.cuh
src/main/resources/dtt8x8.cuh
+1
-0
No files found.
.gitignore
View file @
f04bcc82
...
@@ -7,3 +7,4 @@ NC393I
...
@@ -7,3 +7,4 @@ NC393I
attic
attic
*.log
*.log
FOCUS-PSF*
FOCUS-PSF*
src/main/resources/trained_model
\ No newline at end of file
src/main/java/GPUTileProcessor.java
View file @
f04bcc82
...
@@ -117,7 +117,7 @@ public class GPUTileProcessor {
...
@@ -117,7 +117,7 @@ public class GPUTileProcessor {
copyH2D
.
WidthInBytes
=
width_in_bytes
;
copyH2D
.
WidthInBytes
=
width_in_bytes
;
copyH2D
.
Height
=
height
;
// /4;
copyH2D
.
Height
=
height
;
// /4;
// for copying results
back
to host
// for copying results to host
CUDA_MEMCPY2D
copyD2H
=
new
CUDA_MEMCPY2D
();
CUDA_MEMCPY2D
copyD2H
=
new
CUDA_MEMCPY2D
();
copyD2H
.
srcMemoryType
=
CUmemorytype
.
CU_MEMORYTYPE_DEVICE
;
copyD2H
.
srcMemoryType
=
CUmemorytype
.
CU_MEMORYTYPE_DEVICE
;
copyD2H
.
srcDevice
=
dst_dpointer
;
// ((test & 1) ==0) ? src_dpointer : dst_dpointer; // copy same data
copyD2H
.
srcDevice
=
dst_dpointer
;
// ((test & 1) ==0) ? src_dpointer : dst_dpointer; // copy same data
...
@@ -130,8 +130,7 @@ public class GPUTileProcessor {
...
@@ -130,8 +130,7 @@ public class GPUTileProcessor {
copyD2H
.
WidthInBytes
=
width_in_bytes
;
copyD2H
.
WidthInBytes
=
width_in_bytes
;
copyD2H
.
Height
=
height
;
// /2;
copyD2H
.
Height
=
height
;
// /2;
// Set up the kernel parameters: A pointer to an array
// kernel parameters: pointer to pointers
// of pointers which point to the actual values.
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
kernelParameters
=
Pointer
.
to
(
Pointer
.
to
(
dst_dpointer
),
Pointer
.
to
(
dst_dpointer
),
Pointer
.
to
(
src_dpointer
),
Pointer
.
to
(
src_dpointer
),
...
@@ -152,16 +151,16 @@ public class GPUTileProcessor {
...
@@ -152,16 +151,16 @@ public class GPUTileProcessor {
0
,
null
,
// Shared memory size and stream (shared - only dynamic, static is in code)
0
,
null
,
// Shared memory size and stream (shared - only dynamic, static is in code)
kernelParameters
,
null
);
// Kernel- and extra parameters
kernelParameters
,
null
);
// Kernel- and extra parameters
// Copy the data from the device
back
to the host
// Copy the data from the device to the host
cuMemcpy2D
(
copyD2H
);
cuMemcpy2D
(
copyD2H
);
// clean up
// clean up
cuMemFree
(
src_dpointer
);
cuMemFree
(
src_dpointer
);
cuMemFree
(
dst_dpointer
);
cuMemFree
(
dst_dpointer
);
}
}
public
int
setup
()
throws
IOException
// String arg, ImagePlus imagePlus)
public
int
setup
()
throws
IOException
{
{
// From code by Marco Hutter - http://www.jcuda.org
// Enable exceptions and omit all subsequent error checks
// Enable exceptions and omit all subsequent error checks
JCudaDriver
.
setExceptionsEnabled
(
true
);
JCudaDriver
.
setExceptionsEnabled
(
true
);
JNvrtc
.
setExceptionsEnabled
(
true
);
JNvrtc
.
setExceptionsEnabled
(
true
);
...
@@ -174,7 +173,9 @@ public class GPUTileProcessor {
...
@@ -174,7 +173,9 @@ public class GPUTileProcessor {
cuCtxCreate
(
context
,
0
,
device
);
cuCtxCreate
(
context
,
0
,
device
);
// Obtain the CUDA source code from the CUDA file
// Obtain the CUDA source code from the CUDA file
// Get absolute path to the file in the resource folder, then read it as a normal file.
// When using just Eclipse resources - it does not notice that the file
// was edited (happens frequently during kernel development).
ClassLoader
classLoader
=
getClass
().
getClassLoader
();
ClassLoader
classLoader
=
getClass
().
getClassLoader
();
File
file
=
new
File
(
classLoader
.
getResource
(
GPU_KERNEL_FILE
).
getFile
());
File
file
=
new
File
(
classLoader
.
getResource
(
GPU_KERNEL_FILE
).
getFile
());
System
.
out
.
println
(
file
.
getAbsolutePath
());
System
.
out
.
println
(
file
.
getAbsolutePath
());
...
@@ -196,9 +197,7 @@ public class GPUTileProcessor {
...
@@ -196,9 +197,7 @@ public class GPUTileProcessor {
}
}
/**
/**
* Create the CUDA function object for the kernel function with the
* Create the kernel function by its name in the source code
* given name that is contained in the given source code
*
* @param sourceCode The source code
* @param sourceCode The source code
* @param kernelName The kernel function name
* @param kernelName The kernel function name
* @return
* @return
...
...
src/main/java/ImageDtt.java
View file @
f04bcc82
This diff is collapsed.
Click to expand it.
src/main/java/TwoQuadCLT.java
View file @
f04bcc82
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
** -----------------------------------------------------------------------------**
** -----------------------------------------------------------------------------**
**
**
*/
*/
import
java.io.DataOutputStream
;
import
java.io.File
;
import
java.io.File
;
import
java.io.FileNotFoundException
;
import
java.io.FileNotFoundException
;
import
java.io.FileOutputStream
;
import
java.io.FileOutputStream
;
...
@@ -907,7 +908,80 @@ public class TwoQuadCLT {
...
@@ -907,7 +908,80 @@ public class TwoQuadCLT {
return
results
;
return
results
;
}
}
/**
 * Dumps CLT kernels, image data and per-tile port coordinates to flat binary
 * files of 32-bit floats (intended for GPU code development).
 *
 * <p>All values are written with {@link DataOutputStream#writeFloat(float)},
 * i.e. as IEEE 754 single precision, high byte first (big-endian).
 * Each of the three inputs is optional: pass {@code null} to skip it.
 *
 * <p>Files produced (one set per channel index {@code chn}):
 * <ul>
 *   <li>{@code <file_prefix>_chn<chn>[_transposed].kernel} - for each tile row,
 *       tile column and color: the first four 64-element kernel parts;</li>
 *   <li>{@code <file_prefix>_chn<chn>[_transposed].kernel_offsets} - for each tile
 *       row, tile column and color: the whole fifth kernel part (index 4);</li>
 *   <li>{@code <file_prefix>_chn<chn>.bayer} - per pixel sum of the first three
 *       components of {@code image_data[chn]};</li>
 *   <li>{@code <file_prefix>_chn<chn>.portsxy} - for each tile: x then y.</li>
 * </ul>
 *
 * @param file_prefix path prefix (directory plus base name) for all output files
 * @param clt_kernels kernels indexed as [chn][col][ty][tx][part][], or null to skip;
 *                    parts 0..3 must hold at least 64 elements each
 * @param image_data  image data indexed as [chn][component][pixel], or null to skip;
 *                    components 0..2 are summed (presumably the color planes of a
 *                    Bayer mosaic - confirm against the caller)
 * @param port_xy     coordinates indexed as [tile][chn][0=x,1=y], or null to skip.
 *                    NOTE(review): every tile entry is dereferenced, so all tiles
 *                    are assumed to be non-null - confirm against the producer
 * @param transpose   if true, each 64-element kernel part is written as a transposed
 *                    8x8 block and "_transposed" is inserted into the kernel file names
 * @throws IOException if any of the files can not be created or written
 */
public void saveFloatKernels(
		String file_prefix,
		double [][][][][][] clt_kernels,
		double [][][] image_data,
		double [][][] port_xy,
		boolean transpose) throws IOException {
	if (clt_kernels != null) {
		final String suffix = transpose ? "_transposed" : "";
		for (int chn = 0; chn < clt_kernels.length; chn++) {
			String kern_path = file_prefix + "_chn" + chn + suffix + ".kernel";
			String offs_path = file_prefix + "_chn" + chn + suffix + ".kernel_offsets";
			double [][][][][] chn_kernels = clt_kernels[chn]; // [col][ty][tx][part][]
			// Tile grid dimensions are taken from the first color, as in the file layout
			// the color index is nested inside the tile indices.
			try (DataOutputStream dos = openFloatStream(kern_path)) {
				for (int ty = 0; ty < chn_kernels[0].length; ty++) {
					for (int tx = 0; tx < chn_kernels[0][ty].length; tx++) {
						for (int col = 0; col < chn_kernels.length; col++) {
							for (int p = 0; p < 4; p++) {
								double [] pa = chn_kernels[col][ty][tx][p];
								for (int i0 = 0; i0 < 64; i0++) {
									// Transposing swaps the 3-bit row and column fields of the index
									int i = transpose ? (((i0 & 7) << 3) + ((i0 >> 3) & 7)) : i0;
									dos.writeFloat((float) pa[i]);
								}
							}
						}
					}
				}
			}
			try (DataOutputStream dos = openFloatStream(offs_path)) {
				for (int ty = 0; ty < chn_kernels[0].length; ty++) {
					for (int tx = 0; tx < chn_kernels[0][ty].length; tx++) {
						for (int col = 0; col < chn_kernels.length; col++) {
							double [] pa = chn_kernels[col][ty][tx][4];
							for (int i = 0; i < pa.length; i++) {
								dos.writeFloat((float) pa[i]);
							}
						}
					}
				}
			}
		}
	}
	if (image_data != null) {
		for (int chn = 0; chn < image_data.length; chn++) {
			String img_path = file_prefix + "_chn" + chn + ".bayer";
			double [][] planes = image_data[chn];
			try (DataOutputStream dos = openFloatStream(img_path)) {
				for (int i = 0; i < planes[0].length; i++) {
					dos.writeFloat((float) (planes[0][i] + planes[1][i] + planes[2][i]));
				}
			}
		}
	}
	// An empty tile array has no channel count to take from the first tile - nothing to write
	if ((port_xy != null) && (port_xy.length > 0)) {
		for (int chn = 0; chn < port_xy[0].length; chn++) {
			String img_path = file_prefix + "_chn" + chn + ".portsxy";
			try (DataOutputStream dos = openFloatStream(img_path)) {
				for (int i = 0; i < port_xy.length; i++) {
					dos.writeFloat((float) (port_xy[i][chn][0])); // x-offset
					dos.writeFloat((float) (port_xy[i][chn][1])); // y-offset
				}
			}
		}
	}
}

/**
 * Opens (creating or truncating) a file for writing binary floats.
 * The stream is buffered because DataOutputStream passes each float to the
 * underlying stream as four separate single-byte writes.
 *
 * @param path file to write
 * @return stream to be closed by the caller (closing flushes the buffer)
 * @throws IOException if the file can not be opened for writing
 */
private static DataOutputStream openFloatStream(String path) throws IOException {
	return new DataOutputStream(new java.io.BufferedOutputStream(new FileOutputStream(path)));
}
public
ImagePlus
[]
processCLTQuadCorrPairGpu
(
public
ImagePlus
[]
processCLTQuadCorrPairGpu
(
GPUTileProcessor
gPUTileProcessor
,
GPUTileProcessor
gPUTileProcessor
,
...
@@ -1003,6 +1077,8 @@ public class TwoQuadCLT {
...
@@ -1003,6 +1077,8 @@ public class TwoQuadCLT {
double
[][][][][][]
clt_kernels_main
=
quadCLT_main
.
getCLTKernels
();
// [4][3][123][164]{[64],[64],[64],[64],[8]}
double
[][][][][][]
clt_kernels_main
=
quadCLT_main
.
getCLTKernels
();
// [4][3][123][164]{[64],[64],[64],[64],[8]}
double
[][][][][][]
clt_kernels_aux
=
quadCLT_aux
.
getCLTKernels
();
double
[][][][][][]
clt_kernels_aux
=
quadCLT_aux
.
getCLTKernels
();
//[4][3][123][164][5][]
double
[][]
dbg_kern
=
clt_kernels_main
[
0
][
0
][
0
][
0
];
double
[][]
dbg_kern
=
clt_kernels_main
[
0
][
0
][
0
][
0
];
// here all data is ready (images, kernels) to try GPU code
// here all data is ready (images, kernels) to try GPU code
...
@@ -1033,13 +1109,12 @@ public class TwoQuadCLT {
...
@@ -1033,13 +1109,12 @@ public class TwoQuadCLT {
"converted"
,
"converted"
,
dbg_titles
);
dbg_titles
);
if
(
debugLevel
<
1000
)
{
return
null
;
}
double
[][][]
port_xy_main_dbg
=
new
double
[
tilesX
*
tilesY
][][];
double
[][][]
port_xy_aux_dbg
=
new
double
[
tilesX
*
tilesY
][][];
final
double
[][][][][][][]
clt_bidata
=
// new double[2][quad][nChn][tilesY][tilesX][][]; // first index - main/aux
final
double
[][][][][][][]
clt_bidata
=
// new double[2][quad][nChn][tilesY][tilesX][][]; // first index - main/aux
image_dtt
.
clt_bi_quad
(
image_dtt
.
clt_bi_quad
_dbg
(
clt_parameters
,
// final EyesisCorrectionParameters.CLTParameters clt_parameters,
clt_parameters
,
// final EyesisCorrectionParameters.CLTParameters clt_parameters,
clt_parameters
.
fat_zero
,
// final double fatzero, // May use correlation fat zero from 2 different parameters - fat_zero and rig.ml_fatzero
clt_parameters
.
fat_zero
,
// final double fatzero, // May use correlation fat zero from 2 different parameters - fat_zero and rig.ml_fatzero
notch_mode
,
// final boolean notch_mode, // use notch filter for inter-camera correlation to detect poles
notch_mode
,
// final boolean notch_mode, // use notch filter for inter-camera correlation to detect poles
...
@@ -1070,9 +1145,44 @@ public class TwoQuadCLT {
...
@@ -1070,9 +1145,44 @@ public class TwoQuadCLT {
// woi_tops, // final int [][] woi_tops,
// woi_tops, // final int [][] woi_tops,
ers_delay
,
// final double [][][] ers_delay, // if not null - fill with tile center acquisition delay
ers_delay
,
// final double [][][] ers_delay, // if not null - fill with tile center acquisition delay
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
threadsMax
,
// final int threadsMax, // maximal number of threads to launch
debugLevel
);
// final int globalDebugLevel);
debugLevel
,
// final int globalDebugLevel);
port_xy_main_dbg
,
// final double [][][] port_xy_main_dbg, // for each tile/port save x,y pixel coordinates (gpu code development)
port_xy_aux_dbg
);
// final double [][][] port_xy_aux_dbg) // for each tile/port save x,y pixel coordinates (gpu code development)
String
kernel_dir
=
"/home/eyesis/workspace-python3/nvidia_dct8x8/clt/"
;
boolean
[][]
what_to_save
=
{{
false
,
false
,
true
},
{
false
,
false
,
true
}};
try
{
saveFloatKernels
(
kernel_dir
+
"main"
,
// String file_prefix,
(
what_to_save
[
0
][
0
]?
clt_kernels_main:
null
),
// double [][][][][][] clt_kernels, // null
(
what_to_save
[
0
][
1
]?
quadCLT_main
.
image_data
:
null
),
(
what_to_save
[
0
][
2
]?
port_xy_main_dbg:
null
),
// double [][][] port_xy,
true
);
}
catch
(
IOException
e
)
{
System
.
out
.
println
(
"Failed to save flattened kernels tp "
+
kernel_dir
);
// TODO Auto-generated catch block
e
.
printStackTrace
();
}
// boolean transpose);
try
{
saveFloatKernels
(
kernel_dir
+
"aux"
,
// String file_prefix,
(
what_to_save
[
1
][
0
]?
clt_kernels_aux:
null
),
// double [][][][][][] clt_kernels, // null
(
what_to_save
[
1
][
1
]?
quadCLT_aux
.
image_data
:
null
),
(
what_to_save
[
1
][
2
]?
port_xy_aux_dbg:
null
),
// double [][][] port_xy,
true
);
}
catch
(
IOException
e
)
{
System
.
out
.
println
(
"Failed to save flattened kernels tp "
+
kernel_dir
);
// TODO Auto-generated catch block
e
.
printStackTrace
();
}
// boolean transpose);
if
(
debugLevel
<
1000
)
{
return
null
;
}
if
(
ers_delay
!=
null
)
{
if
(
ers_delay
!=
null
)
{
showERSDelay
(
ers_delay
);
showERSDelay
(
ers_delay
);
}
}
...
...
src/main/resources/dtt8x8.cuh
View file @
f04bcc82
...
@@ -43,6 +43,7 @@
...
@@ -43,6 +43,7 @@
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
#define DTT_SIZE 8
#define DTT_SIZE 8
//#define CUDART_INF_F __int_as_float(0x7f800000)
/*
/*
Python code to generate constant coefficients:
Python code to generate constant coefficients:
def dct_constants():
def dct_constants():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment