Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
imagej-elphel
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
3
Issues
3
List
Board
Labels
Milestones
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Elphel
imagej-elphel
Commits
0123f06e
Commit
0123f06e
authored
Jul 21, 2025
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Unified GPU for DP2/no DP2 (12.6.0/11.2.0)
parent
8755d17e
Changes
2
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
197 additions
and
57 deletions
+197
-57
GPUTileProcessor.java
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+30
-13
GpuQuad.java
src/main/java/com/elphel/imagej/gpu/GpuQuad.java
+167
-44
No files found.
src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
View file @
0123f06e
...
@@ -6,7 +6,7 @@ package com.elphel.imagej.gpu;
...
@@ -6,7 +6,7 @@ package com.elphel.imagej.gpu;
** GPU acceleration for the Tile Processor
** GPU acceleration for the Tile Processor
**
**
**
**
** Copyright (C) 2018 Elphel, Inc.
** Copyright (C) 2018
-2025
Elphel, Inc.
**
**
** -----------------------------------------------------------------------------**
** -----------------------------------------------------------------------------**
**
**
...
@@ -48,7 +48,7 @@ import static jcuda.nvrtc.JNvrtc.nvrtcCreateProgram;
...
@@ -48,7 +48,7 @@ import static jcuda.nvrtc.JNvrtc.nvrtcCreateProgram;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcDestroyProgram
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcDestroyProgram
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcGetPTX
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcGetPTX
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcGetProgramLog
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcGetProgramLog
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcVersion
;
//
import static jcuda.nvrtc.JNvrtc.nvrtcVersion;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcGetNumSupportedArchs
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcGetNumSupportedArchs
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcGetSupportedArchs
;
import
static
jcuda
.
nvrtc
.
JNvrtc
.
nvrtcGetSupportedArchs
;
...
@@ -62,6 +62,7 @@ import com.elphel.imagej.tileprocessor.Correlation2d;
...
@@ -62,6 +62,7 @@ import com.elphel.imagej.tileprocessor.Correlation2d;
import
ij.IJ
;
import
ij.IJ
;
import
ij.text.TextWindow
;
import
ij.text.TextWindow
;
import
jcuda.JCudaVersion
;
import
jcuda.Pointer
;
import
jcuda.Pointer
;
import
jcuda.driver.CUcontext
;
import
jcuda.driver.CUcontext
;
import
jcuda.driver.CUdevice
;
import
jcuda.driver.CUdevice
;
...
@@ -75,16 +76,20 @@ import jcuda.nvrtc.JNvrtc;
...
@@ -75,16 +76,20 @@ import jcuda.nvrtc.JNvrtc;
import
jcuda.nvrtc.nvrtcProgram
;
import
jcuda.nvrtc.nvrtcProgram
;
public
class
GPUTileProcessor
{
public
class
GPUTileProcessor
{
public
static
String
CUDA_VERSION
=
JCudaVersion
.
get
();
public
static
boolean
USE_CUDA12
=
CUDA_VERSION
.
startsWith
(
"12."
);
public
static
boolean
USE_DS_DP
=
false
;
// Use Dynamic Shared memory with Dynamic Parallelism (not implemented)
public
static
boolean
USE_DS_DP
=
false
;
// Use Dynamic Shared memory with Dynamic Parallelism (not implemented)
String
LIBRARY_PATH
=
"/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a"
;
// linux
String
LIBRARY_PATH
=
"/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a"
;
// linux
// Can be downloaded and twice extracted from
// Can be downloaded and twice extracted from
// https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cudart-dev-11-2_11.2.152-1_amd64.deb
// https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cudart-dev-11-2_11.2.152-1_amd64.deb
// First deb itself, then data.tar.xz, and it will have usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a inside
// First deb itself, then data.tar.xz, and it will have usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a inside
// Found "cuda-cudart-dev" on https://ubuntu.pkgs.org/
// Found "cuda-cudart-dev" on https://ubuntu.pkgs.org/
static
String
GPU_RESOURCE_DIR
=
"kernels"
;
static
String
GPU_RESOURCE_
TOP_
DIR
=
"kernels"
;
static
String
[]
GPU_KERNEL_FILES
=
{
"dtt8x8.cuh"
,
"TileProcessor.cuh"
};
static
String
[]
GPU_KERNEL_FILES
=
{
"dtt8x8.cuh"
,
"TileProcessor.cuh"
};
// "*" - generated defines, first index - separately compiled unit
// "*" - generated defines, first index - separately compiled unit
static
String
[][]
GPU_SRC_FILES
=
{{
"*"
,
"dtt8x8.h"
,
"dtt8x8.cu"
,
"geometry_correction.h"
,
"geometry_correction.cu"
,
"TileProcessor.h"
,
"TileProcessor.cuh"
}};
static
String
[][]
GPU_SRC_FILES
=
USE_CUDA12
?
(
new
String
[][]
{{
"*"
,
"dtt8x8.h"
,
"dtt8x8.cu"
,
"geometry_correction.h"
,
"geometry_correction.cu"
,
"TileProcessor.h"
,
"TileProcessor.cu"
}}):
(
new
String
[][]
{{
"*"
,
"dtt8x8.h"
,
"dtt8x8.cu"
,
"geometry_correction.h"
,
"geometry_correction.cu"
,
"TileProcessor.h"
,
"TileProcessor.cuh"
}});
static
String
GPU_CONVERT_DIRECT_NAME
=
"convert_direct"
;
// name in C code
static
String
GPU_CONVERT_DIRECT_NAME
=
"convert_direct"
;
// name in C code
static
String
GPU_IMCLT_ALL_NAME
=
"imclt_rbg_all"
;
static
String
GPU_IMCLT_ALL_NAME
=
"imclt_rbg_all"
;
static
String
GPU_CORRELATE2D_NAME
=
"correlate2D"
;
// name in C code
static
String
GPU_CORRELATE2D_NAME
=
"correlate2D"
;
// name in C code
...
@@ -270,7 +275,6 @@ public class GPUTileProcessor {
...
@@ -270,7 +275,6 @@ public class GPUTileProcessor {
public
GPUTileProcessor
(
public
GPUTileProcessor
(
String
cuda_project_directory
)
throws
IOException
String
cuda_project_directory
)
throws
IOException
{
{
// From code by Marco Hutter - http://www.jcuda.org
// From code by Marco Hutter - http://www.jcuda.org
// Enable exceptions and omit all subsequent error checks
// Enable exceptions and omit all subsequent error checks
JCudaDriver
.
setExceptionsEnabled
(
true
);
JCudaDriver
.
setExceptionsEnabled
(
true
);
...
@@ -310,7 +314,7 @@ public class GPUTileProcessor {
...
@@ -310,7 +314,7 @@ public class GPUTileProcessor {
}
else
{
}
else
{
File
file
=
null
;
File
file
=
null
;
if
((
cuda_project_directory
==
null
)
||
cuda_project_directory
.
isEmpty
())
{
if
((
cuda_project_directory
==
null
)
||
cuda_project_directory
.
isEmpty
())
{
file
=
new
File
(
classLoader
.
getResource
(
GPU_RESOURCE_
DIR
+
"/"
+
src_file
).
getFile
());
file
=
new
File
(
classLoader
.
getResource
(
GPU_RESOURCE_
TOP_DIR
+
"/"
+
CUDA_VERSION
+
"/"
+
src_file
).
getFile
());
System
.
out
.
println
(
"Loading resource "
+
file
);
System
.
out
.
println
(
"Loading resource "
+
file
);
}
else
{
}
else
{
File
src_dir
=
new
File
(
cuda_project_directory
,
"src"
);
File
src_dir
=
new
File
(
cuda_project_directory
,
"src"
);
...
@@ -507,22 +511,35 @@ public class GPUTileProcessor {
...
@@ -507,22 +511,35 @@ public class GPUTileProcessor {
// Use the NVRTC to create a program by compiling the source code
// Use the NVRTC to create a program by compiling the source code
nvrtcProgram
program
=
new
nvrtcProgram
();
nvrtcProgram
program
=
new
nvrtcProgram
();
nvrtcCreateProgram
(
program
,
sourceCode
,
null
,
0
,
null
,
null
);
nvrtcCreateProgram
(
program
,
sourceCode
,
null
,
0
,
null
,
null
);
String
options
[]
=
{
"--gpu-architecture=compute_"
+
capability
};
//
String options[] = {"--gpu-architecture=compute_"+capability};
int
[][]
nvrtc_version
=
new
int
[
2
][];
//
int [][] nvrtc_version = new int[2][];
int
nvrtc_rslt
=
-
1
;
int
nvrtc_rslt
=
-
1
;
/*
nvrtc_rslt= nvrtcVersion(nvrtc_version[0],nvrtc_version[0]);
System.out.println("nvrtcVersion="+nvrtc_version[0][0]+"."+nvrtc_version[1][0]+" (returned "+nvrtc_rslt+").");
*/
int
[]
nvrtc_num_arch
=
new
int
[
1
];
int
[]
nvrtc_num_arch
=
new
int
[
1
];
nvrtc_rslt
=
nvrtcGetNumSupportedArchs
(
nvrtc_num_arch
);
nvrtc_rslt
=
nvrtcGetNumSupportedArchs
(
nvrtc_num_arch
);
System
.
out
.
println
(
"nvrtc_num_arch="
+
nvrtc_num_arch
[
0
]+
" (returned "
+
nvrtc_rslt
+
")."
);
System
.
out
.
println
(
"nvrtc_num_arch="
+
nvrtc_num_arch
[
0
]+
" (returned "
+
nvrtc_rslt
+
")."
);
int
[]
nvrtc_archs
=
new
int
[
nvrtc_num_arch
[
0
]];
int
[]
nvrtc_archs
=
new
int
[
nvrtc_num_arch
[
0
]];
nvrtc_rslt
=
nvrtcGetSupportedArchs
(
nvrtc_archs
);
nvrtc_rslt
=
nvrtcGetSupportedArchs
(
nvrtc_archs
);
int
max_arch
=
0
;
for
(
int
sa:
nvrtc_archs
)
{
max_arch
=
Math
.
max
(
max_arch
,
sa
);
}
for
(
int
sa:
nvrtc_archs
)
{
for
(
int
sa:
nvrtc_archs
)
{
System
.
out
.
println
(
"Supported arch "
+
sa
);
System
.
out
.
println
(
"Supported arch "
+
sa
);
}
}
System
.
out
.
println
();
System
.
out
.
println
(
"Max supported arch is "
+
max_arch
+
", gpu capability = "
+
capability
);
if
(
capability
>
max_arch
)
{
capability
=
max_arch
;
System
.
out
.
println
(
"Reduced capability to match NVRTC compiler to "
+
capability
);
}
String
options
[]
=
new
String
[
USE_CUDA12
?
2
:
1
];
options
[
0
]
=
"--gpu-architecture=compute_"
+
capability
;
if
(
options
.
length
>
1
)
{
options
[
1
]
=
"--extensible-whole-program"
;
}
System
.
out
.
println
(
"Running NVRTC with the following options:"
);
for
(
String
s:
options
)
{
System
.
out
.
println
(
s
);
}
try
{
try
{
nvrtcCompileProgram
(
program
,
options
.
length
,
options
);
nvrtcCompileProgram
(
program
,
options
.
length
,
options
);
OK
=
true
;
OK
=
true
;
...
...
src/main/java/com/elphel/imagej/gpu/GpuQuad.java
View file @
0123f06e
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment