Commit 2516bcc1 authored by Andrey Filippov

updated to jcuda 12.6

parent 122a22bb
...@@ -58,7 +58,8 @@ ...@@ -58,7 +58,8 @@
<groupId>org.jcuda</groupId> <groupId>org.jcuda</groupId>
<artifactId>jcuda</artifactId> <artifactId>jcuda</artifactId>
<!-- <version>10.1.0</version> --> <!-- <version>10.1.0</version> -->
<version>11.2.0</version> <!--<version>11.2.0</version> -->
<version>12.6.0</version>
</dependency> </dependency>
<!-- <!--
As of 2018/09/11 TF for GPU on Maven supports CUDA 9.0 (vs latest 9.2) As of 2018/09/11 TF for GPU on Maven supports CUDA 9.0 (vs latest 9.2)
......
...@@ -6,7 +6,7 @@ package com.elphel.imagej.gpu; ...@@ -6,7 +6,7 @@ package com.elphel.imagej.gpu;
** GPU acceleration for the Tile Processor ** GPU acceleration for the Tile Processor
** **
** **
** Copyright (C) 2018 Elphel, Inc. ** Copyright (C) 2018-2025 Elphel, Inc.
** **
** -----------------------------------------------------------------------------** ** -----------------------------------------------------------------------------**
** **
...@@ -72,16 +72,18 @@ import jcuda.nvrtc.JNvrtc; ...@@ -72,16 +72,18 @@ import jcuda.nvrtc.JNvrtc;
import jcuda.nvrtc.nvrtcProgram; import jcuda.nvrtc.nvrtcProgram;
public class GPUTileProcessor { public class GPUTileProcessor {
public static boolean USE_DS_DP = false; // Use Dynamic Shared memory with Dynamic Parallelism (not implemented) public static boolean USE_DS_DP = true; // false; // Use Dynamic Shared memory with Dynamic Parallelism (not implemented)
String LIBRARY_PATH = "/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a"; // linux String LIBRARY_PATH = "/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a"; // linux
// Can be downloaded and twice extracted from // Can be downloaded and twice extracted from
// https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cudart-dev-11-2_11.2.152-1_amd64.deb // https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cudart-dev-11-2_11.2.152-1_amd64.deb
// First deb itself, then data.tar.xz, and it will have usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a inside // First deb itself, then data.tar.xz, and it will have usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a inside
// Found "cuda-cudart-dev" on https://ubuntu.pkgs.org/ // Found "cuda-cudart-dev" on https://ubuntu.pkgs.org/
static String GPU_RESOURCE_DIR = "kernels"; static String GPU_RESOURCE_DIR = "kernels";
static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"}; // static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"}; // was never used and dtt8x8.cuh had incorrect name
// static String [] GPU_KERNEL_FILES = {"dtt8x8.cu","TileProcessor.cu"};
// "*" - generated defines, first index - separately compiled unit // "*" - generated defines, first index - separately compiled unit
static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}}; // static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cu"}};
static String GPU_CONVERT_DIRECT_NAME = "convert_direct"; // name in C code static String GPU_CONVERT_DIRECT_NAME = "convert_direct"; // name in C code
static String GPU_IMCLT_ALL_NAME = "imclt_rbg_all"; static String GPU_IMCLT_ALL_NAME = "imclt_rbg_all";
static String GPU_CORRELATE2D_NAME = "correlate2D"; // name in C code static String GPU_CORRELATE2D_NAME = "correlate2D"; // name in C code
...@@ -89,7 +91,7 @@ public class GPUTileProcessor { ...@@ -89,7 +91,7 @@ public class GPUTileProcessor {
static String GPU_CORR2D_COMBINE_NAME = "corr2D_combine"; // name in C code static String GPU_CORR2D_COMBINE_NAME = "corr2D_combine"; // name in C code
static String GPU_CORR2D_NORMALIZE_NAME = "corr2D_normalize"; // name in C code static String GPU_CORR2D_NORMALIZE_NAME = "corr2D_normalize"; // name in C code
static String GPU_TEXTURES_NAME = "textures_nonoverlap"; // name in C code static String GPU_TEXTURES_NAME = "textures_nonoverlap"; // name in C code
static String GPU_RBGA_NAME = "generate_RBGA"; // name in C code static String GPU_RBGA_NAME = "generate_RBGA"; // name in C code //// *** Modified 2025 *** ////
static String GPU_ROT_DERIV = "calc_rot_deriv"; // calculate rotation matrices and derivatives static String GPU_ROT_DERIV = "calc_rot_deriv"; // calculate rotation matrices and derivatives
static String GPU_SET_TILES_OFFSETS = "get_tiles_offsets"; // calculate pixel offsets and disparity distortions static String GPU_SET_TILES_OFFSETS = "get_tiles_offsets"; // calculate pixel offsets and disparity distortions
static String GPU_CALCULATE_TILES_OFFSETS = "calculate_tiles_offsets"; // calculate pixel offsets and disparity distortions static String GPU_CALCULATE_TILES_OFFSETS = "calculate_tiles_offsets"; // calculate pixel offsets and disparity distortions
...@@ -100,7 +102,7 @@ public class GPUTileProcessor { ...@@ -100,7 +102,7 @@ public class GPUTileProcessor {
static String GPU_MARK_TEXTURE_NEIGHBOR_NAME = "mark_texture_neighbor_tiles"; static String GPU_MARK_TEXTURE_NEIGHBOR_NAME = "mark_texture_neighbor_tiles";
static String GPU_GEN_TEXTURE_LIST_NAME = "gen_texture_list"; static String GPU_GEN_TEXTURE_LIST_NAME = "gen_texture_list";
static String GPU_CLEAR_TEXTURE_RBGA_NAME = "clear_texture_rbga"; static String GPU_CLEAR_TEXTURE_RBGA_NAME = "clear_texture_rbga";
static String GPU_TEXTURES_ACCUMULATE_NAME = "textures_accumulate"; static String GPU_TEXTURES_ACCUMULATE_NAME = "textures_accumulate"; //// *** Modified 2025 *** ////
static String GPU_CREATE_NONOVERLAP_LIST_NAME ="create_nonoverlap_list"; static String GPU_CREATE_NONOVERLAP_LIST_NAME ="create_nonoverlap_list";
static String GPU_ERASE_CLT_TILES_NAME = "erase_clt_tiles"; static String GPU_ERASE_CLT_TILES_NAME = "erase_clt_tiles";
...@@ -298,7 +300,7 @@ public class GPUTileProcessor { ...@@ -298,7 +300,7 @@ public class GPUTileProcessor {
ClassLoader classLoader = getClass().getClassLoader(); ClassLoader classLoader = getClass().getClassLoader();
String [] kernelSources = new String[GPU_SRC_FILES.length]; String [] kernelSources = new String[GPU_SRC_FILES.length];
boolean show_source = false; // true; boolean show_source = true; // false; // true;
for (int cunit = 0; cunit < kernelSources.length; cunit++) { for (int cunit = 0; cunit < kernelSources.length; cunit++) {
kernelSources[cunit] = ""; // use StringBuffer? kernelSources[cunit] = ""; // use StringBuffer?
for (String src_file:GPU_SRC_FILES[cunit]) { for (String src_file:GPU_SRC_FILES[cunit]) {
...@@ -370,7 +372,7 @@ public class GPUTileProcessor { ...@@ -370,7 +372,7 @@ public class GPUTileProcessor {
GPU_CORR2D_COMBINE_kernel = functions[4]; GPU_CORR2D_COMBINE_kernel = functions[4];
GPU_CORR2D_NORMALIZE_kernel = functions[5]; GPU_CORR2D_NORMALIZE_kernel = functions[5];
GPU_TEXTURES_kernel= functions[6]; GPU_TEXTURES_kernel= functions[6];
GPU_RBGA_kernel= functions[7]; GPU_RBGA_kernel= functions[7]; //// *** Modified 2025 *** ////
GPU_ROT_DERIV_kernel = functions[8]; GPU_ROT_DERIV_kernel = functions[8];
GPU_CALCULATE_TILES_OFFSETS_kernel = functions[9]; GPU_CALCULATE_TILES_OFFSETS_kernel = functions[9];
GPU_CALC_REVERSE_DISTORTION_kernel = functions[10]; GPU_CALC_REVERSE_DISTORTION_kernel = functions[10];
...@@ -380,7 +382,7 @@ public class GPUTileProcessor { ...@@ -380,7 +382,7 @@ public class GPUTileProcessor {
GPU_MARK_TEXTURE_NEIGHBOR_kernel = functions[13]; GPU_MARK_TEXTURE_NEIGHBOR_kernel = functions[13];
GPU_GEN_TEXTURE_LIST_kernel = functions[14]; GPU_GEN_TEXTURE_LIST_kernel = functions[14];
GPU_CLEAR_TEXTURE_RBGA_kernel = functions[15]; GPU_CLEAR_TEXTURE_RBGA_kernel = functions[15];
GPU_TEXTURES_ACCUMULATE_kernel = functions[16]; GPU_TEXTURES_ACCUMULATE_kernel = functions[16]; //// *** Modified 2025 *** ////
GPU_CREATE_NONOVERLAP_LIST_kernel = functions[17]; GPU_CREATE_NONOVERLAP_LIST_kernel = functions[17];
GPU_ERASE_CLT_TILES_kernel = functions[18]; GPU_ERASE_CLT_TILES_kernel = functions[18];
...@@ -504,7 +506,7 @@ public class GPUTileProcessor { ...@@ -504,7 +506,7 @@ public class GPUTileProcessor {
// Use the NVRTC to create a program by compiling the source code // Use the NVRTC to create a program by compiling the source code
nvrtcProgram program = new nvrtcProgram(); nvrtcProgram program = new nvrtcProgram();
nvrtcCreateProgram( program, sourceCode, null, 0, null, null); nvrtcCreateProgram( program, sourceCode, null, 0, null, null);
String options[] = {"--gpu-architecture=compute_"+capability}; String options[] = {"--gpu-architecture=compute_"+capability,"--extensible-whole-program"};
try { try {
nvrtcCompileProgram(program, options.length, options); nvrtcCompileProgram(program, options.length, options);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment