Commit 0123f06e authored by Andrey Filippov's avatar Andrey Filippov

Unified GPU for DP2/no DP2 (12.6.0/11.2.0)

parent 8755d17e
......@@ -6,7 +6,7 @@ package com.elphel.imagej.gpu;
** GPU acceleration for the Tile Processor
**
**
** Copyright (C) 2018 Elphel, Inc.
** Copyright (C) 2018-2025 Elphel, Inc.
**
** -----------------------------------------------------------------------------**
**
......@@ -48,7 +48,7 @@ import static jcuda.nvrtc.JNvrtc.nvrtcCreateProgram;
import static jcuda.nvrtc.JNvrtc.nvrtcDestroyProgram;
import static jcuda.nvrtc.JNvrtc.nvrtcGetPTX;
import static jcuda.nvrtc.JNvrtc.nvrtcGetProgramLog;
import static jcuda.nvrtc.JNvrtc.nvrtcVersion;
//import static jcuda.nvrtc.JNvrtc.nvrtcVersion;
import static jcuda.nvrtc.JNvrtc.nvrtcGetNumSupportedArchs;
import static jcuda.nvrtc.JNvrtc.nvrtcGetSupportedArchs;
......@@ -62,6 +62,7 @@ import com.elphel.imagej.tileprocessor.Correlation2d;
import ij.IJ;
import ij.text.TextWindow;
import jcuda.JCudaVersion;
import jcuda.Pointer;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
......@@ -75,16 +76,20 @@ import jcuda.nvrtc.JNvrtc;
import jcuda.nvrtc.nvrtcProgram;
public class GPUTileProcessor {
// Version string returned by JCudaVersion.get(); also used as a sub-directory name when loading kernel sources from resources.
public static String CUDA_VERSION = JCudaVersion.get();
// True when CUDA_VERSION starts with "12."; selects the GPU_SRC_FILES variant and adds a second NVRTC option.
public static boolean USE_CUDA12 = CUDA_VERSION.startsWith("12.");
public static boolean USE_DS_DP = false; // Use Dynamic Shared memory with Dynamic Parallelism (not implemented)
// Location of the libcudadevrt.a static library on Linux. NOTE(review): where this path is consumed is not visible here - confirm.
String LIBRARY_PATH = "/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a"; // linux
// The library can be obtained by downloading
// https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cudart-dev-11-2_11.2.152-1_amd64.deb
// and extracting it twice: first the .deb itself, then data.tar.xz, which contains usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a
// The "cuda-cudart-dev" package was found on https://ubuntu.pkgs.org/
static String GPU_RESOURCE_DIR = "kernels";
static String GPU_RESOURCE_TOP_DIR = "kernels";
static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"};
// "*" - generated defines, first index - separately compiled unit
static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
static String [][] GPU_SRC_FILES = USE_CUDA12?
(new String[][] {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cu"}}):
(new String[][] {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}});
static String GPU_CONVERT_DIRECT_NAME = "convert_direct"; // name in C code
static String GPU_IMCLT_ALL_NAME = "imclt_rbg_all";
static String GPU_CORRELATE2D_NAME = "correlate2D"; // name in C code
......@@ -270,7 +275,6 @@ public class GPUTileProcessor {
public GPUTileProcessor(
String cuda_project_directory) throws IOException
{
// From code by Marco Hutter - http://www.jcuda.org
// Enable exceptions and omit all subsequent error checks
JCudaDriver.setExceptionsEnabled(true);
......@@ -310,7 +314,7 @@ public class GPUTileProcessor {
}else {
File file = null;
if ((cuda_project_directory == null) || cuda_project_directory.isEmpty()) {
file = new File(classLoader.getResource(GPU_RESOURCE_DIR+"/"+src_file).getFile());
file = new File(classLoader.getResource(GPU_RESOURCE_TOP_DIR+"/"+CUDA_VERSION+"/"+src_file).getFile());
System.out.println("Loading resource "+file);
} else {
File src_dir = new File(cuda_project_directory, "src");
......@@ -507,22 +511,35 @@ public class GPUTileProcessor {
// Use the NVRTC to create a program by compiling the source code
nvrtcProgram program = new nvrtcProgram();
nvrtcCreateProgram( program, sourceCode, null, 0, null, null);
String options[] = {"--gpu-architecture=compute_"+capability};
int [][] nvrtc_version = new int[2][];
// String options[] = {"--gpu-architecture=compute_"+capability};
// int [][] nvrtc_version = new int[2][];
int nvrtc_rslt = -1;
/*
nvrtc_rslt= nvrtcVersion(nvrtc_version[0],nvrtc_version[0]);
System.out.println("nvrtcVersion="+nvrtc_version[0][0]+"."+nvrtc_version[1][0]+" (returned "+nvrtc_rslt+").");
*/
int [] nvrtc_num_arch = new int[1];
nvrtc_rslt= nvrtcGetNumSupportedArchs(nvrtc_num_arch);
System.out.println("nvrtc_num_arch="+nvrtc_num_arch[0]+" (returned "+nvrtc_rslt+").");
int [] nvrtc_archs = new int[nvrtc_num_arch[0]];
nvrtc_rslt= nvrtcGetSupportedArchs(nvrtc_archs);
int max_arch = 0;
for (int sa: nvrtc_archs) {
max_arch = Math.max(max_arch, sa);
}
for (int sa: nvrtc_archs) {
System.out.println("Supported arch "+sa);
}
System.out.println();
System.out.println("Max supported arch is "+max_arch+", gpu capability = "+capability);
if (capability > max_arch) {
capability = max_arch;
System.out.println("Reduced capability to match NVRTC compiler to "+capability);
}
String options[] = new String[USE_CUDA12?2:1];
options[0] = "--gpu-architecture=compute_"+capability;
if (options.length > 1) {
options[1] = "--extensible-whole-program";
}
System.out.println("Running NVRTC with the following options:");
for (String s:options) {
System.out.println(s);
}
try {
nvrtcCompileProgram(program, options.length, options);
OK = true;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment