Unified GPU for DP2/no DP2 (12.6.0/11.2.0)

0123f06e · Andrey Filippov · 8755d17e · 0123f06e · 0123f06e
Commit 0123f06e authored Jul 21, 2025 by Andrey Filippov
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 197 additions and 57 deletions

GPUTileProcessor.java src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java +30 -13

GpuQuad.java src/main/java/com/elphel/imagej/gpu/GpuQuad.java +167 -44

No files found.
--- a/src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+++ b/src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
@@ -6,7 +6,7 @@ package com.elphel.imagej.gpu;
 ** GPU acceleration for the Tile Processor
 **
 **
-** Copyright (C) 2018 Elphel, Inc.
+** Copyright (C) 2018-2025 Elphel, Inc.
 **
 ** -----------------------------------------------------------------------------**
 **
@@ -48,7 +48,7 @@ import static jcuda.nvrtc.JNvrtc.nvrtcCreateProgram;
 import static jcuda.nvrtc.JNvrtc.nvrtcDestroyProgram;
 import static jcuda.nvrtc.JNvrtc.nvrtcGetPTX;
 import static jcuda.nvrtc.JNvrtc.nvrtcGetProgramLog;
-import static jcuda.nvrtc.JNvrtc.nvrtcVersion;
+//import static jcuda.nvrtc.JNvrtc.nvrtcVersion;
 import static jcuda.nvrtc.JNvrtc.nvrtcGetNumSupportedArchs;
 import static jcuda.nvrtc.JNvrtc.nvrtcGetSupportedArchs;

@@ -62,6 +62,7 @@ import com.elphel.imagej.tileprocessor.Correlation2d;

 import ij.IJ;
 import ij.text.TextWindow;
+import jcuda.JCudaVersion;
 import jcuda.Pointer;
 import jcuda.driver.CUcontext;
 import jcuda.driver.CUdevice;
@@ -75,16 +76,20 @@ import jcuda.nvrtc.JNvrtc;
 import jcuda.nvrtc.nvrtcProgram;

 public class GPUTileProcessor {
+	public static String  CUDA_VERSION = JCudaVersion.get(); 
+	public static boolean USE_CUDA12 = CUDA_VERSION.startsWith("12.");
 	public static boolean USE_DS_DP = false; // Use Dynamic Shared memory with Dynamic Parallelism (not implemented)  
 	String LIBRARY_PATH = "/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a"; // linux
 	// Can be downloaded and twice extracted from
 	// https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cudart-dev-11-2_11.2.152-1_amd64.deb
 	// First deb itself, then data.tar.xz, and it will have usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a inside
 	// Found "cuda-cudart-dev" on https://ubuntu.pkgs.org/
-	static String GPU_RESOURCE_DIR =              "kernels";
+	static String GPU_RESOURCE_TOP_DIR =              "kernels";
 	static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"};
 	// "*" - generated defines, first index - separately compiled unit
-	static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
+	static String [][] GPU_SRC_FILES = USE_CUDA12?
+			(new String[][] {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cu"}}):
+			(new String[][] {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}});
 	static String GPU_CONVERT_DIRECT_NAME =        "convert_direct";      // name in C code
 	static String GPU_IMCLT_ALL_NAME =             "imclt_rbg_all";
 	static String GPU_CORRELATE2D_NAME =           "correlate2D";         // name in C code
@@ -270,7 +275,6 @@ public class GPUTileProcessor {
    public GPUTileProcessor(
    		String cuda_project_directory) throws IOException
    {
-
    	// From code by Marco Hutter - http://www.jcuda.org
        // Enable exceptions and omit all subsequent error checks
        JCudaDriver.setExceptionsEnabled(true);
@@ -310,7 +314,7 @@ public class GPUTileProcessor {
            	}else {
                	File file = null;
                	if ((cuda_project_directory == null) || cuda_project_directory.isEmpty()) {
-                		file = new File(classLoader.getResource(GPU_RESOURCE_DIR+"/"+src_file).getFile());
+                		file = new File(classLoader.getResource(GPU_RESOURCE_TOP_DIR+"/"+CUDA_VERSION+"/"+src_file).getFile());
                		System.out.println("Loading resource "+file);
                	} else {
                		File src_dir = new File(cuda_project_directory, "src");
@@ -507,22 +511,35 @@ public class GPUTileProcessor {
    		// Use the NVRTC to create a program by compiling the source code
    		nvrtcProgram program = new nvrtcProgram();
    		nvrtcCreateProgram(	program, sourceCode, null, 0, null, null);
-    		String options[] = {"--gpu-architecture=compute_"+capability};
-    		int [][] nvrtc_version = new int[2][];
+//    		String options[] = {"--gpu-architecture=compute_"+capability};
+//    		int [][] nvrtc_version = new int[2][];
    		int nvrtc_rslt = -1;
-    		/*
-    		nvrtc_rslt= nvrtcVersion(nvrtc_version[0],nvrtc_version[0]);
-    		System.out.println("nvrtcVersion="+nvrtc_version[0][0]+"."+nvrtc_version[1][0]+" (returned "+nvrtc_rslt+").");
-    		*/
    		int [] nvrtc_num_arch = new int[1]; 
    		nvrtc_rslt= nvrtcGetNumSupportedArchs(nvrtc_num_arch);
    		System.out.println("nvrtc_num_arch="+nvrtc_num_arch[0]+" (returned "+nvrtc_rslt+").");
    		int [] nvrtc_archs = new int[nvrtc_num_arch[0]];
    		nvrtc_rslt= nvrtcGetSupportedArchs(nvrtc_archs);
+    		int max_arch = 0;
+    		for (int sa: nvrtc_archs) {
+    			max_arch = Math.max(max_arch, sa);
+    		}
    		for (int sa: nvrtc_archs) {
    			System.out.println("Supported arch "+sa);
    		}
-    		System.out.println();
+			System.out.println("Max supported arch is "+max_arch+", gpu capability = "+capability);
+			if (capability > max_arch) {
+				capability = max_arch;
+				System.out.println("Reduced capability to match NVRTC compiler to "+capability);
+			}
+			String options[] = new String[USE_CUDA12?2:1];
+			options[0] = "--gpu-architecture=compute_"+capability;
+			if (options.length > 1) {
+				options[1] = "--extensible-whole-program";
+			}
+    		System.out.println("Running NVRTC with the following options:");
+    		for (String s:options) {
+    			System.out.println(s);
+    		}			
    		try {
    			nvrtcCompileProgram(program, options.length, options);
    			OK = true;

--- a/src/main/java/com/elphel/imagej/gpu/GpuQuad.java
+++ b/src/main/java/com/elphel/imagej/gpu/GpuQuad.java