CLAUDE: JNA rectilinear single-array config port (Phase 1)

Fixes the whole bug-class behind the -Dtp.backend=jna NPE in GpuQuad.setLpfRbg (CuasRanging.detectTargets -> CuasMotion -> setRectilinearReferenceTD): the rectilinear single-camera GpuQuad was built via the raw JCuda ctor, bypassing the backend factory, so in JNA mode it got a null gpuTileProcessor.

- GpuQuad.createRectilinear(): backend-aware factory parallel to create(). JCUDA branch is byte-for-byte the legacy ctor (oracle path untouched); JNA branch builds a clean rectilinear GpuQuadJna. New no-alloc rectilinear ctor (num_cams=1, no kernels/geometry).
- GpuQuadJna: rectilinear ctor + shared initNative(); the two overrides the gap-finder predicted -- reAllocateClt (no-op; native CLT pre-sized in setup) and singular setBayerImage (-> tp_proc_set_image). execConvertDirect already guarded on the rectilinear flag.
- CuasMotion:452 routed through createRectilinear (CUAS rectilinear now JNA-capable).
- ComboMatch:899 fail-loud UnsupportedOperationException in JNA mode (orthomosaic, wider unported surface, off the current path -- stays JCuda).

Java-only; libtileproc.so untouched. mvn compile clean. JCuda legacy frozen as oracle; core convert_direct flag-soup cleanup deferred to Phase 2.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

CLAUDE: JNA rectilinear single-array config port (Phase 1)
Fixes the whole bug-class behind the -Dtp.backend=jna NPE in GpuQuad.setLpfRbg (CuasRanging.detectTargets -> CuasMotion -> setRectilinearReferenceTD): the rectilinear single-camera GpuQuad was built via the raw JCuda ctor, bypassing the backend factory, so in JNA mode it got a null gpuTileProcessor.

- GpuQuad.createRectilinear(): backend-aware factory parallel to create(). JCUDA branch is byte-for-byte the legacy ctor (oracle path untouched); JNA branch builds a clean rectilinear GpuQuadJna. New no-alloc rectilinear ctor (num_cams=1, no kernels/geometry).
- GpuQuadJna: rectilinear ctor + shared initNative(); the two overrides the gap-finder predicted -- reAllocateClt (no-op; native CLT pre-sized in setup) and singular setBayerImage (-> tp_proc_set_image). execConvertDirect already guarded on the rectilinear flag.
- CuasMotion:452 routed through createRectilinear (CUAS rectilinear now JNA-capable).
- ComboMatch:899 fail-loud UnsupportedOperationException in JNA mode (orthomosaic, wider unported surface, off the current path -- stays JCuda).

Java-only; libtileproc.so untouched. mvn compile clean. JCuda legacy frozen as oracle; core convert_direct flag-soup cleanup deferred to Phase 2.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
b65ee10d · Andrey Filippov · 753d6900 · b65ee10d · b65ee10d · b65ee10d
Commit b65ee10d authored Jun 26, 2026 by Andrey Filippov
4 changed files
--- a/src/main/java/com/elphel/imagej/cuas/CuasMotion.java
+++ b/src/main/java/com/elphel/imagej/cuas/CuasMotion.java
@@ -449,8 +449,8 @@ public class CuasMotion {
 			slice_titles[nscan] =  scene_titles[frame_cent];
 		}
 		try {
-			gpuQuad = new GpuQuad(//single camera
+			gpuQuad = GpuQuad.createRectilinear( // single camera; backend-aware (JCUDA legacy or native JNA)
-					gpuTileProcessor,   // GPUTileProcessor gpuTileProcessor,
+					gpuTileProcessor,   // GPUTileProcessor gpuTileProcessor (null/unused in JNA mode),
 					gpu_max_width,        // final int        max_width,
 					gpu_max_height,       // final int        max_height,
 					1,                    // final int        num_colors, // normally 1?

--- a/src/main/java/com/elphel/imagej/gpu/GpuQuad.java
+++ b/src/main/java/com/elphel/imagej/gpu/GpuQuad.java
@@ -520,6 +520,53 @@ public class GpuQuad{ // quad camera description
 		return new GpuQuad(gpuTileProcessor, quadCLT, debug_level);
 	}
+	// Backend-aware factory for the RECTILINEAR single-array GpuQuad (synthetic, non-sensor images:
+	// no distortion correction, no aberration kernels, single camera) — parallel to create().
+	// JCUDA branch is byte-for-byte the legacy `new GpuQuad(gpuTileProcessor, w, h, colors, dbg)` so the
+	// oracle path is untouched; JNA branch builds a clean rectilinear GpuQuadJna (no kernel/geometry baggage).
+	// By Claude on 06/26/2026.
+	public static GpuQuad createRectilinear(
+			GPUTileProcessor gpuTileProcessor, // ignored in JNA mode (null is fine there)
+			int              max_width,
+			int              max_height,
+			int              num_colors,
+			int              debug_level) {
+		if (useJnaBackend()) {
+			String src   = System.getProperty("tp.jna.srcdir", "/home/elphel/git/tile_processor_gpu/src");
+			String devrt = System.getProperty("tp.jna.devrt",  "/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a");
+			System.out.println("GpuQuad.createRectilinear(): NATIVE (JNA) backend "+max_width+"x"+max_height+
+					" colors="+num_colors+" (src="+src+")");
+			return new com.elphel.imagej.gpu.jna.GpuQuadJna(max_width, max_height, num_colors, src, devrt, debug_level);
+		}
+		return new GpuQuad(gpuTileProcessor, max_width, max_height, num_colors, debug_level);
+	}
+	// No-allocation RECTILINEAR constructor for the native (JNA) backend subclass GpuQuadJna (no QuadCLT,
+	// single synthetic array). Mirrors the JCuda rectilinear ctor's CONFIG (rectilinear, num_cams=1, no
+	// kernels) but allocates NO JCuda GPU memory and creates NO JCuda context. By Claude on 06/26/2026.
+	protected GpuQuad(
+			int     max_width,
+			int     max_height,
+			int     num_colors,
+			int     debug_level,
+			boolean native_backend) {
+		this.rectilinear =   true;
+		setGpu_debug_level(debug_level);
+		this.gpuTileProcessor = null;
+		this.quadCLT =       null;
+		this.img_width =     max_width;
+		this.img_height =    max_height;
+		this.num_cams =      1;          // single camera, only interscene correlations
+		this.num_all_pairs = 0;
+		this.num_colors =    num_colors;
+		this.kernels_hor =   0;
+		this.kernels_vert =  0;
+		this.kern_tiles =    0;
+		this.kern_size =     0;
+		setSensorMaskInter(-1);
+		// no cuMemAlloc — native backend allocates its own GPU memory
+	}
 	// No-allocation constructor for the native (JNA) backend subclass GpuQuadJna.
 	// Sets the final config fields from quadCLT but allocates NO JCuda GPU memory and creates NO
 	// JCuda context (gpuTileProcessor = null). The subclass owns its native (TpProc) GPU memory and

--- a/src/main/java/com/elphel/imagej/gpu/jna/GpuQuadJna.java
+++ b/src/main/java/com/elphel/imagej/gpu/jna/GpuQuadJna.java
@@ -24,12 +24,13 @@ import com.sun.jna.Pointer;
 * By Claude on 2026-06-25.
 */
 public class GpuQuadJna extends GpuQuad {
-    private final TpJna   lib;
+    private TpJna   lib;        // assigned once in initNative() (non-final to share across both ctors)
-    private final Pointer module;   // TpModule* (NVRTC kernels)
+    private Pointer module;     // TpModule* (NVRTC kernels)
-    private final Pointer proc;      // TpProc*   (persistent device buffers)
+    private Pointer proc;       // TpProc*   (persistent device buffers)
    private boolean closed = false;
    /**
+     * Per-scene (sensor) backend.
     * @param quadCLT   the per-scene QuadCLT (geometry, kernels, sensor config)
     * @param srcdir    directory with the kernel .cu/.h sources for NVRTC (e.g. tile_processor_gpu/src)
     * @param devrt     path to libcudadevrt.a (for CDP linking)
@@ -37,6 +38,23 @@ public class GpuQuadJna extends GpuQuad {
     */
    public GpuQuadJna(QuadCLT quadCLT, String srcdir, String devrt, int debugLevel) {
        super(quadCLT, debugLevel, true); // no-alloc: config fields only, no JCuda context
+        initNative(srcdir, devrt);
+    }
+    /**
+     * RECTILINEAR single-array backend (synthetic non-sensor images: no distortion, no aberration kernels,
+     * single camera). Mirrors GpuQuad's rectilinear ctor config but stands up its own native TpProc.
+     * Built via GpuQuad.createRectilinear() in JNA mode (e.g. the CUAS interscene GpuQuad). By Claude on 06/26/2026.
+     */
+    public GpuQuadJna(int width, int height, int num_colors, String srcdir, String devrt, int debugLevel) {
+        super(width, height, num_colors, debugLevel, true); // no-alloc rectilinear config (num_cams=1, no kernels)
+        initNative(srcdir, devrt);
+    }
+    // Shared native bring-up: load the lib, NVRTC-compile the module, create the persistent TpProc and size
+    // its buffers. tp_proc_setup uses the (already-set) config fields uniformly — for the rectilinear ctor
+    // num_cams=1 and kernels_hor=kern_tiles=0, so no kernel buffers are allocated.
+    private void initNative(String srcdir, String devrt) {
        this.lib = Native.load("tileproc", TpJna.class);
        this.module = lib.tp_create_module(srcdir, devrt);
        if (module == null) {
@@ -186,6 +204,16 @@ public class GpuQuadJna extends GpuQuad {
        for (int i = 0; i < f.length; i++) { double s = chans[0][i]; for (int j = 1; j < chans.length; j++) s += chans[j][i]; f[i] = (float) s; }
        return f;
    }
+    // Single pre-combined image upload (the rectilinear/synthetic path: ImageDtt.setRectilinearReferenceTD
+    // passes fpixels_ref directly). Base does a JCuda cuMemcpy2D into gpu_bayer_h[ncam]. By Claude on 06/26/2026.
+    @Override public void setBayerImage(float[] bayer_image, int ncam) {
+        lib.tp_proc_set_image(proc, ncam, bayer_image);
+    }
+    // CLT buffers (main + ref) are pre-allocated full-frame in tp_proc_setup, so there is nothing to (re)allocate
+    // per call. Base allocs JCuda gpu_clt[_ref] here; native is fixed-size -> no-op (nothing newly allocated).
+    @Override public boolean reAllocateClt(int[] wh, boolean ref_scene) {
+        return false;
+    }
    // ---- tasks ----
    @Override public void setTasks(TpTask[] tile_tasks, boolean use_aux, boolean verify) {

--- a/src/main/java/com/elphel/imagej/orthomosaic/ComboMatch.java
+++ b/src/main/java/com/elphel/imagej/orthomosaic/ComboMatch.java
@@ -897,6 +897,14 @@ public class ComboMatch {
 		// for all modes - needed for create_overlaps || process_correlation || render_match || pattern_match
    	if (GPU_QUAD_AFFINE == null) {
+    		// Fail loud rather than NPE deep in JCuda: the orthomosaic GPU_QUAD_AFFINE (rectilinear) path is
+    		// not on the validated JNA surface yet (wider than CUAS: affine matching). Run orthomosaic on
+    		// JCuda. The CUAS rectilinear path IS ported (GpuQuad.createRectilinear). By Claude on 06/26/2026.
+    		if (GpuQuad.useJnaBackend()) {
+    			throw new UnsupportedOperationException(
+    					"ComboMatch GPU_QUAD_AFFINE (orthomosaic, rectilinear) is not yet ported to the JNA backend "+
+    					"(-Dtp.backend=jna); run orthomosaic with the JCUDA backend.");
+    		}
        	System.out.println("Setting up GPU");
    		try {
    			GPU_QUAD_AFFINE = new GpuQuad(//