Commit a138f826 authored by Andrey Filippov

CLAUDE: Step 2 (B) foundation — GpuQuad no-alloc ctor + GpuQuadJna subclass skeleton (compiles)

Architecture B (chosen after finding GpuQuad's surface is ~70 methods, too large for a clean interface):
- GpuQuad: add a protected no-alloc constructor (QuadCLT, debug_level, native_backend marker) that sets
  only the final config fields (gpuTileProcessor=null) and allocates NO JCuda memory / context. The
  working JCuda constructors are untouched.
- New GpuQuadJna extends GpuQuad: uses the no-alloc ctor, then stands up the native libtileproc.so via
  TpJna (tp_create_module + tp_proc_create + tp_proc_setup). Inherits all methods (so it compiles);
  GPU-touching methods will be overridden incrementally to delegate to TpProc, the rest throw to fail
  loudly off the validated path. close() frees native memory deterministically.

mvn -DskipTests compile: clean. JCUDA remains the default/working path. Next: per-method override
marshalling (kernels/bayer/geometry/tasks + convert/imclt/getRBG/corr), then the backend selector
(QuadCLT ctor) and the live JCUDA-vs-JNA file comparison.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 9234a307
......@@ -490,6 +490,31 @@ public class GpuQuad{ // quad camera description
texture_stride_rgba = (int)(device_stride[0] / Sizeof.FLOAT);
}
// Configuration-only constructor, intended for the native (JNA) backend subclass GpuQuadJna.
// It copies the image size, sensor count, pair count, color count and kernel-grid dimensions
// out of quadCLT and leaves gpuTileProcessor null; nothing in this constructor body calls
// cuMemAlloc. The native_backend argument is not read here - it only gives this constructor
// a distinct signature.
// NOTE(review): setGpu_debug_level() is invoked before the remaining fields are assigned;
// confirm it does not depend on them if it is ever overridden.
protected GpuQuad(
        final QuadCLT quadCLT,
        int           debug_level,
        boolean       native_backend) {
    this.rectilinear = false;
    setGpu_debug_level(debug_level);
    this.gpuTileProcessor = null;
    this.quadCLT = quadCLT;

    // Sensor dimensions as {width, height}
    final int [] sensor_wh = quadCLT.getGeometryCorrection().getSensorWH();
    this.img_width =  sensor_wh[0];
    this.img_height = sensor_wh[1];

    this.num_cams =      quadCLT.getNumSensors();
    this.num_all_pairs = Correlation2d.getNumPairs(num_cams);

    // One color plane for monochrome sensors, three otherwise
    final int colors;
    if (quadCLT.isMonochrome()) {
        colors = 1;
    } else {
        colors = 3;
    }
    this.num_colors = colors;

    // Kernel grid dimensions stay zero when no CLT kernels are available
    int hor_count =  0;
    int vert_count = 0;
    if (quadCLT.getCLTKernels() != null) {
        hor_count =  quadCLT.getCLTKernels()[0][0][0].length;
        vert_count = quadCLT.getCLTKernels()[0][0].length;
    }
    this.kernels_hor =  hor_count;
    this.kernels_vert = vert_count;

    this.kern_tiles = kernels_hor * kernels_vert * num_colors;
    this.kern_size =  kern_tiles * 4 * 64;
    // no cuMemAlloc here - the native backend is expected to allocate its own GPU memory
}
// Constructor for rectilinear synthetic images
public GpuQuad(
GPUTileProcessor gpuTileProcessor,
......
package com.elphel.imagej.gpu.jna;
import com.elphel.imagej.gpu.GpuQuad;
import com.elphel.imagej.tileprocessor.QuadCLT;
import com.sun.jna.Native;
import com.sun.jna.Pointer;
/**
 * Native (JNA) GPU backend — architecture B: subclass {@link GpuQuad}, but go through GpuQuad's
 * protected configuration-only constructor instead of the JCuda-allocating ones. The constructor
 * then loads the native tile-processor library ({@code tileproc}, i.e. {@code libtileproc.so})
 * via {@link TpJna} and creates two native handles: a {@code TpModule} and a {@code TpProc}.
 *
 * <p><b>Current state:</b> this class does not override any GPU-touching method of GpuQuad yet.
 * Until such overrides are added (setGeometryCorrection / setConvolutionKernels / setBayerImages /
 * setTasks / execSetTilesOffsets / execConvertDirect / execImcltRbgAll / getRBG / execCorr2D_* /
 * getCorr2D / getCorr2DCombo / handleWH), the inherited implementations run against a base class
 * whose {@code gpuTileProcessor} is {@code null}. Callers must therefore only route code paths
 * here that have been ported and validated.
 *
 * <p><b>Lifecycle:</b> the instance owns its native handles and must be released with
 * {@link #close()}, which is idempotent. The class implements {@link AutoCloseable} so it can be
 * used in try-with-resources. The handles returned by {@link #module()} and {@link #proc()} must
 * not be used after {@link #close()}.
 *
 * By Claude on 2026-06-25.
 */
public class GpuQuadJna extends GpuQuad implements AutoCloseable {
    private final TpJna   lib;
    private final Pointer module; // TpModule* (NVRTC kernels)
    private final Pointer proc;   // TpProc*   (persistent device buffers)
    private boolean closed = false;

    /**
     * Creates the native module and processor and configures the processor from the
     * configuration fields set by the base constructor.
     *
     * <p>If any step after module creation fails, every native object created so far is
     * destroyed before the exception propagates, so a failed construction does not leak.
     *
     * @param quadCLT    the per-scene QuadCLT (geometry, kernels, sensor config)
     * @param srcdir     directory with the kernel .cu/.h sources for NVRTC (e.g. tile_processor_gpu/src)
     * @param devrt      path to libcudadevrt.a (for CDP linking)
     * @param debugLevel gpu debug level
     * @throws IllegalStateException if module creation, processor creation or processor setup fails
     */
    public GpuQuadJna(QuadCLT quadCLT, String srcdir, String devrt, int debugLevel) {
        super(quadCLT, debugLevel, true); // configuration fields only, gpuTileProcessor stays null
        this.lib = Native.load("tileproc", TpJna.class);

        final Pointer newModule = lib.tp_create_module(srcdir, devrt);
        if (newModule == null) {
            throw new IllegalStateException("GpuQuadJna: tp_create_module failed: " + lib.tp_last_error());
        }

        Pointer newProc = null;
        try {
            newProc = lib.tp_proc_create(newModule);
            if (newProc == null) {
                throw new IllegalStateException("GpuQuadJna: tp_proc_create failed: " + lib.tp_last_error());
            }
            // NOTE(review): the last two arguments are kernels_hor and kern_tiles (not kernels_vert);
            // confirm this matches the native tp_proc_setup signature.
            int rc = lib.tp_proc_setup(newProc, num_cams, num_colors, img_width, img_height,
                    kernels_hor, kern_tiles);
            if (rc != 0) {
                throw new IllegalStateException("GpuQuadJna: tp_proc_setup rc=" + rc + ": " + lib.tp_last_error());
            }
        } catch (RuntimeException | Error failure) {
            // The error text was captured above, before any destroy call could disturb it.
            // The caller never receives this instance, so close() cannot do this cleanup.
            try {
                releaseNative(lib, newProc, newModule);
            } catch (RuntimeException | Error cleanupFailure) {
                failure.addSuppressed(cleanupFailure);
            }
            throw failure;
        }

        this.module = newModule;
        this.proc = newProc;
        // tp_proc_setup_rbg_corr (imclt + correlation buffers) is deferred until the correlation config
        // (num_pairs / sel_pairs / color_weights / corr_out_rad) is known at first use.
    }

    /**
     * Destroys the processor first, then the module it was created from. The module is destroyed
     * even if destroying the processor throws. Null handles are skipped.
     */
    private static void releaseNative(TpJna lib, Pointer proc, Pointer module) {
        try {
            if (proc != null) {
                lib.tp_proc_destroy(proc);
            }
        } finally {
            if (module != null) {
                lib.tp_destroy_module(module);
            }
        }
    }

    /** Native handles for the override implementations (added incrementally). */
    protected TpJna lib() { return lib; }

    /** @return the native TpModule handle; not valid after {@link #close()} */
    protected Pointer module() { return module; }

    /** @return the native TpProc handle; not valid after {@link #close()} */
    protected Pointer proc() { return proc; }

    /**
     * Releases the native processor and module (deterministic — does not rely on GC/finalize).
     * Safe to call more than once; only the first call has any effect.
     */
    @Override
    public synchronized void close() {
        if (closed) {
            return;
        }
        closed = true;
        releaseNative(lib, proc, module);
    }

    // NOTE: GPU-touching methods (setGeometryCorrection / setConvolutionKernels / setBayerImages /
    // setTasks / execSetTilesOffsets / execConvertDirect / execImcltRbgAll / getRBG / execCorr2D_* /
    // getCorr2D / getCorr2DCombo / handleWH) are overridden incrementally to delegate to TpProc.
    // Until overridden, the inherited base methods would touch null JCuda buffers — the selector must
    // only route the validated CUAS path here.
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment