Commit b65ee10d authored by Andrey Filippov

CLAUDE: JNA rectilinear single-array config port (Phase 1)

Fixes the whole bug-class behind the -Dtp.backend=jna NPE in GpuQuad.setLpfRbg
(CuasRanging.detectTargets -> CuasMotion -> setRectilinearReferenceTD): the
rectilinear single-camera GpuQuad was built via the raw JCuda ctor, bypassing
the backend factory, so in JNA mode it got a null gpuTileProcessor.

- GpuQuad.createRectilinear(): backend-aware factory parallel to create().
  JCUDA branch is byte-for-byte the legacy ctor (oracle path untouched); JNA
  branch builds a clean rectilinear GpuQuadJna. New no-alloc rectilinear ctor
  (num_cams=1, no kernels/geometry).
- GpuQuadJna: rectilinear ctor + shared initNative(); the two overrides the
  gap-finder predicted -- reAllocateClt (no-op; native CLT pre-sized in setup)
  and singular setBayerImage (-> tp_proc_set_image). execConvertDirect already
  guarded on the rectilinear flag.
- CuasMotion:452 routed through createRectilinear (CUAS rectilinear now
  JNA-capable).
- ComboMatch:899 fail-loud UnsupportedOperationException in JNA mode
  (orthomosaic, wider unported surface, off the current path -- stays JCuda).

Java-only; libtileproc.so untouched. mvn compile clean. JCuda legacy frozen as
oracle; core convert_direct flag-soup cleanup deferred to Phase 2.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 753d6900
...@@ -449,8 +449,8 @@ public class CuasMotion { ...@@ -449,8 +449,8 @@ public class CuasMotion {
slice_titles[nscan] = scene_titles[frame_cent]; slice_titles[nscan] = scene_titles[frame_cent];
} }
try { try {
gpuQuad = new GpuQuad(//single camera gpuQuad = GpuQuad.createRectilinear( // single camera; backend-aware (JCUDA legacy or native JNA)
gpuTileProcessor, // GPUTileProcessor gpuTileProcessor, gpuTileProcessor, // GPUTileProcessor gpuTileProcessor (null/unused in JNA mode),
gpu_max_width, // final int max_width, gpu_max_width, // final int max_width,
gpu_max_height, // final int max_height, gpu_max_height, // final int max_height,
1, // final int num_colors, // normally 1? 1, // final int num_colors, // normally 1?
......
...@@ -520,6 +520,53 @@ public class GpuQuad{ // quad camera description ...@@ -520,6 +520,53 @@ public class GpuQuad{ // quad camera description
return new GpuQuad(gpuTileProcessor, quadCLT, debug_level); return new GpuQuad(gpuTileProcessor, quadCLT, debug_level);
} }
// Factory for the RECTILINEAR single-array GpuQuad (synthetic, non-sensor images: no distortion
// correction, no aberration kernels, single camera). Counterpart of create(), selecting the backend
// at run time:
//   - JCUDA (default): exactly the legacy `new GpuQuad(gpuTileProcessor, w, h, colors, dbg)` call, so
//     the oracle path is unchanged;
//   - JNA: a rectilinear GpuQuadJna, configured from the "tp.jna.srcdir" / "tp.jna.devrt" system
//     properties (falling back to the built-in default paths below).
// By Claude on 06/26/2026.
public static GpuQuad createRectilinear(
        GPUTileProcessor gpuTileProcessor, // used by the JCUDA branch only; ignored in JNA mode (null is fine there)
        int max_width,
        int max_height,
        int num_colors,
        int debug_level) {
    // Legacy JCuda path first: nothing but the original constructor call.
    if (!useJnaBackend()) {
        return new GpuQuad(gpuTileProcessor, max_width, max_height, num_colors, debug_level);
    }
    // Native (JNA) path: resolve the kernel source directory and the CUDA device runtime library.
    final String kernelSrcDir = System.getProperty("tp.jna.srcdir", "/home/elphel/git/tile_processor_gpu/src");
    final String devRtLib = System.getProperty("tp.jna.devrt", "/usr/local/cuda/targets/x86_64-linux/lib/libcudadevrt.a");
    final String banner = "GpuQuad.createRectilinear(): NATIVE (JNA) backend "+max_width+"x"+max_height+
            " colors="+num_colors+" (src="+kernelSrcDir+")";
    System.out.println(banner);
    return new com.elphel.imagej.gpu.jna.GpuQuadJna(max_width, max_height, num_colors, kernelSrcDir, devRtLib, debug_level);
}
// No-allocation RECTILINEAR constructor for the native (JNA) backend subclass GpuQuadJna (no QuadCLT,
// single synthetic array). Mirrors the JCuda rectilinear ctor's CONFIG (rectilinear, num_cams=1, no
// kernels) but allocates NO JCuda GPU memory and creates NO JCuda context. By Claude on 06/26/2026.
//   max_width, max_height - stored as img_width / img_height
//   num_colors            - stored as num_colors
//   debug_level           - forwarded to setGpu_debug_level()
//   native_backend        - never read in the body. NOTE(review): presumably present only to give this
//                           overload a signature distinct from the JCuda constructors - confirm.
protected GpuQuad(
int max_width,
int max_height,
int num_colors,
int debug_level,
boolean native_backend) {
this.rectilinear = true;
setGpu_debug_level(debug_level);
// No JCuda tile processor and no per-scene QuadCLT are attached to this instance.
this.gpuTileProcessor = null;
this.quadCLT = null;
this.img_width = max_width;
this.img_height = max_height;
this.num_cams = 1; // single camera, only interscene correlations
this.num_all_pairs = 0;
this.num_colors = num_colors;
// All kernel-related dimensions are zeroed: the rectilinear configuration carries no kernels.
this.kernels_hor = 0;
this.kernels_vert = 0;
this.kern_tiles = 0;
this.kern_size = 0;
// Called after num_cams is assigned. NOTE(review): -1 presumably means "all sensors" - confirm
// against setSensorMaskInter().
setSensorMaskInter(-1);
// no cuMemAlloc — native backend allocates its own GPU memory
}
// No-allocation constructor for the native (JNA) backend subclass GpuQuadJna. // No-allocation constructor for the native (JNA) backend subclass GpuQuadJna.
// Sets the final config fields from quadCLT but allocates NO JCuda GPU memory and creates NO // Sets the final config fields from quadCLT but allocates NO JCuda GPU memory and creates NO
// JCuda context (gpuTileProcessor = null). The subclass owns its native (TpProc) GPU memory and // JCuda context (gpuTileProcessor = null). The subclass owns its native (TpProc) GPU memory and
......
...@@ -24,12 +24,13 @@ import com.sun.jna.Pointer; ...@@ -24,12 +24,13 @@ import com.sun.jna.Pointer;
* By Claude on 2026-06-25. * By Claude on 2026-06-25.
*/ */
public class GpuQuadJna extends GpuQuad { public class GpuQuadJna extends GpuQuad {
private final TpJna lib; private TpJna lib; // assigned once in initNative() (non-final to share across both ctors)
private final Pointer module; // TpModule* (NVRTC kernels) private Pointer module; // TpModule* (NVRTC kernels)
private final Pointer proc; // TpProc* (persistent device buffers) private Pointer proc; // TpProc* (persistent device buffers)
private boolean closed = false; private boolean closed = false;
/** /**
* Per-scene (sensor) backend.
* @param quadCLT the per-scene QuadCLT (geometry, kernels, sensor config) * @param quadCLT the per-scene QuadCLT (geometry, kernels, sensor config)
* @param srcdir directory with the kernel .cu/.h sources for NVRTC (e.g. tile_processor_gpu/src) * @param srcdir directory with the kernel .cu/.h sources for NVRTC (e.g. tile_processor_gpu/src)
* @param devrt path to libcudadevrt.a (for CDP linking) * @param devrt path to libcudadevrt.a (for CDP linking)
...@@ -37,6 +38,23 @@ public class GpuQuadJna extends GpuQuad { ...@@ -37,6 +38,23 @@ public class GpuQuadJna extends GpuQuad {
*/ */
public GpuQuadJna(QuadCLT quadCLT, String srcdir, String devrt, int debugLevel) { public GpuQuadJna(QuadCLT quadCLT, String srcdir, String devrt, int debugLevel) {
super(quadCLT, debugLevel, true); // no-alloc: config fields only, no JCuda context super(quadCLT, debugLevel, true); // no-alloc: config fields only, no JCuda context
initNative(srcdir, devrt);
}
/**
 * RECTILINEAR single-array backend (synthetic non-sensor images: no distortion, no aberration kernels,
 * single camera). Mirrors GpuQuad's rectilinear ctor config but stands up its own native TpProc.
 * Built via GpuQuad.createRectilinear() in JNA mode (e.g. the CUAS interscene GpuQuad). By Claude on 06/26/2026.
 * @param width      maximal image width, passed to the no-alloc rectilinear GpuQuad constructor
 * @param height     maximal image height, passed to the no-alloc rectilinear GpuQuad constructor
 * @param num_colors number of color channels, passed to the no-alloc rectilinear GpuQuad constructor
 * @param srcdir     directory with the kernel .cu/.h sources for NVRTC (e.g. tile_processor_gpu/src)
 * @param devrt      path to libcudadevrt.a (for CDP linking)
 * @param debugLevel debug level, passed to the superclass constructor
 */
public GpuQuadJna(int width, int height, int num_colors, String srcdir, String devrt, int debugLevel) {
super(width, height, num_colors, debugLevel, true); // no-alloc rectilinear config (num_cams=1, no kernels)
// Config fields must be set (by super) before the shared native bring-up runs.
initNative(srcdir, devrt);
}
// Shared native bring-up: load the lib, NVRTC-compile the module, create the persistent TpProc and size
// its buffers. tp_proc_setup uses the (already-set) config fields uniformly — for the rectilinear ctor
// num_cams=1 and kernels_hor=kern_tiles=0, so no kernel buffers are allocated.
private void initNative(String srcdir, String devrt) {
this.lib = Native.load("tileproc", TpJna.class); this.lib = Native.load("tileproc", TpJna.class);
this.module = lib.tp_create_module(srcdir, devrt); this.module = lib.tp_create_module(srcdir, devrt);
if (module == null) { if (module == null) {
...@@ -186,6 +204,16 @@ public class GpuQuadJna extends GpuQuad { ...@@ -186,6 +204,16 @@ public class GpuQuadJna extends GpuQuad {
for (int i = 0; i < f.length; i++) { double s = chans[0][i]; for (int j = 1; j < chans.length; j++) s += chans[j][i]; f[i] = (float) s; } for (int i = 0; i < f.length; i++) { double s = chans[0][i]; for (int j = 1; j < chans.length; j++) s += chans[j][i]; f[i] = (float) s; }
return f; return f;
} }
// Single pre-combined image upload (the rectilinear/synthetic path: ImageDtt.setRectilinearReferenceTD
// passes fpixels_ref directly). Base does a JCuda cuMemcpy2D into gpu_bayer_h[ncam]. By Claude on 06/26/2026.
// This override instead hands the array to the native library via tp_proc_set_image for camera index ncam.
// NOTE(review): bayer_image is forwarded without a null or length check, and neither `closed` nor the
// ncam range is tested before the native call - confirm callers always pass a valid full-frame array.
@Override public void setBayerImage(float[] bayer_image, int ncam) {
lib.tp_proc_set_image(proc, ncam, bayer_image);
}
// CLT buffers (main + ref) are pre-allocated full-frame in tp_proc_setup, so there is nothing to (re)allocate
// per call. Base allocs JCuda gpu_clt[_ref] here; native is fixed-size -> no-op (nothing newly allocated).
// Both arguments (wh, ref_scene) are ignored and the method always returns false.
// NOTE(review): a requested wh larger than the pre-sized native buffers is not detected here - confirm
// callers never exceed the dimensions given at construction.
@Override public boolean reAllocateClt(int[] wh, boolean ref_scene) {
return false;
}
// ---- tasks ---- // ---- tasks ----
@Override public void setTasks(TpTask[] tile_tasks, boolean use_aux, boolean verify) { @Override public void setTasks(TpTask[] tile_tasks, boolean use_aux, boolean verify) {
......
...@@ -897,6 +897,14 @@ public class ComboMatch { ...@@ -897,6 +897,14 @@ public class ComboMatch {
// for all modes - needed for create_overlaps || process_correlation || render_match || pattern_match // for all modes - needed for create_overlaps || process_correlation || render_match || pattern_match
if (GPU_QUAD_AFFINE == null) { if (GPU_QUAD_AFFINE == null) {
// Fail loud rather than NPE deep in JCuda: the orthomosaic GPU_QUAD_AFFINE (rectilinear) path is
// not on the validated JNA surface yet (wider than CUAS: affine matching). Run orthomosaic on
// JCuda. The CUAS rectilinear path IS ported (GpuQuad.createRectilinear). By Claude on 06/26/2026.
if (GpuQuad.useJnaBackend()) {
throw new UnsupportedOperationException(
"ComboMatch GPU_QUAD_AFFINE (orthomosaic, rectilinear) is not yet ported to the JNA backend "+
"(-Dtp.backend=jna); run orthomosaic with the JCUDA backend.");
}
System.out.println("Setting up GPU"); System.out.println("Setting up GPU");
try { try {
GPU_QUAD_AFFINE = new GpuQuad(// GPU_QUAD_AFFINE = new GpuQuad(//
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment