Commit 39b0fb90 authored by Andrey Filippov

CLAUDE: gpuTrace dedup + GpuQuadJna oracle TD-corr readback overrides

gpuTrace now prints each Class.method ONCE (was per-call -> spammy). Oracle JCuda trace showed it uses
the TD-correlation readback path: getCorrIndices / getCorrTdData / getCorrComboIndices / eraseGpuCorrs
(un-overridden -> would NPE in JNA). Override them via the new native tp_proc_get_corr_indices /
get_corr_combo_indices / get_corr_td (DtoH) + tp_proc_erase_corrs. mvn -DskipTests compile clean.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 8b3dd30b
......@@ -494,7 +494,12 @@ public class GpuQuad{ // quad camera description
// "GpuQuadJna" if a JNA run falls through to an un-overridden base method -> reveals oracle's actual
// GPU-method usage (run JCuda first) and any JNA coverage gap, before an NPE. By Claude 2026-06-25.
// Initial value comes from the static initializer: true only when system property tp.trace is exactly "1"
// (a missing property yields false). The field is not final, so it can still be reassigned afterwards.
public static boolean GPU_TRACE = "1".equals(System.getProperty("tp.trace"));
/**
 * Prints a "[GPUTRACE] Class.method" line on every call while GPU_TRACE is set; does nothing otherwise.
 * The class part is the runtime class' simple name, so a subclass instance reports its own name.
 *
 * @param m method name to report
 */
protected void gpuTrace(String m) {
    if (!GPU_TRACE) {
        return;
    }
    final String owner = getClass().getSimpleName();
    System.out.println("[GPUTRACE] " + owner + "." + m);
}
// Keys ("Class.method") that have already been reported; shared by all instances, backed by a ConcurrentHashMap key set.
private static final java.util.Set<String> GPU_TRACE_SEEN = java.util.concurrent.ConcurrentHashMap.newKeySet();
/**
 * Reports a "Class.method" key at most once: the diagnostic of interest is the set of methods used,
 * not every individual call. Does nothing unless GPU_TRACE is set.
 *
 * @param m method name to report; combined with the runtime class' simple name to form the key
 */
protected void gpuTrace(String m) {
    if (GPU_TRACE) {
        final String label = getClass().getSimpleName() + "." + m;
        // Set.add returns true only for a key that was not present yet -> first sighting.
        final boolean firstSighting = GPU_TRACE_SEEN.add(label);
        if (firstSighting) {
            System.out.println("[GPUTRACE] " + label);
        }
    }
}
// Backend selector (architecture B). Default JCUDA; set -Dtp.backend=jna to use the native
// (libtileproc.so via JNA) backend GpuQuadJna instead. JNA mode never initializes JCuda.
......
......@@ -295,6 +295,28 @@ public class GpuQuadJna extends GpuQuad {
return (wh != null) ? wh : new int[]{img_width, img_height};
}
// ---- oracle TD-correlation readback / erase (read gpu_corr_indices / gpu_corrs_td / combo_indices) ----
/**
 * Reads the correlation indices back through the native library.
 * The result array is sized by tp_proc_num_corr_tiles and then filled by tp_proc_get_corr_indices.
 * NOTE(review): the int returned by tp_proc_get_corr_indices is discarded, and a negative count would
 * fail at the array allocation - confirm the native contract for both.
 *
 * @return a newly allocated array with one entry per reported correlation tile
 */
@Override public int[] getCorrIndices() {
    final int tileCount = lib.tp_proc_num_corr_tiles(proc);
    final int[] indices = new int[tileCount];
    lib.tp_proc_get_corr_indices(proc, indices, tileCount);
    return indices;
}
/**
 * Reads the TD correlation data back through the native library.
 * The buffer holds (tile count) * 4 * getDttSize() * getDttSize() floats.
 * NOTE(review): tp_proc_get_corr_td receives no length argument, so the native side must agree with
 * this sizing; its int return value is discarded - confirm both against the native implementation.
 *
 * @return a newly allocated array holding the data for all reported correlation tiles
 */
@Override public float[] getCorrTdData() {
    final int tileCount = lib.tp_proc_num_corr_tiles(proc);
    final int floatsPerTile = 4 * getDttSize() * getDttSize();
    final float[] tdData = new float[tileCount * floatsPerTile];
    lib.tp_proc_get_corr_td(proc, tdData);
    return tdData;
}
/**
 * Reads the combo correlation indices back through the native library.
 * The result array is sized by tp_proc_num_corr_combo and then filled by tp_proc_get_corr_combo_indices.
 * NOTE(review): the int returned by tp_proc_get_corr_combo_indices is discarded - confirm whether it
 * is a status code that should be checked.
 *
 * @return a newly allocated array with one entry per reported combo
 */
@Override public int[] getCorrComboIndices() {
    final int comboCount = lib.tp_proc_num_corr_combo(proc);
    final int[] comboIndices = new int[comboCount];
    lib.tp_proc_get_corr_combo_indices(proc, comboIndices, comboCount);
    return comboIndices;
}
/**
 * Forwards the erase request to the native library via tp_proc_erase_corrs.
 * NOTE(review): the native int return value is discarded - confirm whether it signals failure.
 */
@Override
public void eraseGpuCorrs() {
    lib.tp_proc_erase_corrs(proc);
}
// ---- LPF filter coefficients -> native __constant__ memory (same as GpuQuad's cuModuleGetGlobal path) ----
@Override public void setLpfRbg(float[][] lpf_rbg, boolean debug) {
int l = lpf_rbg[0].length;
......
......@@ -99,6 +99,11 @@ public interface TpJna extends Library {
/** Count that GpuQuadJna.getCorrComboIndices() uses to size the buffer it passes to tp_proc_get_corr_combo_indices. */
int tp_proc_num_corr_combo(Pointer proc);
/** Upload to a named __constant__ symbol (lpf_data / lpf_corr / lpf_rb_corr). 0 on success. */
int tp_proc_set_const(Pointer proc, String name, float[] data, int n);
// ---- oracle TD-correlation readback / erase ----
/** Fills {@code out} with correlation indices; the caller passes {@code n = out.length}, taken from tp_proc_num_corr_tiles.
 *  NOTE(review): meaning of the int return (status vs. count) is not documented here - confirm. */
int tp_proc_get_corr_indices(Pointer proc, int[] out, int n);
/** Fills {@code out} with combo correlation indices; the caller passes {@code n = out.length}, taken from tp_proc_num_corr_combo.
 *  NOTE(review): meaning of the int return (status vs. count) is not documented here - confirm. */
int tp_proc_get_corr_combo_indices(Pointer proc, int[] out, int n);
/** Fills {@code out} with TD correlation data. There is no length argument: the caller allocates
 *  num_corr_tiles * 4 * dttSize * dttSize floats, and the native side is presumed to write exactly that many.
 *  NOTE(review): confirm the sizing contract and the meaning of the int return. */
int tp_proc_get_corr_td(Pointer proc, float[] out);
/** Native counterpart of eraseGpuCorrs(). NOTE(review): meaning of the int return is not documented here - confirm. */
int tp_proc_erase_corrs(Pointer proc);
void tp_proc_destroy(Pointer proc);
/** Validate the persistent path: convert CLT (outCltErr), imclt RBG (outRbgErr), quad corr
* order-independent (outCorrErr), + no_kernels smoke (outNokernMax). */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment