Commit 131b371e authored by Andrey Filippov

CLAUDE: Step 2 corr — GpuQuadJna correlation overrides (full CUAS GPU surface, compiles)

Override execCorr2D_TD / execCorr2D_inter_TD / execCorr2D_combine / execCorr2D_normalize / getCorr2D /
getCorr2DCombo delegating to the granular TpProc functions (setCorrMask kept for getNumUsedPairs;
mono scale triplet 1/0/0; init|no_transpose<<1). handleWH -> full-frame no-op (TpProc fixed-size).
GpuQuadJna now covers the full CUAS GPU surface (geometry/kernels/bayer/tasks/convert/imclt/getRBG/
correlations). mvn compile clean. fcorr_weights (per-tile) + setLpf* not yet plumbed — to surface in
troubleshooting. Next: backend selector.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 34acc8ba
...@@ -202,7 +202,55 @@ public class GpuQuadJna extends GpuQuad { ...@@ -202,7 +202,55 @@ public class GpuQuadJna extends GpuQuad {
return fimg; return fimg;
} }
// ---- correlations (execCorr2D_TD / _inter_TD / _combine / _normalize, getCorr2D[Combo]): // ---- correlations (delegate to the granular TpProc functions) ----
// NOT yet overridden — added in the next increment. Inherited base versions touch null JCuda private float[] scaleTriplet(double[] scales) {
// buffers, so the selector/test must drive only the convert->imclt->getRBG path until then. int ncol = Math.min(num_colors, 3);
return new float[]{
(ncol > 1) ? (float) scales[0] : 1f,
(ncol > 1) ? (float) scales[1] : 0f,
(ncol > 2) ? (float) scales[2] : 0f };
}
/**
 * Runs the transform-domain 2D correlation through the native TpProc backend.
 *
 * @param scales       per-color scales, reduced to three floats by {@code scaleTriplet}
 * @param sel_pairs_in selected-pairs words; the first four are forwarded to the native
 *                     call, and words missing from a shorter array are passed as 0
 */
@Override public void execCorr2D_TD(double[] scales, int[] sel_pairs_in) {
    ensureRbgCorr();
    setCorrMask(sel_pairs_in); // base: corr_mask_int + indices (host-only) so getNumUsedPairs() works
    final float[] s = scaleTriplet(scales);
    // The native entry point takes exactly four words. Zero-pad a shorter array instead of
    // failing with ArrayIndexOutOfBoundsException; words beyond the fourth were never used.
    final int[] sel = java.util.Arrays.copyOf(sel_pairs_in, 4);
    lib.tp_proc_exec_corr2d_td(proc, sel[0], sel[1], sel[2], sel[3], s[0], s[1], s[2]);
}
/**
 * Runs the inter-scene transform-domain correlation through the native TpProc backend,
 * using the sensor mask reported by {@code getSensorMaskInter()}.
 *
 * @param scales per-color scales, reduced to three floats by {@code scaleTriplet}
 */
@Override public void execCorr2D_inter_TD(double[] scales) {
    ensureRbgCorr();
    final float[] colorScales = scaleTriplet(scales);
    final int sensorMask = getSensorMaskInter();
    lib.tp_proc_exec_corr2d_inter_td(
            proc,
            sensorMask,
            colorScales[0],
            colorScales[1],
            colorScales[2]);
}
/**
 * Combines per-pair correlations via the native TpProc backend.
 * The pair count passed to the native call comes from {@code getNumUsedPairs()};
 * {@code num_pairs_in} is not read by this override.
 *
 * @param init_corr             sets bit 0 of the flag word passed to the native call
 * @param num_pairs_in          unused here (kept for signature compatibility)
 * @param pairs_mask            forwarded unchanged to the native call
 * @param no_transpose_vertical sets bit 1 of the flag word passed to the native call
 */
@Override public void execCorr2D_combine(boolean init_corr, int num_pairs_in, int pairs_mask, boolean no_transpose_vertical) {
    final int usedPairs = getNumUsedPairs();
    int flags = 0;
    if (init_corr) {
        flags |= 1;
    }
    if (no_transpose_vertical) {
        flags |= 2;
    }
    lib.tp_proc_exec_corr2d_combine(proc, flags, usedPairs, pairs_mask);
}
/**
 * Normalizes correlations via the native TpProc backend.
 *
 * @param combo         passed to the native call as 1 (true) or 0 (false)
 * @param fat_zero      forwarded unchanged
 * @param fcorr_weights per-tile fat-zero divisor; NOT yet plumbed, so it is ignored here
 *                      and the native side behaves as if null had been passed
 * @param corr_radius   forwarded unchanged
 */
@Override public void execCorr2D_normalize(boolean combo, double fat_zero, float[] fcorr_weights, int corr_radius) {
    final int comboFlag;
    if (combo) {
        comboFlag = 1;
    } else {
        comboFlag = 0;
    }
    lib.tp_proc_exec_corr2d_normalize(proc, comboFlag, fat_zero, corr_radius);
}
/**
 * Fetches the per-pair 2D correlations from the native TpProc backend and splits the
 * flat result into one array per correlation tile.
 *
 * @param corr_rad correlation radius; each tile holds (2*corr_rad+1)^2 floats
 * @return one row per tile reported by {@code tp_proc_num_corr_tiles}
 */
@Override public float[][] getCorr2D(int corr_rad) {
    final int side = 2 * corr_rad + 1;
    final int tileLen = side * side;
    final int numTiles = lib.tp_proc_num_corr_tiles(proc);
    final float[] packed = new float[numTiles * tileLen];
    lib.tp_proc_get_corr2d(proc, packed, corr_rad);
    final float[][] tiles = new float[numTiles][tileLen];
    int offset = 0;
    for (float[] tile : tiles) {
        System.arraycopy(packed, offset, tile, 0, tileLen);
        offset += tileLen;
    }
    return tiles;
}
/**
 * Fetches the combined (quad) 2D correlations from the native TpProc backend and splits
 * the flat result into one array per combo tile.
 * NOTE(review): the native getter is not given corr_rad, so the buffer size computed
 * here presumably has to match what the native side writes — confirm.
 *
 * @param corr_rad correlation radius; each tile holds (2*corr_rad+1)^2 floats
 * @return one row per tile reported by {@code tp_proc_num_corr_combo}
 */
@Override public float[][] getCorr2DCombo(int corr_rad) {
    final int side = 2 * corr_rad + 1;
    final int tileLen = side * side;
    final int numTiles = lib.tp_proc_num_corr_combo(proc);
    final float[] packed = new float[numTiles * tileLen];
    lib.tp_proc_get_corr2d_combo(proc, packed);
    final float[][] tiles = new float[numTiles][tileLen];
    int offset = 0;
    for (float[] tile : tiles) {
        System.arraycopy(packed, offset, tile, 0, tileLen);
        offset += tileLen;
    }
    return tiles;
}
/**
 * TpProc is full-frame fixed-size, so no per-WOI reallocation happens here.
 *
 * @param wh        requested {width, height}, or null
 * @param ref_scene not used by this override
 * @return {@code wh} itself when non-null, otherwise a new {img_width, img_height} array
 */
@Override public int[] handleWH(int[] wh, boolean ref_scene) {
    if (wh == null) {
        return new int[]{img_width, img_height};
    }
    return wh;
}
} }
...@@ -86,6 +86,15 @@ public interface TpJna extends Library { ...@@ -86,6 +86,15 @@ public interface TpJna extends Library {
int tp_proc_exec_corr2d(Pointer proc, double fatZero); int tp_proc_exec_corr2d(Pointer proc, double fatZero);
/** de-pitch quad correlation; returns num_corr_combo. */ /** de-pitch quad correlation; returns num_corr_combo. */
int tp_proc_get_corr2d_combo(Pointer proc, float[] out); int tp_proc_get_corr2d_combo(Pointer proc, float[] out);
// ---- granular correlations (match GpuQuad's separate exec calls) ----
/** TD correlation; GpuQuadJna passes four selected-pairs words (s0..s3) and three color scales.
 *  NOTE(review): meaning of the int result is not shown here — confirm against the native side. */
int tp_proc_exec_corr2d_td(Pointer proc, int s0,int s1,int s2,int s3, float sc0,float sc1,float sc2);
/** Inter-scene TD correlation; GpuQuadJna passes getSensorMaskInter() as selSensors plus three color scales.
 *  NOTE(review): meaning of the int result is not shown here — confirm. */
int tp_proc_exec_corr2d_inter_td(Pointer proc, int selSensors, float sc0,float sc1,float sc2);
/** Combine correlations; GpuQuadJna builds initCorrCombo as init_corr (bit 0) + no_transpose_vertical (bit 1)
 *  and passes getNumUsedPairs() as numPairs. NOTE(review): result meaning not shown — confirm. */
int tp_proc_exec_corr2d_combine(Pointer proc, int initCorrCombo, int numPairs, int pairsMask);
/** Normalize correlations; combo is passed as 1/0 by GpuQuadJna. Per-tile weights are not part of this signature.
 *  NOTE(review): result meaning not shown — confirm. */
int tp_proc_exec_corr2d_normalize(Pointer proc, int combo, double fatZero, int corrRadius);
/** Copy per-pair correlations into out; GpuQuadJna sizes out as tp_proc_num_corr_tiles * (2*corrRad+1)^2 floats.
 *  NOTE(review): result meaning not shown — confirm. */
int tp_proc_get_corr2d(Pointer proc, float[] out, int corrRad);
/** Tile count GpuQuadJna uses to size the getCorr2D output. */
int tp_proc_num_corr_tiles(Pointer proc);
/** Tile count GpuQuadJna uses to size the getCorr2DCombo output. */
int tp_proc_num_corr_combo(Pointer proc);
void tp_proc_destroy(Pointer proc); void tp_proc_destroy(Pointer proc);
/** Validate the persistent path: convert CLT (outCltErr), imclt RBG (outRbgErr), quad corr /** Validate the persistent path: convert CLT (outCltErr), imclt RBG (outRbgErr), quad corr
* order-independent (outCorrErr), + no_kernels smoke (outNokernMax). */ * order-independent (outCorrErr), + no_kernels smoke (outNokernMax). */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment