Commit 747ff9d1 authored by Andrey Filippov

CLAUDE: GpuQuadJna texture overrides (oracle): execTextures + readback

Completes the oracle GPU surface. The reliable gap finder is
  comm -23 <(ImageDtt gpuQuad.* calls) <(GpuQuadJna overrides)
not the gpuTrace dump (only ~14 methods are instrumented, so e.g.
getFlatTextures was invisible in the trace though it is on the path).

Overrides (delegating to the new tp_proc_* texture API):
- execTextures: builds weights[3]/params[5], forwards calc_textures/calc_extra/
  linescan/dust/keep flags. Implements the production (USE_DS_DP) behavior.
- getTextureIndices: reads kernel-built count + packed indices.
- getExtra: reshapes diff_rgb_combo (texture_indices order) into
  [num_cams*(num_colors+1)][tilesX*tilesY] keyed by ntile -- identical to base.
- getFlatTextures: de-pitches gpu_textures -- identical to base.

TpJna.java: bindings for tp_proc_exec_textures/get_texture_indices/
get_diff_rgb_combo/get_textures.

Edits only -- not mvn-compiled (Eyesis run was live). Signatures match base
@Override; referenced fields are public final / public static.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 39b0fb90
...@@ -333,4 +333,62 @@ public class GpuQuadJna extends GpuQuad { ...@@ -333,4 +333,62 @@ public class GpuQuadJna extends GpuQuad {
@Override public void printConstMem(String const_name, boolean flt) { @Override public void printConstMem(String const_name, boolean flt) {
if (flt) System.out.println("GpuQuadJna.printConstMem(" + const_name + "): skipped (native backend)"); if (flt) System.out.println("GpuQuadJna.printConstMem(" + const_name + "): skipped (native backend)");
} }
// ---- textures (oracle path): textures_nonoverlap (DP) + readback. Matches GpuQuad.execTextures_DP:
// textures_nonoverlap<<<1,1>>> CDP-builds the index list (create_nonoverlap_list) then CDP-launches
// textures_accumulate. Production uses linescan_order=0 (so getExtra de-interleaves via texture_indices).
/**
 * Forwards a texture-generation request to the native {@code tp_proc_exec_textures} call.
 *
 * <p>The color weights are narrowed to a fixed three-element float array (missing
 * entries stay 0), the five scalar thresholds are packed in the order
 * {@code min_shot, scale_shot, diff_sigma, diff_threshold, min_agree}, and every
 * boolean is passed as 0/1. The value returned by the native call is not used.
 *
 * @param color_weights  per-color weights; element 0 is always read, elements 1 and 2 only if present
 * @param is_lwir        forwarded as a 0/1 flag
 * @param min_shot       packed as params[0]
 * @param scale_shot     packed as params[1]
 * @param diff_sigma     packed as params[2]
 * @param diff_threshold packed as params[3]
 * @param min_agree      packed as params[4]
 * @param dust_remove    forwarded as a 0/1 flag
 * @param keep_weights   forwarded unchanged
 * @param calc_textures  forwarded as a 0/1 flag
 * @param calc_extra     forwarded as a 0/1 flag
 * @param linescan_order forwarded as a 0/1 flag
 */
@Override public void execTextures(
        double [] color_weights, boolean is_lwir, double min_shot, double scale_shot,
        double diff_sigma, double diff_threshold, double min_agree, boolean dust_remove,
        int keep_weights, boolean calc_textures, boolean calc_extra, boolean linescan_order) {
    // The native side always receives three weights; slots past the supplied ones remain 0.
    final float [] weights = new float[3];
    weights[0] = (float) color_weights[0];
    final int supplied = Math.min(color_weights.length, 3);
    for (int c = 1; c < supplied; c++) {
        weights[c] = (float) color_weights[c];
    }

    final float [] params = new float[5];
    params[0] = (float) min_shot;
    params[1] = (float) scale_shot;
    params[2] = (float) diff_sigma;
    params[3] = (float) diff_threshold;
    params[4] = (float) min_agree;

    // Booleans cross the native boundary as ints.
    final int lwirFlag     = is_lwir        ? 1 : 0;
    final int dustFlag     = dust_remove    ? 1 : 0;
    final int texturesFlag = calc_textures  ? 1 : 0;
    final int extraFlag    = calc_extra     ? 1 : 0;
    final int linescanFlag = linescan_order ? 1 : 0;

    lib.tp_proc_exec_textures(proc, params, weights,
            lwirFlag, dustFlag, keep_weights,
            texturesFlag, extraFlag, linescanFlag);
}
// read pnum (kernel-built count) then that many packed indices (= GpuQuad.getTextureIndices, sans the
// private num_texture_tiles side-effect, which our overridden getFlatTextures does not depend on).
/**
 * Reads back the packed texture indices produced by the last texture pass.
 *
 * <p>A scratch buffer of {@code tilesX * tilesY} ints is handed to
 * {@code tp_proc_get_texture_indices}; the result is trimmed to the number of
 * entries actually available. The binding is documented as returning the true
 * index count while writing at most {@code n} entries, so the count is clamped
 * to the scratch buffer size before copying. A negative return is treated as
 * "no indices".
 *
 * @return a newly allocated array holding the valid packed indices (possibly empty)
 */
@Override public int[] getTextureIndices() {
    int tilesX = img_width / getDttSize(), tilesY = img_height / getDttSize();
    int [] tmp = new int [tilesX * tilesY];
    int cnt = lib.tp_proc_get_texture_indices(proc, tmp, tmp.length);
    // Clamp into [0, tmp.length]: negative means nothing to read, and a count larger
    // than the buffer would otherwise make System.arraycopy throw.
    cnt = Math.max(0, Math.min(cnt, tmp.length));
    int [] out = new int [cnt];
    System.arraycopy(tmp, 0, out, 0, cnt);
    return out;
}
// diff_rgb_combo "extra": reshape diff_rgb_combo (texture_indices order, linescan_order=0) into
// [num_cams*(num_colors+1)][tilesX*tilesY] keyed by ntile. Identical to GpuQuad.getExtra.
/**
 * Reads {@code diff_rgb_combo} from the native side and scatters it into a
 * per-layer, per-tile table.
 *
 * <p>The native buffer holds {@code num_cams * (num_colors + 1)} consecutive values
 * for each entry of {@link #getTextureIndices()}, in that order. Entries whose
 * {@code LIST_TEXTURE_BIT} is clear are skipped; for the others the tile number is
 * the index shifted right by {@code TEXT_NTILE_SHIFT}. Tiles that never appear keep
 * the array default of 0.
 *
 * @return array indexed as {@code [layer][tile]}, sized
 *         {@code [num_cams * (num_colors + 1)][tilesX * tilesY]}
 */
@Override public float[][] getExtra() {
    final int [] indices = getTextureIndices();
    final int layers = num_cams * (num_colors + 1);
    final float [] packed = new float [indices.length * layers];
    if (packed.length != 0) {
        lib.tp_proc_get_diff_rgb_combo(proc, packed, packed.length);
    }
    final int tilesX = img_width / getDttSize();
    final int tilesY = img_height / getDttSize();
    final float [][] extra = new float [layers][tilesX * tilesY];
    int next = 0; // start of the following entry's values in packed
    for (int index : indices) {
        final int base = next;
        next += layers;
        if (((index >> GPUTileProcessor.LIST_TEXTURE_BIT) & 1) == 0) {
            continue;
        }
        final int ntile = index >> GPUTileProcessor.TEXT_NTILE_SHIFT;
        for (int layer = 0; layer < layers; layer++) {
            extra[layer][ntile] = packed[base + layer];
        }
    }
    return extra;
}
// de-pitch gpu_textures (= GpuQuad.getFlatTextures): num_tiles * (slices * (2*DTT)^2) floats.
/**
 * Fetches the texture tiles from the native side as one flat float array.
 *
 * <p>Each tile holds {@code num_colors + 1} slices, plus
 * {@code num_cams + num_colors + 1} more when {@code keep_weights} is set; each
 * slice is {@code (2 * getDttSize())} squared floats. The buffer is filled by
 * {@code tp_proc_get_textures}, whose return value is not used.
 *
 * @param num_tiles    number of tiles to read
 * @param num_colors   number of color slices per tile (this parameter, not the field, is used)
 * @param keep_weights whether the additional weight slices are included
 * @return array of {@code num_tiles * slices * sliceSize} floats
 */
@Override public float[] getFlatTextures(int num_tiles, int num_colors, boolean keep_weights) {
    int slices = num_colors + 1;
    if (keep_weights) {
        slices += num_cams + num_colors + 1;
    }
    final int side = 2 * getDttSize();
    final int sliceSize = side * side;
    final int tileSize = slices * sliceSize;
    final float [] flat = new float [tileSize * num_tiles];
    final int keepFlag = keep_weights ? 1 : 0;
    lib.tp_proc_get_textures(proc, flat, num_tiles, num_colors, keepFlag);
    return flat;
}
} }
...@@ -104,6 +104,18 @@ public interface TpJna extends Library { ...@@ -104,6 +104,18 @@ public interface TpJna extends Library {
// NOTE(review): the return-value semantics of these three bindings are not documented
// in this part of the file -- confirm against the native library before relying on them.
int tp_proc_get_corr_combo_indices(Pointer proc, int[] out, int n);
int tp_proc_get_corr_td(Pointer proc, float[] out);
int tp_proc_erase_corrs(Pointer proc);
// ---- oracle textures: textures_nonoverlap (DP) + readback (matches GpuQuad.execTextures_DP) ----
/**
 * Runs {@code textures_nonoverlap<<<1,1>>>}, which CDP-builds the index list and then accumulates.
 *
 * @param proc          processor handle, the same one taken by the other {@code tp_proc_*} calls
 * @param params5       {@code {min_shot, scale_shot, diff_sigma, diff_threshold, min_agree}}
 * @param weights3      color weights, R,B,G
 * @param isLwir        0/1 flag (the Java caller passes a boolean as 0/1)
 * @param dustRemove    0/1 flag
 * @param keepWeights   passed through from the caller's {@code keep_weights} int
 * @param calcTextures  0/1 flag; when set, output goes to gpu_textures
 * @param calcExtra     0/1 flag; when set, output goes to diff_rgb_combo
 * @param linescanOrder 0/1 flag; 0 in production
 * @return the kernel-built texture-tile count
 */
int tp_proc_exec_textures(Pointer proc, float[] params5, float[] weights3, int isLwir,
int dustRemove, int keepWeights, int calcTextures, int calcExtra, int linescanOrder);
/**
 * Reads {@code min(count, n)} packed texture indices into {@code out}.
 *
 * @param proc processor handle
 * @param out  destination buffer
 * @param n    capacity of {@code out}, in ints
 * @return the true count (gpu_texture_indices_len), which is not limited to {@code n};
 *         callers must clamp it before using it as a length into {@code out}
 */
int tp_proc_get_texture_indices(Pointer proc, int[] out, int n);
/**
 * Reads {@code n = num_tex*num_cams*(num_colors+1)} floats of diff_rgb_combo (texture_indices order).
 *
 * @param proc processor handle
 * @param out  destination buffer
 * @param n    number of floats to read
 * @return NOTE(review): meaning of the return value is not stated here -- confirm against the native side
 */
int tp_proc_get_diff_rgb_combo(Pointer proc, float[] out, int n);
/**
 * De-pitches gpu_textures into {@code out[num_tiles * slices*256]}, with the slice count depending on
 * {@code keepWeights}.
 * NOTE(review): 256 presumably equals (2*DTT size)^2 for a DTT size of 8; the Java caller sizes its
 * buffer from {@code (2 * getDttSize())^2} -- confirm the two always agree.
 *
 * @param proc        processor handle
 * @param out         destination buffer
 * @param numTiles    number of tiles
 * @param numColors   number of colors
 * @param keepWeights 0/1 flag selecting the larger per-tile slice count
 * @return NOTE(review): meaning of the return value is not stated here -- confirm against the native side
 */
int tp_proc_get_textures(Pointer proc, float[] out, int numTiles, int numColors, int keepWeights);
// NOTE(review): presumably releases the native processor behind proc -- confirm against the native library.
void tp_proc_destroy(Pointer proc);
/** Validate the persistent path: convert CLT (outCltErr), imclt RBG (outRbgErr), quad corr /** Validate the persistent path: convert CLT (outCltErr), imclt RBG (outRbgErr), quad corr
* order-independent (outCorrErr), + no_kernels smoke (outNokernMax). */ * order-independent (outCorrErr), + no_kernels smoke (outNokernMax). */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment