Commit 34acc8ba authored by Andrey Filippov

CLAUDE: Step 2 (B) — GpuQuadJna convert->imclt->getRBG overrides delegate to TpProc

Override GpuQuad's GPU-touching methods for the image path, delegating to the native TpProc
(with own caching, since base uses the null gpuTileProcessor):
- setGeometryCorrection / setExtrinsicsVector -> tp_proc_set_geometry / set_correction_vector
  (gc.expandSensors(16).toFloatArray, cv.toFullRollArray).
- setConvolutionKernels -> per-cam transpose-flatten (i=((i0&7)<<3)+((i0>>3)&7), CltExtra offsets)
  -> tp_proc_set_kernels / set_kernel_offsets.
- setBayerImages -> channel-combine -> tp_proc_set_image (center -> set_center_image broadcast).
- setTasks -> TpTask.asFloatArray -> tp_proc_set_tasks.
- execSetTilesOffsets -> set gc+cv -> tp_proc_exec_geometry.
- execConvertDirect(ref_scene,wh,erase_clt,no_kernels,use_center_image) -> tp_proc_exec_convert_direct
  (honors no_kernels skip-deconvolution + use_center_image, the fragile paths).
- execImcltRbgAll -> tp_proc_exec_imclt; getRBG -> tp_proc_get_rbg + same inner-region extraction.

mvn -DskipTests compile clean; all @Override signatures match base. Correlations (execCorr2D_*) and the
backend selector are next. JCUDA remains the untouched default.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent a138f826
package com.elphel.imagej.gpu.jna; package com.elphel.imagej.gpu.jna;
import com.elphel.imagej.gpu.GpuQuad; import com.elphel.imagej.gpu.GpuQuad;
import com.elphel.imagej.gpu.GPUTileProcessor;
import com.elphel.imagej.gpu.TpTask;
import com.elphel.imagej.tileprocessor.Correlation2d;
import com.elphel.imagej.tileprocessor.CorrVector;
import com.elphel.imagej.tileprocessor.GeometryCorrection;
import com.elphel.imagej.tileprocessor.QuadCLT; import com.elphel.imagej.tileprocessor.QuadCLT;
import com.sun.jna.Native; import com.sun.jna.Native;
import com.sun.jna.Pointer; import com.sun.jna.Pointer;
...@@ -63,9 +68,141 @@ public class GpuQuadJna extends GpuQuad { ...@@ -63,9 +68,141 @@ public class GpuQuadJna extends GpuQuad {
if (module != null) lib.tp_destroy_module(module); if (module != null) lib.tp_destroy_module(module);
} }
// NOTE: GPU-touching methods (setGeometryCorrection / setConvolutionKernels / setBayerImages / // ---- own caching flags (base uses gpuTileProcessor.* which is null here) ----
// setTasks / execSetTilesOffsets / execConvertDirect / execImcltRbgAll / getRBG / execCorr2D_* / private boolean jna_kernels_set = false;
// getCorr2D / getCorr2DCombo / handleWH) are overridden incrementally to delegate to TpProc. private boolean jna_rbg_corr_ready = false;
// Until overridden, the inherited base methods would touch null JCuda buffers — the selector must
// only route the validated CUAS path here. // Allocate imclt(RBG)+correlation buffers once. RBG sizing needs only img/num_colors; correlation
// buffers are sized by num_all_pairs + corr_out_rad. sel_pairs/color_weights here are placeholders
// (the correlation overrides — added next — will pass the real per-call values).
/**
 * Runs the native imclt (RBG) + correlation buffer setup at most once.
 * After the first successful call {@code jna_rbg_corr_ready} is set and later calls do nothing.
 * The three color weights are (1, 1, 1) when {@code num_colors == 1}, otherwise (0.25, 0.25, 0.5);
 * the four arguments after the pair count are passed as zeros.
 */
private void ensureRbgCorr() {
    if (!jna_rbg_corr_ready) {
        final int out_radius = 7; // CORR_OUT_RAD -> 15x15
        final float[] weights;
        if (num_colors == 1) {
            weights = new float[]{1f, 1f, 1f};
        } else {
            weights = new float[]{0.25f, 0.25f, 0.5f};
        }
        lib.tp_proc_setup_rbg_corr(
                proc,
                Correlation2d.getNumPairs(num_cams),
                0, 0, 0, 0,
                weights[0], weights[1], weights[2],
                out_radius);
        jna_rbg_corr_ready = true;
    }
}
// ---- geometry ----
/**
 * Uploads the geometry correction currently returned by {@code quadCLT.getGeometryCorrection()},
 * delegating to the two-argument overload with {@code use_java_rByRDist = false}.
 */
@Override
public void setGeometryCorrection() {
    final GeometryCorrection current = quadCLT.getGeometryCorrection();
    setGeometryCorrection(current, false);
}
/**
 * Expands the geometry correction to {@code GPUTileProcessor.MAX_NUM_CAMS} sensors, flattens it
 * to a float array and hands it to the native processor.
 *
 * @param gc                geometry correction to upload
 * @param use_java_rByRDist not read by this override
 */
@Override
public void setGeometryCorrection(GeometryCorrection gc, boolean use_java_rByRDist) {
    final float[] packed = gc.expandSensors(GPUTileProcessor.MAX_NUM_CAMS).toFloatArray();
    final int count = packed.length;
    lib.tp_proc_set_geometry(proc, packed, count);
}
/**
 * Narrows the full-roll correction vector from double to float and uploads it to the native processor.
 *
 * @param cv correction vector; its {@code toFullRollArray()} output is what gets sent
 */
@Override
public void setExtrinsicsVector(CorrVector cv) {
    final double[] wide = cv.toFullRollArray();
    final float[] narrow = new float[wide.length];
    int k = 0;
    for (double value : wide) {
        narrow[k++] = (float) value;
    }
    lib.tp_proc_set_correction_vector(proc, narrow, narrow.length);
}
// ---- aberration kernels (same transpose-flatten as GpuQuad, then native upload) ----
/**
 * Uploads the kernels returned by {@code quadCLT.getCLTKernels()} unless they were already
 * uploaded and {@code force} is false.
 *
 * @param force upload even if {@code jna_kernels_set} is already true
 */
@Override
public void setConvolutionKernels(boolean force) {
    if (force || !jna_kernels_set) {
        // The cache check has been done here, so the delegate is always forced.
        setConvolutionKernels(quadCLT.getCLTKernels(), true);
    }
}
/**
 * Flattens the supplied kernels per camera and uploads them, together with their offsets,
 * to the native processor; then marks the kernels as set.
 * <p>
 * Indexing used below is {@code clt_kernels[ncam][color][tileY][tileX][part]}: parts 0..3 supply
 * 64 values each (read in transposed 8x8 order), part 4 supplies the offsets record.
 * Buffers are sized from camera 0 and reused for every camera; the offsets buffer reserves
 * 8 floats per kernel.
 *
 * @param clt_kernels kernels to upload
 * @param force       upload even if {@code jna_kernels_set} is already true
 */
@Override
public void setConvolutionKernels(double[][][][][][] clt_kernels, boolean force) {
    if (!force && jna_kernels_set) {
        return;
    }
    final boolean transpose = true;
    final int tiles_y = clt_kernels[0][0].length;
    final int tiles_x = clt_kernels[0][0][0].length;
    final int colors = clt_kernels[0].length;
    final int num_kernels = tiles_y * tiles_x * colors;
    final float[] kernel_buf = new float[num_kernels * 4 * GPUTileProcessor.DTT_SIZE * GPUTileProcessor.DTT_SIZE];
    final float[] offset_buf = new float[num_kernels * 8];
    for (int ncam = 0; ncam < clt_kernels.length; ncam++) {
        final double[][][][][] cam = clt_kernels[ncam];

        // Pass 1: kernel data, ordered tileY -> tileX -> color -> part.
        int kpos = 0;
        for (int ty = 0; ty < cam[0].length; ty++) {
            for (int tx = 0; tx < cam[0][ty].length; tx++) {
                for (int col = 0; col < cam.length; col++) {
                    for (int part = 0; part < 4; part++) {
                        final double[] src = cam[col][ty][tx][part];
                        for (int i0 = 0; i0 < 64; i0++) {
                            int from = i0;
                            if (transpose) {
                                // swap the row/column halves of the 6-bit index
                                from = ((i0 & 7) << 3) + ((i0 >> 3) & 7);
                            }
                            kernel_buf[kpos] = (float) src[from];
                            kpos++;
                        }
                    }
                }
            }
        }

        // Pass 2: offsets record (part 4), same tile/color ordering.
        int opos = 0;
        for (int ty = 0; ty < cam[0].length; ty++) {
            for (int tx = 0; tx < cam[0][ty].length; tx++) {
                for (int col = 0; col < cam.length; col++) {
                    final double[] extra = cam[col][ty][tx][4];
                    for (int i = 0; i < extra.length; i++) {
                        offset_buf[opos] = (float) extra[i];
                        opos++;
                    }
                }
            }
        }

        lib.tp_proc_set_kernels(proc, ncam, kernel_buf, kernel_buf.length);
        lib.tp_proc_set_kernel_offsets(proc, ncam, offset_buf, offset_buf.length);
    }
    jna_kernels_set = true;
}
// ---- bayer / source images (combine channels -> one float[] per cam) ----
/**
 * Uploads source images to the native processor.
 * With {@code center} set, the single center image from {@code quadCLT} is combined and sent via
 * {@code tp_proc_set_center_image} (nothing is sent if the combined image is {@code null});
 * otherwise the per-camera data from {@code quadCLT.getResetImageData()} is uploaded.
 *
 * @param force  not consulted by this override; the upload is always performed
 * @param center upload the center image instead of the per-camera images
 */
@Override
public void setBayerImages(boolean force, boolean center) {
    if (!center) {
        setBayerImages(quadCLT.getResetImageData(), true);
        return;
    }
    final double[][] center_chans = quadCLT.getImageCenter();
    quadCLT.getResetImageCenter(); // result intentionally unused
    final float[] combined = combineChannels(center_chans);
    if (combined != null) {
        lib.tp_proc_set_center_image(proc, combined); // broadcast to all sensors
    }
}
/**
 * Combines the channels of each camera's image and uploads the result for that camera index.
 * Cameras whose combined image is {@code null} are skipped.
 *
 * @param bayer_data images indexed as {@code [camera][channel][pixel]}
 * @param force      not read by this override
 */
@Override
public void setBayerImages(double[][][] bayer_data, boolean force) {
    int ncam = 0;
    for (double[][] cam_chans : bayer_data) {
        final float[] combined = combineChannels(cam_chans);
        if (combined != null) {
            lib.tp_proc_set_image(proc, ncam, combined);
        }
        ncam++;
    }
}
/**
 * Sums the split-color channels (1 or 3 in current use) of one image into a single float image.
 * Element {@code i} of the result is the sum of {@code chans[j][i]} over all channels {@code j},
 * accumulated in double precision and narrowed to float once per pixel.
 * The result length is taken from the first channel.
 *
 * @param chans per-channel pixel arrays; may be {@code null}
 * @return the combined image, or {@code null} when {@code chans} is {@code null}, has no channels,
 *         or its first channel is {@code null}
 */
private static float[] combineChannels(double[][] chans) {
    // No channels means no image: report it like a missing image instead of failing on chans[0].
    if (chans == null || chans.length == 0 || chans[0] == null) {
        return null;
    }
    final double[] first = chans[0];
    final float[] combined = new float[first.length];
    for (int i = 0; i < combined.length; i++) {
        double sum = first[i];
        for (int j = 1; j < chans.length; j++) {
            sum += chans[j][i];
        }
        combined[i] = (float) sum;
    }
    return combined;
}
// ---- tasks ----
/**
 * Serializes the tile tasks into one float array and uploads it to the native processor.
 * Side effects: stores the task count in {@code num_task_tiles} and ORs {@code 511} into the
 * {@code task} field of every element of {@code tile_tasks} (the caller's objects are modified).
 *
 * @param tile_tasks tasks to upload
 * @param use_aux    forwarded to {@code TpTask.asFloatArray}
 * @param verify     not read by this override
 */
@Override
public void setTasks(TpTask[] tile_tasks, boolean use_aux, boolean verify) {
    num_task_tiles = tile_tasks.length;
    final int floats_per_task = getTaskSize();
    final int total_floats = floats_per_task * num_task_tiles;
    final float[] packed = new float[total_floats];
    for (int slot = 0; slot < num_task_tiles; slot++) {
        final TpTask task = tile_tasks[slot];
        task.task |= 511;
        task.asFloatArray(packed, slot, use_aux);
    }
    lib.tp_proc_set_tasks(proc, packed, num_task_tiles, total_floats);
}
// ---- geometry execution (calc_reverse_distortions + rot_derivs + calculate_tiles_offsets) ----
/**
 * Re-uploads the geometry correction and the correction vector taken from {@code quadCLT},
 * then runs the native geometry step.
 *
 * @param uniform_grid passed to the native call as 1 (true) or 0 (false)
 */
@Override
public void execSetTilesOffsets(boolean uniform_grid) {
    setGeometryCorrection(); // gc
    final CorrVector vector = quadCLT.getGeometryCorrection()
            .expandSensors(GPUTileProcessor.MAX_NUM_CAMS)
            .getCorrVector();
    setExtrinsicsVector(vector); // cv
    final int uniform_flag;
    if (uniform_grid) {
        uniform_flag = 1;
    } else {
        uniform_flag = 0;
    }
    lib.tp_proc_exec_geometry(proc, uniform_flag);
}
// ---- direct CLT conversion (+ the fragile no_kernels / use_center_image / erase_clt / ref_scene paths) ----
/**
 * Prepares inputs and runs the native direct CLT conversion.
 * When {@code rectilinear} is false, source images are uploaded first (center image if
 * {@code use_center_image}); kernels are additionally uploaded (if not cached) unless
 * {@code no_kernels} is set. When {@code rectilinear} is true nothing is uploaded here.
 *
 * @param ref_scene        passed to the native call as 1/0
 * @param wh               not read by this override
 * @param erase_clt        passed to the native call unchanged
 * @param no_kernels       skip kernel upload; also passed to the native call as 1/0
 * @param use_center_image upload the center image instead of per-camera images
 */
@Override
public void execConvertDirect(boolean ref_scene, int[] wh, int erase_clt, boolean no_kernels, boolean use_center_image) {
    if (!rectilinear) {
        if (!no_kernels) {
            setConvolutionKernels(false);
        }
        setBayerImages(false, use_center_image);
    }
    final int ref_flag = ref_scene ? 1 : 0;
    // NOTE(review): kernel upload is skipped for (rectilinear || no_kernels), but only no_kernels
    // is forwarded to the native call — confirm the native side handles rectilinear itself.
    final int no_kernels_flag = no_kernels ? 1 : 0;
    lib.tp_proc_exec_convert_direct(proc, ref_flag, erase_clt, no_kernels_flag);
}
// ---- inverse CLT -> RBG ----
/**
 * Makes sure the RBG/correlation buffers are set up, then runs the native inverse CLT
 * with the low-pass filter flag fixed to 1.
 *
 * @param is_mono   not read by this override
 * @param ref_scene not read by this override
 * @param wh        not read by this override
 */
@Override
public void execImcltRbgAll(boolean is_mono, boolean ref_scene, int[] wh) {
    ensureRbgCorr();
    final int apply_lpf = 1;
    lib.tp_proc_exec_imclt(proc, apply_lpf);
}
/**
 * Fetches the native RBG output for one camera and crops each color plane to the image size.
 * The native buffer is read as {@code num_colors} consecutive planes, each
 * {@code (width + DTT_SIZE) x (height + DTT_SIZE)}; the returned planes are
 * {@code width x height}, taken starting {@code DTT_SIZE / 2} rows and columns into each plane.
 *
 * @param ncam camera index passed to the native call
 * @return cropped image as {@code [color][pixel]}, rows of {@code getImageWidth()} pixels
 */
@Override
public float[][] getRBG(int ncam) {
    final int margin = GPUTileProcessor.DTT_SIZE;
    final int out_w = getImageWidth();
    final int out_h = getImageHeight();
    final int gpu_w = out_w + margin;
    final int gpu_h = out_h + margin;
    final int plane_size = gpu_w * gpu_h;
    final float[] raw = new float[num_colors * plane_size]; // == TpProc rbg_w x rbg_h (de-pitched)
    lib.tp_proc_get_rbg(proc, ncam, raw);
    final float[][] cropped = new float[num_colors][out_w * out_h];
    // offset of the first kept pixel inside a plane: margin/2 rows down and margin/2 columns in
    final int inset = (margin / 2) * (gpu_w + 1);
    for (int color = 0; color < num_colors; color++) {
        int src = inset + color * plane_size;
        int dst = 0;
        for (int row = 0; row < out_h; row++) {
            System.arraycopy(raw, src, cropped[color], dst, out_w);
            src += gpu_w;
            dst += out_w;
        }
    }
    return cropped;
}
// ---- correlations (execCorr2D_TD / _inter_TD / _combine / _normalize, getCorr2D[Combo]):
// NOT yet overridden — added in the next increment. Inherited base versions touch null JCuda
// buffers, so the selector/test must drive only the convert->imclt->getRBG path until then.
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment