Commit 872107bf authored by Andrey Filippov's avatar Andrey Filippov

Claude: refineMotionVectors() — multithreaded masking + pre-computed kernel

- Pre-compute integer-pixel raised-cosine mask kernel once before nseq loop
- Allocate fpixels_masked[nframes][width*height] once; clear per nseq with Arrays.fill
- Restructure masking: parallel outer loop over frames (nFr via AtomicInteger),
  inner loop over tiles — no write contention (non-overlapping tile guarantee)
- Pre-extract tile center/velocity into arrays before thread launch
- Replace new ImagePlus(FloatProcessor) debug calls with ShowDoubleFloatArrays.showArrays()
- Fix "centre" -> "center" in comments; naming: nFr for ai.getAndIncrement(), ntile for plain for
parent c76920b7
...@@ -2309,34 +2309,36 @@ public class CuasMotion { ...@@ -2309,34 +2309,36 @@ public class CuasMotion {
final int n_recenter = clt_parameters.imp.cuas_n_recenter; final int n_recenter = clt_parameters.imp.cuas_n_recenter;
final double rstr = clt_parameters.imp.cuas_rstr; final double rstr = clt_parameters.imp.cuas_rstr;
// Boosted pair count, centred on frame_center // Boosted pair count, centered on frame_center
final int corr_pairs_ref = (int) Math.round(2 * half_accum_range * recalc_mv_boost); final int corr_pairs_ref = (int) Math.round(2 * half_accum_range * recalc_mv_boost);
// Hard-coded debug selectors: set >= 0 to enable per-scan/per-tile visualisation // Hard-coded debug selectors: set >= 0 to enable per-scan/per-tile visualisation
final int dbg_nseq = -1; final int dbg_nseq = 20; // -1;
final int dbg_tile = -1; final int dbg_tile = 51+38*80; // -1;
// Show the mask weight profile once // Pre-compute integer-pixel raised-cosine mask kernel once (shared across all nseq)
if ((dbg_nseq >= 0 || dbg_tile >= 0) && debugLevel >= 0) { final int r1i = (int) Math.ceil(recalc_mv_r1);
int r1i = (int) Math.ceil(recalc_mv_r1); final int mside = 2 * r1i + 1;
int msize = 2 * r1i + 1; final float[] mask_kernel = new float[mside * mside];
float[] mask_img = new float[msize * msize]; for (int dy = -r1i; dy <= r1i; dy++) {
for (int dy = -r1i; dy <= r1i; dy++) { for (int dx = -r1i; dx <= r1i; dx++) {
for (int dx = -r1i; dx <= r1i; dx++) { double r = Math.sqrt(dx * dx + dy * dy);
double r = Math.sqrt((double)(dx * dx + dy * dy)); float w;
double w; if (r <= recalc_mv_r0) w = 1.0f;
if (r <= recalc_mv_r0) w = 1.0; else if (r >= recalc_mv_r1) w = 0.0f;
else if (r >= recalc_mv_r1) w = 0.0; else w = (float)(0.5 * (Math.cos(Math.PI * (r - recalc_mv_r0) / (recalc_mv_r1 - recalc_mv_r0)) + 1.0));
else w = 0.5 * (Math.cos(Math.PI * (r - recalc_mv_r0) / (recalc_mv_r1 - recalc_mv_r0)) + 1.0); mask_kernel[(dy + r1i) * mside + (dx + r1i)] = w;
mask_img[(dy + r1i) * msize + (dx + r1i)] = (float) w;
}
} }
new ImagePlus("refineMotionVectors-mask-r0_" + recalc_mv_r0 + "-r1_" + recalc_mv_r1, }
new FloatProcessor(msize, msize, mask_img)).show(); if ((dbg_nseq >= 0 || dbg_tile >= 0) && debugLevel >= 0) {
ShowDoubleFloatArrays.showArrays(mask_kernel.clone(), mside, mside,
"refineMotionVectors-mask-r0_" + recalc_mv_r0 + "-r1_" + recalc_mv_r1);
} }
final int r1i = (int) Math.ceil(recalc_mv_r1) + 1; // pixel search radius // Allocate staging array once; each nseq clears only the frames it needs
float[][] fpixels_masked = new float[nframes][]; // lazy-allocated per nseq final float[][] fpixels_masked = new float[nframes][width * height];
final Thread[] threads = ImageDtt.newThreadArray();
final AtomicInteger ai = new AtomicInteger(0);
for (int nseq = 0; nseq < targets_nonoverlap.length; nseq++) { for (int nseq = 0; nseq < targets_nonoverlap.length; nseq++) {
// Skip scan positions where no target has a known centroid yet // Skip scan positions where no target has a known centroid yet
...@@ -2351,7 +2353,7 @@ public class CuasMotion { ...@@ -2351,7 +2353,7 @@ public class CuasMotion {
if (!has_centered) continue; if (!has_centered) continue;
int frame_center = frame0 + nseq * corr_inc; int frame_center = frame0 + nseq * corr_inc;
// Centre the boosted correlation window on frame_center // Center the boosted correlation window on frame_center
int frame0_ref = frame_center - corr_pairs_ref / 2; int frame0_ref = frame_center - corr_pairs_ref / 2;
int frame1_ref = frame0_ref + corr_offset; int frame1_ref = frame0_ref + corr_offset;
...@@ -2361,58 +2363,70 @@ public class CuasMotion { ...@@ -2361,58 +2363,70 @@ public class CuasMotion {
int fmax_alloc = Math.min(nframes - 1, frame1_ref + corr_pairs_ref - 1); int fmax_alloc = Math.min(nframes - 1, frame1_ref + corr_pairs_ref - 1);
if (fmin_alloc > fmax_alloc) continue; // no valid pairs in range if (fmin_alloc > fmax_alloc) continue; // no valid pairs in range
// Reset masked-frame array for this scan, allocate zero arrays for the window // Clear staging frames for this scan position
for (int f = 0; f < nframes; f++) fpixels_masked[f] = null; for (int f = fmin_alloc; f <= fmax_alloc; f++) Arrays.fill(fpixels_masked[f], 0.0f);
for (int f = fmin_alloc; f <= fmax_alloc; f++) {
fpixels_masked[f] = new float[width * height]; // Pre-extract tile center/velocity for thread-safe parallel access
} final int ntiles = targets_nonoverlap[nseq].length;
final boolean[] tile_valid = new boolean[ntiles];
// Apply the raised-cosine mask around each target, tracking its motion final double[] tcx0 = new double[ntiles];
for (int ntile = 0; ntile < targets_nonoverlap[nseq].length; ntile++) { final double[] tcy0 = new double[ntiles];
final double[] tvx = new double[ntiles];
final double[] tvy = new double[ntiles];
for (int ntile = 0; ntile < ntiles; ntile++) {
double[] target = targets_nonoverlap[nseq][ntile]; double[] target = targets_nonoverlap[nseq][ntile];
if (target == null || Double.isNaN(target[CuasMotionLMA.RSLT_X])) continue; if (target == null || Double.isNaN(target[CuasMotionLMA.RSLT_X])) continue;
int tileX = ntile % tilesX; tile_valid[ntile] = true;
int tileY = ntile / tilesX; tcx0[ntile] = (ntile % tilesX) * tileSize + tileSize / 2.0 + target[CuasMotionLMA.RSLT_X];
// Target centre in image pixels at frame_center tcy0[ntile] = (ntile / tilesX) * tileSize + tileSize / 2.0 + target[CuasMotionLMA.RSLT_Y];
double cx0 = tileX * tileSize + tileSize / 2.0 + target[CuasMotionLMA.RSLT_X]; tvx[ntile] = target[CuasMotionLMA.RSLT_VX];
double cy0 = tileY * tileSize + tileSize / 2.0 + target[CuasMotionLMA.RSLT_Y]; tvy[ntile] = target[CuasMotionLMA.RSLT_VY];
double vx = target[CuasMotionLMA.RSLT_VX]; // pixels per corr_offset frames }
double vy = target[CuasMotionLMA.RSLT_VY];
// Parallel over frames: each thread applies all tile masks to one frame
for (int f = fmin_alloc; f <= fmax_alloc; f++) { final int fmin_f = fmin_alloc;
double offset_scale = (double)(f - frame_center) / corr_offset; final int frange = fmax_alloc - fmin_f + 1;
double cx = cx0 + vx * offset_scale; final int fcenter = frame_center;
double cy = cy0 + vy * offset_scale; ai.set(0);
int icx = (int) Math.round(cx); for (int ithread = 0; ithread < threads.length; ithread++) {
int icy = (int) Math.round(cy); threads[ithread] = new Thread() {
double dcx = cx - icx; public void run() {
double dcy = cy - icy; for (int nFr = ai.getAndIncrement(); nFr < frange; nFr = ai.getAndIncrement()) {
float[] src = fpixels[f]; int f = fmin_f + nFr;
float[] dst = fpixels_masked[f]; float[] src = fpixels[f];
for (int dy = -r1i; dy <= r1i; dy++) { float[] dst = fpixels_masked[f];
int py = icy + dy; double offset_scale = (double)(f - fcenter) / corr_offset;
if (py < 0 || py >= height) continue; for (int ntile = 0; ntile < ntiles; ntile++) {
for (int dx = -r1i; dx <= r1i; dx++) { if (!tile_valid[ntile]) continue;
int px = icx + dx; double cx = tcx0[ntile] + tvx[ntile] * offset_scale;
if (px < 0 || px >= width) continue; double cy = tcy0[ntile] + tvy[ntile] * offset_scale;
double r = Math.sqrt((dx - dcx) * (dx - dcx) + (dy - dcy) * (dy - dcy)); int icx = (int) Math.round(cx);
if (r >= recalc_mv_r1) continue; int icy = (int) Math.round(cy);
double w = (r <= recalc_mv_r0) ? 1.0 : for (int dy = -r1i; dy <= r1i; dy++) {
0.5 * (Math.cos(Math.PI * (r - recalc_mv_r0) / (recalc_mv_r1 - recalc_mv_r0)) + 1.0); int py = icy + dy;
dst[py * width + px] += (float)(w * src[py * width + px]); if (py < 0 || py >= height) continue;
for (int dx = -r1i; dx <= r1i; dx++) {
int px = icx + dx;
if (px < 0 || px >= width) continue;
float w = mask_kernel[(dy + r1i) * mside + (dx + r1i)];
if (w == 0.0f) continue;
dst[py * width + px] += w * src[py * width + px];
}
}
}
} }
} }
} };
} }
ImageDtt.startAndJoin(threads);
// Debug: show the masked source frame at frame_center for this scan // Debug: show the masked source frame at frame_center for this scan
if (nseq == dbg_nseq && debugLevel >= 0) { if (nseq == dbg_nseq && debugLevel >= 0) {
int f_show = Math.max(fmin_alloc, Math.min(fmax_alloc, frame_center)); int f_show = Math.max(fmin_alloc, Math.min(fmax_alloc, frame_center));
new ImagePlus("refineMotionVectors-masked-nseq" + nseq + "-f" + f_show, ShowDoubleFloatArrays.showArrays(fpixels_masked[f_show].clone(), width, height,
new FloatProcessor(width, height, fpixels_masked[f_show].clone())).show(); "refineMotionVectors-masked-nseq" + nseq + "-f" + f_show);
// Also show the same frame unmasked for comparison ShowDoubleFloatArrays.showArrays(fpixels[f_show], width, height,
new ImagePlus("refineMotionVectors-source-nseq" + nseq + "-f" + f_show, "refineMotionVectors-source-nseq" + nseq + "-f" + f_show);
new FloatProcessor(width, height, fpixels[f_show].clone())).show();
} }
TDCorrTile[] tdCorrTiles = cuasMotion.correlatePairs( TDCorrTile[] tdCorrTiles = cuasMotion.correlatePairs(
...@@ -2447,8 +2461,8 @@ public class CuasMotion { ...@@ -2447,8 +2461,8 @@ public class CuasMotion {
int corr_side = 2 * GPUTileProcessor.DTT_SIZE - 1; // 15 int corr_side = 2 * GPUTileProcessor.DTT_SIZE - 1; // 15
float[] corr_img = new float[corr_side * corr_side]; float[] corr_img = new float[corr_side * corr_side];
for (int i = 0; i < corr_img.length; i++) corr_img[i] = (float) corr_tiles_pd[dbg_tile][i]; for (int i = 0; i < corr_img.length; i++) corr_img[i] = (float) corr_tiles_pd[dbg_tile][i];
new ImagePlus("refineMotionVectors-corr2d-nseq" + nseq + "-tile" + dbg_tile, ShowDoubleFloatArrays.showArrays(corr_img, corr_side, corr_side,
new FloatProcessor(corr_side, corr_side, corr_img)).show(); "refineMotionVectors-corr2d-nseq" + nseq + "-tile" + dbg_tile);
} }
// Add differential MV to targets_nonoverlap in-place // Add differential MV to targets_nonoverlap in-place
...@@ -8308,7 +8322,6 @@ public class CuasMotion { ...@@ -8308,7 +8322,6 @@ public class CuasMotion {
boolean save_filtered_low = intermed_low && (niter < iter_show1); boolean save_filtered_low = intermed_low && (niter < iter_show1);
boolean save_filtered_high = intermed_high && (niter < iter_show1); boolean save_filtered_high = intermed_high && (niter < iter_show1);
// totals = getRemain(motion_sequence, target_sequence_multi, num_all, num_undef, num_good, num_bad);
totals = getRemain(target_sequence_multi, num_all, num_undef, num_good, num_bad); totals = getRemain(target_sequence_multi, num_all, num_undef, num_good, num_bad);
if (totals[TOTALS_UNDEFINED] == 0) { if (totals[TOTALS_UNDEFINED] == 0) {
if (debugLevel > -4) System.out.println ("No undefined tiles left, breaking loop"); if (debugLevel > -4) System.out.println ("No undefined tiles left, breaking loop");
...@@ -8400,6 +8413,26 @@ public class CuasMotion { ...@@ -8400,6 +8413,26 @@ public class CuasMotion {
// show good and bad accumulated here too? // show good and bad accumulated here too?
} }
// Andrey 05/05/2026 moved here (earlier) so shiftAndRenderAccumulate() will use update motion vector
// By Claude on 05/05/2026 — re-correlate with spatial mask around known target
if (recalc_mv) {
refineMotionVectors(
clt_parameters, // CLTParameters clt_parameters,
batch_mode, // boolean batch_mode,
cuasMotion, // CuasMotion cuasMotion,
recalc_mv_boost, // double recalc_mv_boost,
recalc_mv_r0, // double recalc_mv_r0,
recalc_mv_r1, // double recalc_mv_r1,
fpixels_tum, // float [][] fpixels,
targets_nonoverlap, // double [][][] targets_nonoverlap,
frame0, // int frame0,
corr_inc, // int corr_inc,
half_accum_range, // int half_accum_range,
smooth, // boolean smooth,
corr_offset, // int corr_offset,
debugLevel); // int debugLevel)
}
// perform new accumulations of shifted non-conflicting tiles // perform new accumulations of shifted non-conflicting tiles
float [][] fpixels_accumulated = cuasMotion.shiftAndRenderAccumulate( float [][] fpixels_accumulated = cuasMotion.shiftAndRenderAccumulate(
clt_parameters, // CLTParameters clt_parameters, clt_parameters, // CLTParameters clt_parameters,
...@@ -8443,25 +8476,6 @@ public class CuasMotion { ...@@ -8443,25 +8476,6 @@ public class CuasMotion {
// targets_new will contain motion vectors, centroid, and LMA results combined // targets_new will contain motion vectors, centroid, and LMA results combined
//save_filtered_high //save_filtered_high
// By Claude on 05/05/2026 — re-correlate with spatial mask around known target
if (recalc_mv) {
refineMotionVectors(
clt_parameters, // CLTParameters clt_parameters,
batch_mode, // boolean batch_mode,
cuasMotion, // CuasMotion cuasMotion,
recalc_mv_boost, // double recalc_mv_boost,
recalc_mv_r0, // double recalc_mv_r0,
recalc_mv_r1, // double recalc_mv_r1,
fpixels_tum, // float [][] fpixels,
targets_nonoverlap, // double [][][] targets_nonoverlap,
frame0, // int frame0,
corr_inc, // int corr_inc,
half_accum_range, // int half_accum_range,
smooth, // boolean smooth,
corr_offset, // int corr_offset,
debugLevel); // int debugLevel)
}
float [][] accum_debug = save_filtered_high? new float [num_corr_samples][]:null; //fpixels_accumulated.length] float [][] accum_debug = save_filtered_high? new float [num_corr_samples][]:null; //fpixels_accumulated.length]
boolean keep_failed = false; // keep failed targets boolean keep_failed = false; // keep failed targets
double [][][][] targets_new_multi = getAccumulatedCoordinatesMulti( double [][][][] targets_new_multi = getAccumulatedCoordinatesMulti(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment