Commit 872107bf authored by Andrey Filippov

Claude: refineMotionVectors() — multithreaded masking + pre-computed kernel

- Pre-compute integer-pixel raised-cosine mask kernel once before nseq loop
- Allocate fpixels_masked[nframes][width*height] once; clear per nseq with Arrays.fill
- Restructure masking: parallel outer loop over frames (nFr via AtomicInteger),
  inner loop over tiles — no write contention (non-overlapping tile guarantee)
- Pre-extract tile center/velocity into arrays before thread launch
- Replace new ImagePlus(FloatProcessor) debug calls with ShowDoubleFloatArrays.showArrays()
- Fix "centre" -> "center" in comments; naming: nFr for the frame index taken from ai.getAndIncrement(), ntile for the tile index in plain for loops
parent c76920b7
......@@ -2309,34 +2309,36 @@ public class CuasMotion {
final int n_recenter = clt_parameters.imp.cuas_n_recenter;
final double rstr = clt_parameters.imp.cuas_rstr;
// Boosted pair count, centred on frame_center
// Boosted pair count, centered on frame_center
final int corr_pairs_ref = (int) Math.round(2 * half_accum_range * recalc_mv_boost);
// Hard-coded debug selectors: set >= 0 to enable per-scan/per-tile visualisation
final int dbg_nseq = -1;
final int dbg_tile = -1;
final int dbg_nseq = 20; // -1;
final int dbg_tile = 51+38*80; // -1;
// Show the mask weight profile once
if ((dbg_nseq >= 0 || dbg_tile >= 0) && debugLevel >= 0) {
int r1i = (int) Math.ceil(recalc_mv_r1);
int msize = 2 * r1i + 1;
float[] mask_img = new float[msize * msize];
// Pre-compute integer-pixel raised-cosine mask kernel once (shared across all nseq)
final int r1i = (int) Math.ceil(recalc_mv_r1);
final int mside = 2 * r1i + 1;
final float[] mask_kernel = new float[mside * mside];
for (int dy = -r1i; dy <= r1i; dy++) {
for (int dx = -r1i; dx <= r1i; dx++) {
double r = Math.sqrt((double)(dx * dx + dy * dy));
double w;
if (r <= recalc_mv_r0) w = 1.0;
else if (r >= recalc_mv_r1) w = 0.0;
else w = 0.5 * (Math.cos(Math.PI * (r - recalc_mv_r0) / (recalc_mv_r1 - recalc_mv_r0)) + 1.0);
mask_img[(dy + r1i) * msize + (dx + r1i)] = (float) w;
double r = Math.sqrt(dx * dx + dy * dy);
float w;
if (r <= recalc_mv_r0) w = 1.0f;
else if (r >= recalc_mv_r1) w = 0.0f;
else w = (float)(0.5 * (Math.cos(Math.PI * (r - recalc_mv_r0) / (recalc_mv_r1 - recalc_mv_r0)) + 1.0));
mask_kernel[(dy + r1i) * mside + (dx + r1i)] = w;
}
}
new ImagePlus("refineMotionVectors-mask-r0_" + recalc_mv_r0 + "-r1_" + recalc_mv_r1,
new FloatProcessor(msize, msize, mask_img)).show();
if ((dbg_nseq >= 0 || dbg_tile >= 0) && debugLevel >= 0) {
ShowDoubleFloatArrays.showArrays(mask_kernel.clone(), mside, mside,
"refineMotionVectors-mask-r0_" + recalc_mv_r0 + "-r1_" + recalc_mv_r1);
}
final int r1i = (int) Math.ceil(recalc_mv_r1) + 1; // pixel search radius
float[][] fpixels_masked = new float[nframes][]; // lazy-allocated per nseq
// Allocate staging array once; each nseq clears only the frames it needs
final float[][] fpixels_masked = new float[nframes][width * height];
final Thread[] threads = ImageDtt.newThreadArray();
final AtomicInteger ai = new AtomicInteger(0);
for (int nseq = 0; nseq < targets_nonoverlap.length; nseq++) {
// Skip scan positions where no target has a known centroid yet
......@@ -2351,7 +2353,7 @@ public class CuasMotion {
if (!has_centered) continue;
int frame_center = frame0 + nseq * corr_inc;
// Centre the boosted correlation window on frame_center
// Center the boosted correlation window on frame_center
int frame0_ref = frame_center - corr_pairs_ref / 2;
int frame1_ref = frame0_ref + corr_offset;
......@@ -2361,58 +2363,70 @@ public class CuasMotion {
int fmax_alloc = Math.min(nframes - 1, frame1_ref + corr_pairs_ref - 1);
if (fmin_alloc > fmax_alloc) continue; // no valid pairs in range
// Reset masked-frame array for this scan, allocate zero arrays for the window
for (int f = 0; f < nframes; f++) fpixels_masked[f] = null;
for (int f = fmin_alloc; f <= fmax_alloc; f++) {
fpixels_masked[f] = new float[width * height];
}
// Apply the raised-cosine mask around each target, tracking its motion
for (int ntile = 0; ntile < targets_nonoverlap[nseq].length; ntile++) {
// Clear staging frames for this scan position
for (int f = fmin_alloc; f <= fmax_alloc; f++) Arrays.fill(fpixels_masked[f], 0.0f);
// Pre-extract tile center/velocity for thread-safe parallel access
final int ntiles = targets_nonoverlap[nseq].length;
final boolean[] tile_valid = new boolean[ntiles];
final double[] tcx0 = new double[ntiles];
final double[] tcy0 = new double[ntiles];
final double[] tvx = new double[ntiles];
final double[] tvy = new double[ntiles];
for (int ntile = 0; ntile < ntiles; ntile++) {
double[] target = targets_nonoverlap[nseq][ntile];
if (target == null || Double.isNaN(target[CuasMotionLMA.RSLT_X])) continue;
int tileX = ntile % tilesX;
int tileY = ntile / tilesX;
// Target centre in image pixels at frame_center
double cx0 = tileX * tileSize + tileSize / 2.0 + target[CuasMotionLMA.RSLT_X];
double cy0 = tileY * tileSize + tileSize / 2.0 + target[CuasMotionLMA.RSLT_Y];
double vx = target[CuasMotionLMA.RSLT_VX]; // pixels per corr_offset frames
double vy = target[CuasMotionLMA.RSLT_VY];
tile_valid[ntile] = true;
tcx0[ntile] = (ntile % tilesX) * tileSize + tileSize / 2.0 + target[CuasMotionLMA.RSLT_X];
tcy0[ntile] = (ntile / tilesX) * tileSize + tileSize / 2.0 + target[CuasMotionLMA.RSLT_Y];
tvx[ntile] = target[CuasMotionLMA.RSLT_VX];
tvy[ntile] = target[CuasMotionLMA.RSLT_VY];
}
for (int f = fmin_alloc; f <= fmax_alloc; f++) {
double offset_scale = (double)(f - frame_center) / corr_offset;
double cx = cx0 + vx * offset_scale;
double cy = cy0 + vy * offset_scale;
int icx = (int) Math.round(cx);
int icy = (int) Math.round(cy);
double dcx = cx - icx;
double dcy = cy - icy;
// Parallel over frames: each thread applies all tile masks to one frame
final int fmin_f = fmin_alloc;
final int frange = fmax_alloc - fmin_f + 1;
final int fcenter = frame_center;
ai.set(0);
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
public void run() {
for (int nFr = ai.getAndIncrement(); nFr < frange; nFr = ai.getAndIncrement()) {
int f = fmin_f + nFr;
float[] src = fpixels[f];
float[] dst = fpixels_masked[f];
double offset_scale = (double)(f - fcenter) / corr_offset;
for (int ntile = 0; ntile < ntiles; ntile++) {
if (!tile_valid[ntile]) continue;
double cx = tcx0[ntile] + tvx[ntile] * offset_scale;
double cy = tcy0[ntile] + tvy[ntile] * offset_scale;
int icx = (int) Math.round(cx);
int icy = (int) Math.round(cy);
for (int dy = -r1i; dy <= r1i; dy++) {
int py = icy + dy;
if (py < 0 || py >= height) continue;
for (int dx = -r1i; dx <= r1i; dx++) {
int px = icx + dx;
if (px < 0 || px >= width) continue;
double r = Math.sqrt((dx - dcx) * (dx - dcx) + (dy - dcy) * (dy - dcy));
if (r >= recalc_mv_r1) continue;
double w = (r <= recalc_mv_r0) ? 1.0 :
0.5 * (Math.cos(Math.PI * (r - recalc_mv_r0) / (recalc_mv_r1 - recalc_mv_r0)) + 1.0);
dst[py * width + px] += (float)(w * src[py * width + px]);
float w = mask_kernel[(dy + r1i) * mside + (dx + r1i)];
if (w == 0.0f) continue;
dst[py * width + px] += w * src[py * width + px];
}
}
}
}
}
};
}
ImageDtt.startAndJoin(threads);
// Debug: show the masked source frame at frame_center for this scan
if (nseq == dbg_nseq && debugLevel >= 0) {
int f_show = Math.max(fmin_alloc, Math.min(fmax_alloc, frame_center));
new ImagePlus("refineMotionVectors-masked-nseq" + nseq + "-f" + f_show,
new FloatProcessor(width, height, fpixels_masked[f_show].clone())).show();
// Also show the same frame unmasked for comparison
new ImagePlus("refineMotionVectors-source-nseq" + nseq + "-f" + f_show,
new FloatProcessor(width, height, fpixels[f_show].clone())).show();
ShowDoubleFloatArrays.showArrays(fpixels_masked[f_show].clone(), width, height,
"refineMotionVectors-masked-nseq" + nseq + "-f" + f_show);
ShowDoubleFloatArrays.showArrays(fpixels[f_show], width, height,
"refineMotionVectors-source-nseq" + nseq + "-f" + f_show);
}
TDCorrTile[] tdCorrTiles = cuasMotion.correlatePairs(
......@@ -2447,8 +2461,8 @@ public class CuasMotion {
int corr_side = 2 * GPUTileProcessor.DTT_SIZE - 1; // 15
float[] corr_img = new float[corr_side * corr_side];
for (int i = 0; i < corr_img.length; i++) corr_img[i] = (float) corr_tiles_pd[dbg_tile][i];
new ImagePlus("refineMotionVectors-corr2d-nseq" + nseq + "-tile" + dbg_tile,
new FloatProcessor(corr_side, corr_side, corr_img)).show();
ShowDoubleFloatArrays.showArrays(corr_img, corr_side, corr_side,
"refineMotionVectors-corr2d-nseq" + nseq + "-tile" + dbg_tile);
}
// Add differential MV to targets_nonoverlap in-place
......@@ -8308,7 +8322,6 @@ public class CuasMotion {
boolean save_filtered_low = intermed_low && (niter < iter_show1);
boolean save_filtered_high = intermed_high && (niter < iter_show1);
// totals = getRemain(motion_sequence, target_sequence_multi, num_all, num_undef, num_good, num_bad);
totals = getRemain(target_sequence_multi, num_all, num_undef, num_good, num_bad);
if (totals[TOTALS_UNDEFINED] == 0) {
if (debugLevel > -4) System.out.println ("No undefined tiles left, breaking loop");
......@@ -8400,6 +8413,26 @@ public class CuasMotion {
// show good and bad accumulated here too?
}
// Andrey 05/05/2026 moved here (earlier) so shiftAndRenderAccumulate() will use update motion vector
// By Claude on 05/05/2026 — re-correlate with spatial mask around known target
if (recalc_mv) {
refineMotionVectors(
clt_parameters, // CLTParameters clt_parameters,
batch_mode, // boolean batch_mode,
cuasMotion, // CuasMotion cuasMotion,
recalc_mv_boost, // double recalc_mv_boost,
recalc_mv_r0, // double recalc_mv_r0,
recalc_mv_r1, // double recalc_mv_r1,
fpixels_tum, // float [][] fpixels,
targets_nonoverlap, // double [][][] targets_nonoverlap,
frame0, // int frame0,
corr_inc, // int corr_inc,
half_accum_range, // int half_accum_range,
smooth, // boolean smooth,
corr_offset, // int corr_offset,
debugLevel); // int debugLevel)
}
// perform new accumulations of shifted non-conflicting tiles
float [][] fpixels_accumulated = cuasMotion.shiftAndRenderAccumulate(
clt_parameters, // CLTParameters clt_parameters,
......@@ -8443,25 +8476,6 @@ public class CuasMotion {
// targets_new will contain motion vectors, centroid, and LMA results combined
//save_filtered_high
// By Claude on 05/05/2026 — re-correlate with spatial mask around known target
if (recalc_mv) {
refineMotionVectors(
clt_parameters, // CLTParameters clt_parameters,
batch_mode, // boolean batch_mode,
cuasMotion, // CuasMotion cuasMotion,
recalc_mv_boost, // double recalc_mv_boost,
recalc_mv_r0, // double recalc_mv_r0,
recalc_mv_r1, // double recalc_mv_r1,
fpixels_tum, // float [][] fpixels,
targets_nonoverlap, // double [][][] targets_nonoverlap,
frame0, // int frame0,
corr_inc, // int corr_inc,
half_accum_range, // int half_accum_range,
smooth, // boolean smooth,
corr_offset, // int corr_offset,
debugLevel); // int debugLevel)
}
float [][] accum_debug = save_filtered_high? new float [num_corr_samples][]:null; //fpixels_accumulated.length]
boolean keep_failed = false; // keep failed targets
double [][][][] targets_new_multi = getAccumulatedCoordinatesMulti(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment