CLAUDE: Add ROI variant of convolve3d(), fix OOM in 5D fine-velocity stage

- CuasRTUtils: add convolve3d(data, Rectangle roi, result_in) that allocates only [roi.width*roi.height][nsub][nvel] instead of full [width*height][nsub][nvel], skips non-ROI pixels in center/margin loops, uses roi-relative indexing. Also fix showConvKernel5d(data, roi, ...) pixel_idx to use ROI-relative (py_roi*roi.width+px_roi) instead of full-image indexing. - CuasDetectRT: add dpixels_5d_roi_pyramid alongside dpixels_5d_pyramid. Level-0 and pyramid loop 5D blocks now call convolve3d(window, null) only when curt_save_c5full, and convolve3d(window, curt_save_select, null) only when curt_save_c5rect. RECT save blocks now read from dpixels_5d_roi_pyramid. Eliminates the OutOfMemoryError when curt_save_c5full=false. Co-authored-by: Claude <claude@elphel.com>

CLAUDE: Add ROI variant of convolve3d(), fix OOM in 5D fine-velocity stage
- CuasRTUtils: add convolve3d(data, Rectangle roi, result_in) that allocates only [roi.width*roi.height][nsub][nvel] instead of full [width*height][nsub][nvel], skips non-ROI pixels in center/margin loops, uses roi-relative indexing. Also fix showConvKernel5d(data, roi, ...) pixel_idx to use ROI-relative (py_roi*roi.width+px_roi) instead of full-image indexing. - CuasDetectRT: add dpixels_5d_roi_pyramid alongside dpixels_5d_pyramid. Level-0 and pyramid loop 5D blocks now call convolve3d(window, null) only when curt_save_c5full, and convolve3d(window, curt_save_select, null) only when curt_save_c5rect. RECT save blocks now read from dpixels_5d_roi_pyramid. Eliminates the OutOfMemoryError when curt_save_c5full=false. Co-authored-by: Claude <claude@elphel.com>
b4bfe9b1 · Andrey Filippov · eb6a764f · b4bfe9b1 · b4bfe9b1
Commit b4bfe9b1 authored Jun 07, 2026 by Andrey Filippov
Show whitespace changes
Inline Side-by-side

Showing with 137 additions and 10 deletions

CuasDetectRT.java src/main/java/com/elphel/imagej/cuas/rt/CuasDetectRT.java +22 -7

CuasRTUtils.java src/main/java/com/elphel/imagej/cuas/rt/CuasRTUtils.java +115 -3

No files found.
--- a/src/main/java/com/elphel/imagej/cuas/rt/CuasDetectRT.java
+++ b/src/main/java/com/elphel/imagej/cuas/rt/CuasDetectRT.java
@@ -270,6 +270,7 @@ public class CuasDetectRT {
   		String [][]     ts_pyramid =          new String [pyramid_levels][];
   		double [][][][] dpixels_3d3_pyramid = new double [pyramid_levels][][][];
   		double [][][][][] dpixels_5d_pyramid =     new double [pyramid_levels][][][][];
+   		double [][][][][] dpixels_5d_roi_pyramid = new double [pyramid_levels][][][][];
   		ts_pyramid[0] =          getTimeStamps(1);
   		dpixels_3d3_pyramid[0] = dpixels_3d3;
   		dpixels_pyramid[0] = new double[dpixels_log.length-1][width*height];
@@ -290,13 +291,21 @@ public class CuasDetectRT {
   		// Level-0 5D convolution (fine velocity from coarse-velocity history)
   		int num_5d_lev0 = Math.max(0, dpixels_3d3.length - num_hist_5d + 1);
   		dpixels_5d_pyramid[0] =     new double[num_5d_lev0][][][];
+   		dpixels_5d_roi_pyramid[0] = new double[num_5d_lev0][][][];
   		String[] ts_5d_lev0 = new String[num_5d_lev0];
+   		System.out.println("detectTargets(): will run convolve3d()");
   		for (int n5d = 0; n5d < num_5d_lev0; n5d++) {
+   			System.out.print ("n5d="+n5d+" ");
   			double[][][] window = new double[num_hist_5d][][];
   			for (int h = 0; h < num_hist_5d; h++) {
   				window[h] = dpixels_3d3[n5d + num_hist_5d - 1 - h];
   			}
+   			if (curt_save_c5full) {
   				dpixels_5d_pyramid[0][n5d] = cuasRTUtils.convolve3d(window, null);
+   			}
+   			if (curt_save_c5rect && (curt_save_select != null)) {
+   				dpixels_5d_roi_pyramid[0][n5d] = cuasRTUtils.convolve3d(window, curt_save_select, null);
+   			}
   			ts_5d_lev0[n5d] = ts_pyramid[0][n5d + num_hist_5d - 1];
   		}
   		if (save_5d_pixels && num_5d_lev0 > 0) {
@@ -308,7 +317,7 @@ public class CuasDetectRT {
   		}
   		if (curt_save_c5rect && num_5d_lev0 > 0) {
   			ImagePlus imp_5d_rect = cuasRTUtils.showConvKernel5d(
-   					dpixels_5d_pyramid[0],
+   					dpixels_5d_roi_pyramid[0],
   					curt_save_select,
   					ts_5d_lev0,
   					title_conv5d+"-RECT");
@@ -366,13 +375,19 @@ public class CuasDetectRT {
   		   		// 5D convolution for pyramid level nlev+1
   		   		int num_5d_lev = Math.max(0, dpixels_3d3_pyramid[nlev+1].length - num_hist_5d + 1);
   		   		dpixels_5d_pyramid[nlev+1] =     new double[num_5d_lev][][][];
+   		   		dpixels_5d_roi_pyramid[nlev+1] = new double[num_5d_lev][][][];
   		   		String[] ts_5d_lev = new String[num_5d_lev];
   		   		for (int n5d = 0; n5d < num_5d_lev; n5d++) {
   		   			double[][][] window = new double[num_hist_5d][][];
   		   			for (int h = 0; h < num_hist_5d; h++) {
   		   				window[h] = dpixels_3d3_pyramid[nlev+1][n5d + num_hist_5d - 1 - h];
   		   			}
+   		   			if (curt_save_c5full) {
   		   				dpixels_5d_pyramid[nlev+1][n5d] = cuasRTUtils.convolve3d(window, null);
+   		   			}
+   		   			if (curt_save_c5rect && (curt_save_select != null)) {
+   		   				dpixels_5d_roi_pyramid[nlev+1][n5d] = cuasRTUtils.convolve3d(window, curt_save_select, null);
+   		   			}
   		   			ts_5d_lev[n5d] = ts_pyramid[nlev+1][n5d + num_hist_5d - 1];
   		   		}
   		   		if (save_5d_pixels && num_5d_lev > 0) {
@@ -384,7 +399,7 @@ public class CuasDetectRT {
   		   		}
   		   		if (curt_save_c5rect && num_5d_lev > 0) {
   		   			ImagePlus imp_5d_rect = cuasRTUtils.showConvKernel5d(
-   		   					dpixels_5d_pyramid[nlev+1],
+   		   					dpixels_5d_roi_pyramid[nlev+1],
   		   					curt_save_select,
   		   					ts_5d_lev,
   		   					title_conv5d+"-LEV"+(nlev+1)+"-RECT");

--- a/src/main/java/com/elphel/imagej/cuas/rt/CuasRTUtils.java
+++ b/src/main/java/com/elphel/imagej/cuas/rt/CuasRTUtils.java
@@ -200,7 +200,9 @@ public class CuasRTUtils {
 		final AtomicInteger ai = new AtomicInteger(0);
 		final int [] coarse_vel_indx = consolidationKernel.getCoarseVIdx();
 		final int decimate_size = pixel_decimate*pixel_decimate;
+		System.out.print("convolve3d() nhist=");
 		for (int nhist = 0; nhist < nlayers; nhist++) {
+			System.out.print(nhist+" ");
 			final int fhist = nhist;
 			final int spat_rad = consolidationKernel.getSpatialRadius(); // may be later dependent on nhist for efficiency 
 			final double [][][][] kernel_hist = kernel[fhist];
@@ -288,10 +290,120 @@ public class CuasRTUtils {
 				};
 			}		      
 			ImageDtt.startAndJoin(threads);
+		}
+		System.out.println();
+		return result;
+	}

+	/**
+	 * ROI variant: compute fine-velocity convolution for a pixel rectangle only.
+	 * Result is indexed ROI-relatively: result[roi_pix][sub][vel] where
+	 * roi_pix = (target_y - roi.y) * roi.width + (target_x - roi.x).
+	 * Pixels outside roi are skipped; full-image source data is still used for
+	 * the spatial neighbourhood reads.
+	 */
+	public double [][][] convolve3d(
+			final double [][][] data,
+			final Rectangle     roi,
+			double [][][]       result_in) {
+		final int vel_size = 2*velocity_radius + 1;
+		final int nlayers = data.length;
+		final int roi_npix = roi.width * roi.height;
+		final double [][][] result = (result_in != null) ? result_in :
+				new double [roi_npix][pixel_decimate*pixel_decimate][vel_size * vel_size];
+		final double [][][][][] kernel = consolidationKernel.getKernel();
+		final Thread[] threads = ImageDtt.newThreadArray();
+		final AtomicInteger ai = new AtomicInteger(0);
+		final int [] coarse_vel_indx = consolidationKernel.getCoarseVIdx();
+		final int decimate_size = pixel_decimate*pixel_decimate;
+		final int roi_x0 = roi.x, roi_y0 = roi.y, roi_x1 = roi.x + roi.width, roi_y1 = roi.y + roi.height;
+		for (int nhist = 0; nhist < nlayers; nhist++) {
+			final int fhist = nhist;
+			final int spat_rad = consolidationKernel.getSpatialRadius();
+			final double [][][][] kernel_hist = kernel[fhist];
+			final double [][]     data_hist = data[fhist];
+			ai.set(0);
+			// center pixels — no source boundary checks needed
+			for (int ithread = 0; ithread < threads.length; ithread++) {
+				threads[ithread] = new Thread() {
+					public void run() {
+						for (int nPix = ai.getAndIncrement(); nPix < indx_center_5d.length; nPix = ai.getAndIncrement()) {
+							int ipix_dst = indx_center_5d[nPix];
+							int target_x = ipix_dst % width;
+							int target_y = ipix_dst / width;
+							if (target_x < roi_x0 || target_x >= roi_x1 ||
+								target_y < roi_y0 || target_y >= roi_y1) continue;
+							int roi_pix = (target_y - roi_y0) * roi.width + (target_x - roi_x0);
+							for (int sub_idx = 0; sub_idx < decimate_size; sub_idx++) {
+								for (int v_out_idx = 0; v_out_idx < kernel_hist.length; v_out_idx++) {
+									int src_coarse_v = coarse_vel_indx[v_out_idx];
+									if (src_coarse_v >= 0) {
+										double accumulated_flux = 0.0;
+										for (int dy = -spat_rad; dy <= spat_rad; dy++) {
+											int src_y = target_y + dy;
+											for (int dx = -spat_rad; dx <= spat_rad; dx++) {
+												int src_x = target_x + dx;
+												int src_pixel_idx = src_y * width + src_x;
+												double historical_value = data_hist[src_pixel_idx][src_coarse_v];
+												int k_dy = dy + spat_rad;
+												int k_dx = dx + spat_rad;
+												double weight = kernel_hist[v_out_idx][sub_idx][k_dy][k_dx];
+												accumulated_flux += historical_value * weight;
+											}
+										}
+										result[roi_pix][sub_idx][v_out_idx] += accumulated_flux;
+									}
+								}
+							}
+						}
+					}
+				};
+			}
+			ImageDtt.startAndJoin(threads);
+			ai.set(0);
+			// margin pixels — source boundary checks required
+			for (int ithread = 0; ithread < threads.length; ithread++) {
+				threads[ithread] = new Thread() {
+					public void run() {
+						for (int nPix = ai.getAndIncrement(); nPix < indx_margins_5d.length; nPix = ai.getAndIncrement()) {
+							int ipix_dst = indx_margins_5d[nPix];
+							int target_x = ipix_dst % width;
+							int target_y = ipix_dst / width;
+							if (target_x < roi_x0 || target_x >= roi_x1 ||
+								target_y < roi_y0 || target_y >= roi_y1) continue;
+							int roi_pix = (target_y - roi_y0) * roi.width + (target_x - roi_x0);
+							for (int sub_idx = 0; sub_idx < decimate_size; sub_idx++) {
+								for (int v_out_idx = 0; v_out_idx < kernel_hist.length; v_out_idx++) {
+									int src_coarse_v = coarse_vel_indx[v_out_idx];
+									if (src_coarse_v >= 0) {
+										double accumulated_flux = 0.0;
+										for (int dy = -spat_rad; dy <= spat_rad; dy++) {
+											int src_y = target_y + dy;
+											for (int dx = -spat_rad; dx <= spat_rad; dx++) {
+												int src_x = target_x + dx;
+												if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) {
+													int src_pixel_idx = src_y * width + src_x;
+													double historical_value = data_hist[src_pixel_idx][src_coarse_v];
+													int k_dy = dy + spat_rad;
+													int k_dx = dx + spat_rad;
+													double weight = kernel_hist[v_out_idx][sub_idx][k_dy][k_dx];
+													accumulated_flux += historical_value * weight;
+												}
+											}
+										}
+										result[roi_pix][sub_idx][v_out_idx] += accumulated_flux;
+									}
+								}
+							}
+						}
+					}
+				};
+			}
+			ImageDtt.startAndJoin(threads);
 		}
 		return result;
 	}
+
 	public double[][] convolve3D3LReLU(
 			final double [] data,
 			final double [] data_prev,
@@ -1121,7 +1233,7 @@ public class CuasRTUtils {
    			for (int px_roi = 0; px_roi < roi.width; px_roi++) {
    				int px_img = roi.x + px_roi;
    				if (px_img < 0 || px_img >= width) continue;
-    				int pixel_idx = py_img * width + px_img;
+    				int pixel_idx = py_roi * roi.width + px_roi;
    				int block_x   = px_roi * block_w;
    				int block_y   = py_roi * block_h;
    				double [][]   pix_data = data[nscene][pixel_idx]; // [nsub][nvel]