Commit b4bfe9b1 authored by Andrey Filippov

CLAUDE: Add ROI variant of convolve3d(), fix OOM in 5D fine-velocity stage

- CuasRTUtils: add convolve3d(data, Rectangle roi, result_in) that allocates
  only [roi.width*roi.height][nsub][nvel] instead of full [width*height][nsub][nvel],
  skips non-ROI pixels in center/margin loops, uses roi-relative indexing.
  Also fix showConvKernel5d(data, roi, ...) pixel_idx to use ROI-relative
  (py_roi*roi.width+px_roi) instead of full-image indexing.

- CuasDetectRT: add dpixels_5d_roi_pyramid alongside dpixels_5d_pyramid.
  Level-0 and pyramid loop 5D blocks now call convolve3d(window, null) only
  when curt_save_c5full, and convolve3d(window, curt_save_select, null) only
  when curt_save_c5rect and curt_save_select is non-null. RECT save blocks now
  read from dpixels_5d_roi_pyramid.
  Eliminates the OutOfMemoryError when curt_save_c5full=false.
Co-authored-by: Claude <claude@elphel.com>
parent eb6a764f
...@@ -270,6 +270,7 @@ public class CuasDetectRT { ...@@ -270,6 +270,7 @@ public class CuasDetectRT {
String [][] ts_pyramid = new String [pyramid_levels][]; String [][] ts_pyramid = new String [pyramid_levels][];
double [][][][] dpixels_3d3_pyramid = new double [pyramid_levels][][][]; double [][][][] dpixels_3d3_pyramid = new double [pyramid_levels][][][];
double [][][][][] dpixels_5d_pyramid = new double [pyramid_levels][][][][]; double [][][][][] dpixels_5d_pyramid = new double [pyramid_levels][][][][];
double [][][][][] dpixels_5d_roi_pyramid = new double [pyramid_levels][][][][];
ts_pyramid[0] = getTimeStamps(1); ts_pyramid[0] = getTimeStamps(1);
dpixels_3d3_pyramid[0] = dpixels_3d3; dpixels_3d3_pyramid[0] = dpixels_3d3;
dpixels_pyramid[0] = new double[dpixels_log.length-1][width*height]; dpixels_pyramid[0] = new double[dpixels_log.length-1][width*height];
...@@ -290,13 +291,21 @@ public class CuasDetectRT { ...@@ -290,13 +291,21 @@ public class CuasDetectRT {
// Level-0 5D convolution (fine velocity from coarse-velocity history) // Level-0 5D convolution (fine velocity from coarse-velocity history)
int num_5d_lev0 = Math.max(0, dpixels_3d3.length - num_hist_5d + 1); int num_5d_lev0 = Math.max(0, dpixels_3d3.length - num_hist_5d + 1);
dpixels_5d_pyramid[0] = new double[num_5d_lev0][][][]; dpixels_5d_pyramid[0] = new double[num_5d_lev0][][][];
dpixels_5d_roi_pyramid[0] = new double[num_5d_lev0][][][];
String[] ts_5d_lev0 = new String[num_5d_lev0]; String[] ts_5d_lev0 = new String[num_5d_lev0];
System.out.println("detectTargets(): will run convolve3d()");
for (int n5d = 0; n5d < num_5d_lev0; n5d++) { for (int n5d = 0; n5d < num_5d_lev0; n5d++) {
System.out.print ("n5d="+n5d+" ");
double[][][] window = new double[num_hist_5d][][]; double[][][] window = new double[num_hist_5d][][];
for (int h = 0; h < num_hist_5d; h++) { for (int h = 0; h < num_hist_5d; h++) {
window[h] = dpixels_3d3[n5d + num_hist_5d - 1 - h]; window[h] = dpixels_3d3[n5d + num_hist_5d - 1 - h];
} }
if (curt_save_c5full) {
dpixels_5d_pyramid[0][n5d] = cuasRTUtils.convolve3d(window, null); dpixels_5d_pyramid[0][n5d] = cuasRTUtils.convolve3d(window, null);
}
if (curt_save_c5rect && (curt_save_select != null)) {
dpixels_5d_roi_pyramid[0][n5d] = cuasRTUtils.convolve3d(window, curt_save_select, null);
}
ts_5d_lev0[n5d] = ts_pyramid[0][n5d + num_hist_5d - 1]; ts_5d_lev0[n5d] = ts_pyramid[0][n5d + num_hist_5d - 1];
} }
if (save_5d_pixels && num_5d_lev0 > 0) { if (save_5d_pixels && num_5d_lev0 > 0) {
...@@ -308,7 +317,7 @@ public class CuasDetectRT { ...@@ -308,7 +317,7 @@ public class CuasDetectRT {
} }
if (curt_save_c5rect && num_5d_lev0 > 0) { if (curt_save_c5rect && num_5d_lev0 > 0) {
ImagePlus imp_5d_rect = cuasRTUtils.showConvKernel5d( ImagePlus imp_5d_rect = cuasRTUtils.showConvKernel5d(
dpixels_5d_pyramid[0], dpixels_5d_roi_pyramid[0],
curt_save_select, curt_save_select,
ts_5d_lev0, ts_5d_lev0,
title_conv5d+"-RECT"); title_conv5d+"-RECT");
...@@ -366,13 +375,19 @@ public class CuasDetectRT { ...@@ -366,13 +375,19 @@ public class CuasDetectRT {
// 5D convolution for pyramid level nlev+1 // 5D convolution for pyramid level nlev+1
int num_5d_lev = Math.max(0, dpixels_3d3_pyramid[nlev+1].length - num_hist_5d + 1); int num_5d_lev = Math.max(0, dpixels_3d3_pyramid[nlev+1].length - num_hist_5d + 1);
dpixels_5d_pyramid[nlev+1] = new double[num_5d_lev][][][]; dpixels_5d_pyramid[nlev+1] = new double[num_5d_lev][][][];
dpixels_5d_roi_pyramid[nlev+1] = new double[num_5d_lev][][][];
String[] ts_5d_lev = new String[num_5d_lev]; String[] ts_5d_lev = new String[num_5d_lev];
for (int n5d = 0; n5d < num_5d_lev; n5d++) { for (int n5d = 0; n5d < num_5d_lev; n5d++) {
double[][][] window = new double[num_hist_5d][][]; double[][][] window = new double[num_hist_5d][][];
for (int h = 0; h < num_hist_5d; h++) { for (int h = 0; h < num_hist_5d; h++) {
window[h] = dpixels_3d3_pyramid[nlev+1][n5d + num_hist_5d - 1 - h]; window[h] = dpixels_3d3_pyramid[nlev+1][n5d + num_hist_5d - 1 - h];
} }
if (curt_save_c5full) {
dpixels_5d_pyramid[nlev+1][n5d] = cuasRTUtils.convolve3d(window, null); dpixels_5d_pyramid[nlev+1][n5d] = cuasRTUtils.convolve3d(window, null);
}
if (curt_save_c5rect && (curt_save_select != null)) {
dpixels_5d_roi_pyramid[nlev+1][n5d] = cuasRTUtils.convolve3d(window, curt_save_select, null);
}
ts_5d_lev[n5d] = ts_pyramid[nlev+1][n5d + num_hist_5d - 1]; ts_5d_lev[n5d] = ts_pyramid[nlev+1][n5d + num_hist_5d - 1];
} }
if (save_5d_pixels && num_5d_lev > 0) { if (save_5d_pixels && num_5d_lev > 0) {
...@@ -384,7 +399,7 @@ public class CuasDetectRT { ...@@ -384,7 +399,7 @@ public class CuasDetectRT {
} }
if (curt_save_c5rect && num_5d_lev > 0) { if (curt_save_c5rect && num_5d_lev > 0) {
ImagePlus imp_5d_rect = cuasRTUtils.showConvKernel5d( ImagePlus imp_5d_rect = cuasRTUtils.showConvKernel5d(
dpixels_5d_pyramid[nlev+1], dpixels_5d_roi_pyramid[nlev+1],
curt_save_select, curt_save_select,
ts_5d_lev, ts_5d_lev,
title_conv5d+"-LEV"+(nlev+1)+"-RECT"); title_conv5d+"-LEV"+(nlev+1)+"-RECT");
......
...@@ -200,7 +200,9 @@ public class CuasRTUtils { ...@@ -200,7 +200,9 @@ public class CuasRTUtils {
final AtomicInteger ai = new AtomicInteger(0); final AtomicInteger ai = new AtomicInteger(0);
final int [] coarse_vel_indx = consolidationKernel.getCoarseVIdx(); final int [] coarse_vel_indx = consolidationKernel.getCoarseVIdx();
final int decimate_size = pixel_decimate*pixel_decimate; final int decimate_size = pixel_decimate*pixel_decimate;
System.out.print("convolve3d() nhist=");
for (int nhist = 0; nhist < nlayers; nhist++) { for (int nhist = 0; nhist < nlayers; nhist++) {
System.out.print(nhist+" ");
final int fhist = nhist; final int fhist = nhist;
final int spat_rad = consolidationKernel.getSpatialRadius(); // may be later dependent on nhist for efficiency final int spat_rad = consolidationKernel.getSpatialRadius(); // may be later dependent on nhist for efficiency
final double [][][][] kernel_hist = kernel[fhist]; final double [][][][] kernel_hist = kernel[fhist];
...@@ -288,10 +290,120 @@ public class CuasRTUtils { ...@@ -288,10 +290,120 @@ public class CuasRTUtils {
}; };
} }
ImageDtt.startAndJoin(threads); ImageDtt.startAndJoin(threads);
}
System.out.println();
return result;
}
/**
* ROI variant: compute fine-velocity convolution for a pixel rectangle only.
* Result is indexed ROI-relatively: result[roi_pix][sub][vel] where
* roi_pix = (target_y - roi.y) * roi.width + (target_x - roi.x).
* Pixels outside roi are skipped; full-image source data is still used for
* the spatial neighbourhood reads.
*/
public double [][][] convolve3d(
final double [][][] data,
final Rectangle roi,
double [][][] result_in) {
final int vel_size = 2*velocity_radius + 1;
final int nlayers = data.length;
final int roi_npix = roi.width * roi.height;
final double [][][] result = (result_in != null) ? result_in :
new double [roi_npix][pixel_decimate*pixel_decimate][vel_size * vel_size];
final double [][][][][] kernel = consolidationKernel.getKernel();
final Thread[] threads = ImageDtt.newThreadArray();
final AtomicInteger ai = new AtomicInteger(0);
final int [] coarse_vel_indx = consolidationKernel.getCoarseVIdx();
final int decimate_size = pixel_decimate*pixel_decimate;
final int roi_x0 = roi.x, roi_y0 = roi.y, roi_x1 = roi.x + roi.width, roi_y1 = roi.y + roi.height;
for (int nhist = 0; nhist < nlayers; nhist++) {
final int fhist = nhist;
final int spat_rad = consolidationKernel.getSpatialRadius();
final double [][][][] kernel_hist = kernel[fhist];
final double [][] data_hist = data[fhist];
ai.set(0);
// center pixels — no source boundary checks needed
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
public void run() {
for (int nPix = ai.getAndIncrement(); nPix < indx_center_5d.length; nPix = ai.getAndIncrement()) {
int ipix_dst = indx_center_5d[nPix];
int target_x = ipix_dst % width;
int target_y = ipix_dst / width;
if (target_x < roi_x0 || target_x >= roi_x1 ||
target_y < roi_y0 || target_y >= roi_y1) continue;
int roi_pix = (target_y - roi_y0) * roi.width + (target_x - roi_x0);
for (int sub_idx = 0; sub_idx < decimate_size; sub_idx++) {
for (int v_out_idx = 0; v_out_idx < kernel_hist.length; v_out_idx++) {
int src_coarse_v = coarse_vel_indx[v_out_idx];
if (src_coarse_v >= 0) {
double accumulated_flux = 0.0;
for (int dy = -spat_rad; dy <= spat_rad; dy++) {
int src_y = target_y + dy;
for (int dx = -spat_rad; dx <= spat_rad; dx++) {
int src_x = target_x + dx;
int src_pixel_idx = src_y * width + src_x;
double historical_value = data_hist[src_pixel_idx][src_coarse_v];
int k_dy = dy + spat_rad;
int k_dx = dx + spat_rad;
double weight = kernel_hist[v_out_idx][sub_idx][k_dy][k_dx];
accumulated_flux += historical_value * weight;
}
}
result[roi_pix][sub_idx][v_out_idx] += accumulated_flux;
}
}
}
}
}
};
}
ImageDtt.startAndJoin(threads);
ai.set(0);
// margin pixels — source boundary checks required
for (int ithread = 0; ithread < threads.length; ithread++) {
threads[ithread] = new Thread() {
public void run() {
for (int nPix = ai.getAndIncrement(); nPix < indx_margins_5d.length; nPix = ai.getAndIncrement()) {
int ipix_dst = indx_margins_5d[nPix];
int target_x = ipix_dst % width;
int target_y = ipix_dst / width;
if (target_x < roi_x0 || target_x >= roi_x1 ||
target_y < roi_y0 || target_y >= roi_y1) continue;
int roi_pix = (target_y - roi_y0) * roi.width + (target_x - roi_x0);
for (int sub_idx = 0; sub_idx < decimate_size; sub_idx++) {
for (int v_out_idx = 0; v_out_idx < kernel_hist.length; v_out_idx++) {
int src_coarse_v = coarse_vel_indx[v_out_idx];
if (src_coarse_v >= 0) {
double accumulated_flux = 0.0;
for (int dy = -spat_rad; dy <= spat_rad; dy++) {
int src_y = target_y + dy;
for (int dx = -spat_rad; dx <= spat_rad; dx++) {
int src_x = target_x + dx;
if (src_x >= 0 && src_x < width && src_y >= 0 && src_y < height) {
int src_pixel_idx = src_y * width + src_x;
double historical_value = data_hist[src_pixel_idx][src_coarse_v];
int k_dy = dy + spat_rad;
int k_dx = dx + spat_rad;
double weight = kernel_hist[v_out_idx][sub_idx][k_dy][k_dx];
accumulated_flux += historical_value * weight;
}
}
}
result[roi_pix][sub_idx][v_out_idx] += accumulated_flux;
}
}
}
}
}
};
}
ImageDtt.startAndJoin(threads);
} }
return result; return result;
} }
public double[][] convolve3D3LReLU( public double[][] convolve3D3LReLU(
final double [] data, final double [] data,
final double [] data_prev, final double [] data_prev,
...@@ -1121,7 +1233,7 @@ public class CuasRTUtils { ...@@ -1121,7 +1233,7 @@ public class CuasRTUtils {
for (int px_roi = 0; px_roi < roi.width; px_roi++) { for (int px_roi = 0; px_roi < roi.width; px_roi++) {
int px_img = roi.x + px_roi; int px_img = roi.x + px_roi;
if (px_img < 0 || px_img >= width) continue; if (px_img < 0 || px_img >= width) continue;
int pixel_idx = py_img * width + px_img; int pixel_idx = py_roi * roi.width + px_roi;
int block_x = px_roi * block_w; int block_x = px_roi * block_w;
int block_y = py_roi * block_h; int block_y = py_roi * block_h;
double [][] pix_data = data[nscene][pixel_idx]; // [nsub][nvel] double [][] pix_data = data[nscene][pixel_idx]; // [nsub][nvel]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment