Commit b4bfe9b1 authored by Andrey Filippov

CLAUDE: Add ROI variant of convolve3d(), fix OOM in 5D fine-velocity stage

- CuasRTUtils: add convolve3d(data, Rectangle roi, result_in) that allocates
  only [roi.width*roi.height][nsub][nvel] instead of full [width*height][nsub][nvel],
  skips non-ROI pixels in center/margin loops, uses roi-relative indexing.
  Also fix showConvKernel5d(data, roi, ...) pixel_idx to use ROI-relative
  (py_roi*roi.width+px_roi) instead of full-image indexing.

- CuasDetectRT: add dpixels_5d_roi_pyramid alongside dpixels_5d_pyramid.
  The level-0 and pyramid-loop 5D blocks now call convolve3d(window, null) only
  when curt_save_c5full is set, and convolve3d(window, curt_save_select, null)
  only when curt_save_c5rect is set and curt_save_select is non-null. The RECT
  save blocks now read from dpixels_5d_roi_pyramid.
  This eliminates the OutOfMemoryError when curt_save_c5full=false.
Co-authored-by: Claude <claude@elphel.com>
parent eb6a764f
......@@ -270,6 +270,7 @@ public class CuasDetectRT {
String [][] ts_pyramid = new String [pyramid_levels][];
double [][][][] dpixels_3d3_pyramid = new double [pyramid_levels][][][];
double [][][][][] dpixels_5d_pyramid = new double [pyramid_levels][][][][];
double [][][][][] dpixels_5d_roi_pyramid = new double [pyramid_levels][][][][];
ts_pyramid[0] = getTimeStamps(1);
dpixels_3d3_pyramid[0] = dpixels_3d3;
dpixels_pyramid[0] = new double[dpixels_log.length-1][width*height];
......@@ -290,13 +291,21 @@ public class CuasDetectRT {
// Level-0 5D convolution (fine velocity from coarse-velocity history)
int num_5d_lev0 = Math.max(0, dpixels_3d3.length - num_hist_5d + 1);
dpixels_5d_pyramid[0] = new double[num_5d_lev0][][][];
dpixels_5d_roi_pyramid[0] = new double[num_5d_lev0][][][];
String[] ts_5d_lev0 = new String[num_5d_lev0];
System.out.println("detectTargets(): will run convolve3d()");
for (int n5d = 0; n5d < num_5d_lev0; n5d++) {
System.out.print ("n5d="+n5d+" ");
double[][][] window = new double[num_hist_5d][][];
for (int h = 0; h < num_hist_5d; h++) {
window[h] = dpixels_3d3[n5d + num_hist_5d - 1 - h];
}
if (curt_save_c5full) {
dpixels_5d_pyramid[0][n5d] = cuasRTUtils.convolve3d(window, null);
}
if (curt_save_c5rect && (curt_save_select != null)) {
dpixels_5d_roi_pyramid[0][n5d] = cuasRTUtils.convolve3d(window, curt_save_select, null);
}
ts_5d_lev0[n5d] = ts_pyramid[0][n5d + num_hist_5d - 1];
}
if (save_5d_pixels && num_5d_lev0 > 0) {
......@@ -308,7 +317,7 @@ public class CuasDetectRT {
}
if (curt_save_c5rect && num_5d_lev0 > 0) {
ImagePlus imp_5d_rect = cuasRTUtils.showConvKernel5d(
dpixels_5d_pyramid[0],
dpixels_5d_roi_pyramid[0],
curt_save_select,
ts_5d_lev0,
title_conv5d+"-RECT");
......@@ -366,13 +375,19 @@ public class CuasDetectRT {
// 5D convolution for pyramid level nlev+1
int num_5d_lev = Math.max(0, dpixels_3d3_pyramid[nlev+1].length - num_hist_5d + 1);
dpixels_5d_pyramid[nlev+1] = new double[num_5d_lev][][][];
dpixels_5d_roi_pyramid[nlev+1] = new double[num_5d_lev][][][];
String[] ts_5d_lev = new String[num_5d_lev];
for (int n5d = 0; n5d < num_5d_lev; n5d++) {
double[][][] window = new double[num_hist_5d][][];
for (int h = 0; h < num_hist_5d; h++) {
window[h] = dpixels_3d3_pyramid[nlev+1][n5d + num_hist_5d - 1 - h];
}
if (curt_save_c5full) {
dpixels_5d_pyramid[nlev+1][n5d] = cuasRTUtils.convolve3d(window, null);
}
if (curt_save_c5rect && (curt_save_select != null)) {
dpixels_5d_roi_pyramid[nlev+1][n5d] = cuasRTUtils.convolve3d(window, curt_save_select, null);
}
ts_5d_lev[n5d] = ts_pyramid[nlev+1][n5d + num_hist_5d - 1];
}
if (save_5d_pixels && num_5d_lev > 0) {
......@@ -384,7 +399,7 @@ public class CuasDetectRT {
}
if (curt_save_c5rect && num_5d_lev > 0) {
ImagePlus imp_5d_rect = cuasRTUtils.showConvKernel5d(
dpixels_5d_pyramid[nlev+1],
dpixels_5d_roi_pyramid[nlev+1],
curt_save_select,
ts_5d_lev,
title_conv5d+"-LEV"+(nlev+1)+"-RECT");
......
......@@ -200,7 +200,9 @@ public class CuasRTUtils {
final AtomicInteger ai = new AtomicInteger(0);
final int [] coarse_vel_indx = consolidationKernel.getCoarseVIdx();
final int decimate_size = pixel_decimate*pixel_decimate;
System.out.print("convolve3d() nhist=");
for (int nhist = 0; nhist < nlayers; nhist++) {
System.out.print(nhist+" ");
final int fhist = nhist;
final int spat_rad = consolidationKernel.getSpatialRadius(); // may be later dependent on nhist for efficiency
final double [][][][] kernel_hist = kernel[fhist];
......@@ -288,10 +290,120 @@ public class CuasRTUtils {
};
}
ImageDtt.startAndJoin(threads);
}
System.out.println();
return result;
}
/**
 * ROI variant: compute the fine-velocity convolution for a pixel rectangle only.
 * <p>
 * The result is indexed ROI-relatively, {@code result[roi_pix][sub][vel]}, where
 * {@code roi_pix = (target_y - roi.y) * roi.width + (target_x - roi.x)}.
 * Destination pixels outside {@code roi} are skipped; the source data is still
 * addressed with full-image indices ({@code src_y * width + src_x}) for the
 * spatial neighbourhood reads.
 * <p>
 * Each history layer {@code data[nhist]} is processed in two multithreaded passes:
 * first the pixels listed in {@code indx_center_5d} (no source bounds checks),
 * then the pixels listed in {@code indx_margins_5d} (source samples that fall
 * outside the image are dropped). Contributions are added ({@code +=}) to the
 * result, so a non-null {@code result_in} is accumulated into, not cleared.
 *
 * @param data      input layers, indexed {@code [nhist][full-image pixel][coarse velocity]}
 * @param roi       destination rectangle in full-image pixel coordinates; must not be
 *                  {@code null} (it is dereferenced unconditionally)
 * @param result_in optional preallocated, ROI-indexed result; when {@code null} a new
 *                  {@code [roi.width*roi.height][pixel_decimate*pixel_decimate][vel_size*vel_size]}
 *                  array is allocated, with {@code vel_size = 2*velocity_radius + 1}
 * @return the array the contributions were added to ({@code result_in} itself when it
 *         was non-null)
 */
public double [][][] convolve3d(
		final double [][][] data,
		final Rectangle roi,
		double [][][] result_in) {
	final int vel_size = 2*velocity_radius + 1;
	final int nlayers = data.length;
	final int roi_npix = roi.width * roi.height;
	final double [][][] result = (result_in != null) ? result_in :
		new double [roi_npix][pixel_decimate*pixel_decimate][vel_size * vel_size];
	final double [][][][][] kernel = consolidationKernel.getKernel();
	final Thread[] threads = ImageDtt.newThreadArray();
	final AtomicInteger ai = new AtomicInteger(0);
	final int [] coarse_vel_indx = consolidationKernel.getCoarseVIdx();
	final int decimate_size = pixel_decimate*pixel_decimate;
	for (int nhist = 0; nhist < nlayers; nhist++) {
		// Queried per layer, as in the full-image variant.
		final int spat_rad = consolidationKernel.getSpatialRadius();
		final double [][][][] kernel_hist = kernel[nhist];
		final double [][] data_hist = data[nhist];
		// center pixels — no source boundary checks needed
		convolveRoiPass(indx_center_5d, false, data_hist, kernel_hist,
				coarse_vel_indx, spat_rad, decimate_size, roi, result, threads, ai);
		// margin pixels — source boundary checks required
		convolveRoiPass(indx_margins_5d, true, data_hist, kernel_hist,
				coarse_vel_indx, spat_rad, decimate_size, roi, result, threads, ai);
	}
	return result;
}

/**
 * Runs one multithreaded pass of the ROI convolution over a list of destination pixels
 * for a single history layer, adding the contributions to {@code result}.
 * <p>
 * Worker threads claim entries of {@code pix_indices} through the shared counter
 * {@code ai}; entries whose pixel lies outside {@code roi} are skipped.
 * NOTE(review): writes are race-free only if no pixel index appears twice in the
 * list — confirm where the index lists are built.
 *
 * @param pix_indices     full-image indices ({@code y * width + x}) of destination pixels
 * @param check_bounds    when {@code true}, source samples outside the image are dropped;
 *                        when {@code false}, every neighbour is read unchecked
 * @param data_hist       source layer, indexed {@code [full-image pixel][coarse velocity]}
 * @param kernel_hist     kernel for this layer, indexed {@code [v_out][sub][k_dy][k_dx]}
 * @param coarse_vel_indx source coarse-velocity index per output velocity; negative
 *                        entries are skipped
 * @param spat_rad        spatial radius of the neighbourhood, in pixels
 * @param decimate_size   number of sub-pixel slots per destination pixel
 * @param roi             destination rectangle in full-image pixel coordinates
 * @param result          ROI-indexed accumulator {@code [roi_pix][sub][v_out]}
 * @param threads         thread array to (re)populate and run
 * @param ai              shared work counter; reset to 0 at the start of the pass
 */
private void convolveRoiPass(
		final int [] pix_indices,
		final boolean check_bounds,
		final double [][] data_hist,
		final double [][][][] kernel_hist,
		final int [] coarse_vel_indx,
		final int spat_rad,
		final int decimate_size,
		final Rectangle roi,
		final double [][][] result,
		final Thread [] threads,
		final AtomicInteger ai) {
	final int roi_x0 = roi.x;
	final int roi_y0 = roi.y;
	final int roi_x1 = roi.x + roi.width;
	final int roi_y1 = roi.y + roi.height;
	final int roi_width = roi.width;
	ai.set(0);
	for (int ithread = 0; ithread < threads.length; ithread++) {
		threads[ithread] = new Thread() {
			@Override
			public void run() {
				for (int nPix = ai.getAndIncrement(); nPix < pix_indices.length; nPix = ai.getAndIncrement()) {
					final int ipix_dst = pix_indices[nPix];
					final int target_x = ipix_dst % width;
					final int target_y = ipix_dst / width;
					if (target_x < roi_x0 || target_x >= roi_x1 ||
							target_y < roi_y0 || target_y >= roi_y1) {
						continue; // destination pixel is not inside the ROI
					}
					final int roi_pix = (target_y - roi_y0) * roi_width + (target_x - roi_x0);
					for (int sub_idx = 0; sub_idx < decimate_size; sub_idx++) {
						for (int v_out_idx = 0; v_out_idx < kernel_hist.length; v_out_idx++) {
							final int src_coarse_v = coarse_vel_indx[v_out_idx];
							if (src_coarse_v < 0) {
								continue; // no coarse-velocity source for this output velocity
							}
							final double [][] kernel_2d = kernel_hist[v_out_idx][sub_idx];
							double accumulated_flux = 0.0;
							for (int dy = -spat_rad; dy <= spat_rad; dy++) {
								final int src_y = target_y + dy;
								if (check_bounds && (src_y < 0 || src_y >= height)) {
									continue; // whole source row is outside the image
								}
								final double [] kernel_row = kernel_2d[dy + spat_rad];
								final int row_base = src_y * width;
								for (int dx = -spat_rad; dx <= spat_rad; dx++) {
									final int src_x = target_x + dx;
									if (check_bounds && (src_x < 0 || src_x >= width)) {
										continue; // source sample is outside the image
									}
									final double historical_value = data_hist[row_base + src_x][src_coarse_v];
									final double weight = kernel_row[dx + spat_rad];
									accumulated_flux += historical_value * weight;
								}
							}
							result[roi_pix][sub_idx][v_out_idx] += accumulated_flux;
						}
					}
				}
			}
		};
	}
	ImageDtt.startAndJoin(threads);
}
public double[][] convolve3D3LReLU(
final double [] data,
final double [] data_prev,
......@@ -1121,7 +1233,7 @@ public class CuasRTUtils {
for (int px_roi = 0; px_roi < roi.width; px_roi++) {
int px_img = roi.x + px_roi;
if (px_img < 0 || px_img >= width) continue;
int pixel_idx = py_img * width + px_img;
int pixel_idx = py_roi * roi.width + px_roi;
int block_x = px_roi * block_w;
int block_y = py_roi * block_h;
double [][] pix_data = data[nscene][pixel_idx]; // [nsub][nvel]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment