...
 
Commits (2)
......@@ -32,7 +32,7 @@ public class CLTParameters {
public int ishift_y = 0; // debug feature - shift source image by this pixels down
private double fat_zero = 0.05; // modify phase correlation to prevent division by very small numbers
private double fat_zero_mono = 0.1; // modify phase correlation to prevent division by very small numbers
private double fat_zero_mono = 0.03; // modify phase correlation to prevent division by very small numbers
private double corr_sigma = 0.8; // LPF correlation sigma
private double corr_sigma_mono = 0.1; // LPF correlation sigma for monochrome images
private double scale_strength_main = 1.0; // leave as is
......@@ -766,9 +766,10 @@ public class CLTParameters {
public boolean taEnMismatch = false; // Enable cost of a measurement layer not having same layer in the same location or near
// gpu processing parameters
public double gpu_corr_scale = 0.75; // reduce GPU-generated correlation values
public int gpu_corr_rad = 7; // size of the correlation to save - initially only 15x15
public double gpu_weight_r = 0.25;
public double gpu_weight_b = 0.25; // weight g = 1.0 - gpu_weight_r - gpu_weight_b
public double gpu_weight_r = 0.5; // 25;
public double gpu_weight_b = 0.2; // 0.25; // weight g = 1.0 - gpu_weight_r - gpu_weight_b
public double gpu_sigma_r = 0.9; // 1.1;
public double gpu_sigma_b = 0.9; // 1.1;
public double gpu_sigma_g = 0.6; // 0.7;
......@@ -1567,6 +1568,7 @@ public class CLTParameters {
properties.setProperty(prefix+"taEnMismatch", this.taEnMismatch +"");
properties.setProperty(prefix+"gpu_corr_scale", this.gpu_corr_scale +"");
properties.setProperty(prefix+"gpu_corr_rad", this.gpu_corr_rad +"");
properties.setProperty(prefix+"gpu_weight_r", this.gpu_weight_r +"");
properties.setProperty(prefix+"gpu_weight_b", this.gpu_weight_b +"");
......@@ -2352,6 +2354,7 @@ public class CLTParameters {
if (properties.getProperty(prefix+"taEnFlaps")!=null) this.taEnFlaps=Boolean.parseBoolean(properties.getProperty(prefix+"taEnFlaps"));
if (properties.getProperty(prefix+"taEnMismatch")!=null) this.taEnMismatch=Boolean.parseBoolean(properties.getProperty(prefix+"taEnMismatch"));
if (properties.getProperty(prefix+"gpu_corr_scale")!=null) this.gpu_corr_scale=Double.parseDouble(properties.getProperty(prefix+"gpu_corr_scale"));
if (properties.getProperty(prefix+"gpu_corr_rad")!=null) this.gpu_corr_rad=Integer.parseInt(properties.getProperty(prefix+"gpu_corr_rad"));
if (properties.getProperty(prefix+"gpu_weight_r")!=null) this.gpu_weight_r=Double.parseDouble(properties.getProperty(prefix+"gpu_weight_r"));
if (properties.getProperty(prefix+"gpu_weight_b")!=null) this.gpu_weight_b=Double.parseDouble(properties.getProperty(prefix+"gpu_weight_b"));
......@@ -3279,6 +3282,8 @@ public class CLTParameters {
gd.addTab ("GPU", "Parameters for GPU development");
gd.addMessage ("--- GPU processing parameters ---");
gd.addNumericField("GPU 2D correlation scale", this.gpu_corr_scale, 4, 6,"",
"Reduce GPU-generated correlation values to approximately match CPU-generated ones");
gd.addNumericField("Correlation radius", this.gpu_corr_rad, 0, 6,"pix",
"Size of the 2D correlation - maximal radius = 7 corresponds to full 15x15 pixel tile");
gd.addNumericField("Correlation weight R", this.gpu_weight_r, 4, 6,"",
......@@ -4057,6 +4062,7 @@ public class CLTParameters {
this.taEnFlaps= gd.getNextBoolean();
this.taEnMismatch= gd.getNextBoolean();
this.gpu_corr_scale = gd.getNextNumber();
this.gpu_corr_rad = (int) gd.getNextNumber();
this.gpu_weight_r = gd.getNextNumber();
this.gpu_weight_b = gd.getNextNumber();
......
......@@ -63,7 +63,10 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.Random;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import com.elphel.imagej.tileprocessor.DttRad2;
......@@ -739,6 +742,13 @@ public class GPUTileProcessor {
Sizeof.FLOAT); // int ElementSizeBytes)
texture_stride_rgba = (int)(device_stride[0] / Sizeof.FLOAT);
}
/**
 * Get the number of tiles in a row: img_width divided by DTT_SIZE (integer division).
 * @return img_width / DTT_SIZE
 */
public int getTilesX() {
	final int tiles_in_row = img_width / DTT_SIZE;
	return tiles_in_row;
}
/**
 * Get the number of tile rows: img_height divided by DTT_SIZE (integer division).
 * @return img_height / DTT_SIZE
 */
public int getTilesY() {
	final int tile_rows = img_height / DTT_SIZE;
	return tile_rows;
}
public void resetGeometryCorrection() {
geometry_correction_set = false;
......@@ -760,7 +770,6 @@ public class GPUTileProcessor {
}
/**
 * Set the extrinsics vector from the geometry correction of quadCLT:
 * passes quadCLT.getGeometryCorrection().getCorrVector() to setExtrinsicsVector().
 * Runs unconditionally - the early return on geometry_correction_vector_set is commented out.
 */
public void setGeometryCorrectionVector() { // will reset geometry_correction_vector_set when running GPU kernel
// if (geometry_correction_vector_set) return;
setExtrinsicsVector(
quadCLT.getGeometryCorrection().getCorrVector());
}
......@@ -794,7 +803,7 @@ public class GPUTileProcessor {
/**
* Copy array of CPU-prepared tasks to the GPU memory
* @param tile_tasks array of TpTask prepared by the CPU (before geometry correction is appllied)
* @param tile_tasks array of TpTask prepared by the CPU (before geometry correction is applied)
* @param use_aux Use second (aux) camera
*/
public void setTasks(TpTask [] tile_tasks, boolean use_aux) // while is it in class member? - just to be able to free
......@@ -1211,6 +1220,63 @@ public class GPUTileProcessor {
return tp_tasks;
}
/**
 * Prepare the array of tile tasks from the per-tile operations and expected disparities.
 * Tiles are processed by several threads, tiles with zero tile_op produce no task.
 * Each thread writes a task to the slot of the tile it processes and the slots are compacted
 * after all threads are done, so the tasks are returned in the tile scan order
 * (nTile = tileY * tilesX + tileX) regardless of the threads scheduling, and adding a task
 * does not copy the tasks collected so far (as adding to a CopyOnWriteArrayList does).
 *
 * @param disparity_array [tilesY][tilesX] - individual per-tile expected disparity
 * @param disparity_corr  value added to the expected disparity of each tile
 * @param need_corrs      should be initialized to boolean[1] or null. When not null, need_corrs[0]
 *                        is set to true if at least one task has non-zero correlation mask,
 *                        to false otherwise
 * @param tile_op         [tilesY][tilesX] - what to do - 0 - nothing for this tile
 * @param corr_mask       negative - use corr mask from the tile tile_op, non-negative - overwrite
 *                        all non-zero tile corr masks with this value
 * @param threadsMax      maximal number of threads to launch
 * @return array of the tasks, one per tile with non-zero tile_op
 */
public GPUTileProcessor.TpTask[] setTpTask(
		//                  final GPUTileProcessor.GpuQuad gpuQuad,
		final double [][]   disparity_array, // [tilesY][tilesX] - individual per-tile expected disparity
		final double        disparity_corr,
		final boolean []    need_corrs,      // should be initialized to boolean[1] or null
		final int [][]      tile_op,         // [tilesY][tilesX] - what to do - 0 - nothing for this tile
		final int           corr_mask,       // <0 - use corr mask from the tile tile_op, >=0 - overwrite all with non-zero corr_mask_tp
		final int           threadsMax)      // maximal number of threads to launch
{
	final int tilesX = getTilesX();
	final int tilesY = getTilesY();
	final int num_tiles = tilesX * tilesY;
	final AtomicInteger ai = new AtomicInteger(0);
	final AtomicBoolean acorrs = new AtomicBoolean(false);
	// One slot per tile: a tile index is claimed by exactly one thread (ai.getAndIncrement()),
	// so no two threads write to the same element. Unused tiles stay null.
	final GPUTileProcessor.TpTask[] tile_tasks = new GPUTileProcessor.TpTask[num_tiles];
	final Thread[] threads = ImageDtt.newThreadArray(threadsMax);
	for (int ithread = 0; ithread < threads.length; ithread++) {
		threads[ithread] = new Thread() {
			@Override
			public void run() {
				for (int nTile = ai.getAndIncrement(); nTile < num_tiles; nTile = ai.getAndIncrement()) {
					int tileY = nTile / tilesX;
					int tileX = nTile % tilesX;
					int op = tile_op[tileY][tileX];
					if (op == 0) continue; // nothing to do for this tile
					// which images to use
					int img_mask = ImageDtt.getImgMask(op);
					// which pairs to combine in the combo: 1 - top, 2 bottom, 4 - left, 8 - right
					int corr_mask_tp = ImageDtt.getPairMask(op); // limited to 4 bits only!
					if ((corr_mask_tp != 0) && (corr_mask >= 0)) {
						corr_mask_tp = corr_mask; // override only tiles that requested correlation
					}
					if (corr_mask_tp != 0) {
						acorrs.set(true);
					}
					tile_tasks[nTile] = new GPUTileProcessor.TpTask(
							tileX,
							tileY,
							(float) (disparity_array[tileY][tileX] + disparity_corr),
							((img_mask & 0x0f) << 0) |
							((corr_mask_tp & 0x3f) << 4)
							); // task == 1 for now
					// mask out pairs that use missing channels
				}
			}
		};
	}
	// NOTE(review): relies on startAndJoin() returning only after all threads are finished (as the name suggests)
	ImageDtt.startAndJoin(threads);
	if (need_corrs != null) {
		need_corrs[0] = acorrs.get();
	}
	// compact: remove empty slots preserving the tile order
	int num_tasks = 0;
	for (GPUTileProcessor.TpTask task : tile_tasks) {
		if (task != null) {
			num_tasks++;
		}
	}
	final GPUTileProcessor.TpTask[] tp_tasks = new GPUTileProcessor.TpTask[num_tasks];
	int indx = 0;
	for (GPUTileProcessor.TpTask task : tile_tasks) {
		if (task != null) {
			tp_tasks[indx++] = task;
		}
	}
	return tp_tasks;
}
/**
* Prepare contents pointers for calculation of the correlation pairs
......@@ -1653,6 +1719,21 @@ public class GPUTileProcessor {
cuCtxSynchronize();
}
/**
 * Read the number of the correlation tiles (from gpu_num_corr_tiles) and that many
 * correlation indices (from gpu_corr_indices) from the GPU memory. Also updates num_corr_tiles.
 * The 32-bit values are copied directly to int arrays: reading them as floats and converting
 * with Float.floatToIntBits() would replace any value that has a NaN bit pattern with 0x7fc00000.
 * @return array of the correlation indices, its length is the number of the correlation tiles read
 */
public int [] getCorrIndices() {
	int [] num_corrs_arr = new int[1];
	cuMemcpyDtoH(Pointer.to(num_corrs_arr), gpu_num_corr_tiles, 1 * Sizeof.INT);
	int num_corrs = num_corrs_arr[0];
	int [] corr_indices = new int [num_corrs];
	cuMemcpyDtoH(Pointer.to(corr_indices), gpu_corr_indices, num_corrs * Sizeof.INT);
	num_corr_tiles = num_corrs;
	return corr_indices;
}
public float [][] getCorr2D(int corr_rad){
int corr_size = (2 * corr_rad + 1) * (2 * corr_rad + 1);
float [] cpu_corrs = new float [ num_corr_tiles * corr_size];
......@@ -1677,20 +1758,6 @@ public class GPUTileProcessor {
return corrs;
}
/**
 * Read the number of the correlation tiles (from gpu_num_corr_tiles) and that many
 * correlation indices (from gpu_corr_indices) from the GPU memory. Also updates num_corr_tiles.
 * The 32-bit values are copied directly to int arrays: reading them as floats and converting
 * with Float.floatToIntBits() would replace any value that has a NaN bit pattern with 0x7fc00000.
 * @return array of the correlation indices, its length is the number of the correlation tiles read
 */
public int [] getCorrIndices() {
	int [] num_corrs_arr = new int[1];
	cuMemcpyDtoH(Pointer.to(num_corrs_arr), gpu_num_corr_tiles, 1 * Sizeof.INT);
	int num_corrs = num_corrs_arr[0];
	int [] corr_indices = new int [num_corrs];
	cuMemcpyDtoH(Pointer.to(corr_indices), gpu_corr_indices, num_corrs * Sizeof.INT);
	num_corr_tiles = num_corrs;
	return corr_indices;
}
//
/**
......@@ -2011,6 +2078,16 @@ public class GPUTileProcessor {
if (debug) System.out.println("constantMemorySize: " + constantMemorySize);
cuMemcpyHtoD(constantMemoryPointer, Pointer.to(lpf_flat), constantMemorySize);
if (debug) System.out.println();
/*
if (debug) {
for (int i = 0; i < lpf_flat.length; i++) {
System.out.print(String.format("%8.5f", lpf_flat[i]));
if (((i+1) % 16) == 0) {
System.out.println();
}
}
}
*/
}
public void setLpfCorr(
......@@ -2027,6 +2104,16 @@ public class GPUTileProcessor {
if (debug) System.out.println("constantMemorySize: " + constantMemorySize);
cuMemcpyHtoD(constantMemoryPointer, Pointer.to(lpf_flat), constantMemorySize);
if (debug) System.out.println();
/*
if (debug) {
for (int i = 0; i < lpf_flat.length; i++) {
System.out.print(String.format("%8.5f", lpf_flat[i]));
if (((i+1) % 16) == 0) {
System.out.println();
}
}
}
*/
}
public float [] floatSetCltLpfFd(
......
......@@ -60,6 +60,9 @@ public class CLTPass3d{
public boolean [] border_tiles = null; // these are border tiles, zero out alpha
public boolean [] selected = null; // which tiles are selected for this layer
public double [][][][] texture_tiles;
public float [][] texture_img = null; // [3][] (RGB) or [4][] RGBA
public Rectangle texture_woi = null; // null or generated texture location/size
public double [][] max_tried_disparity = null; //[ty][tx] used for combined passes, shows maximal disparity for this tile, regardless of results
public boolean is_combo = false;
public boolean is_measured = false;
......@@ -96,13 +99,27 @@ public class CLTPass3d{
{
return texture_tiles;
}
/**
 * Get the texture images of this pass (texture_img field).
 * @return texture_img - [3][] (RGB) or [4][] RGBA, null if it was not set
 */
public float [][] getTextureImages() {
	final float [][] images = this.texture_img;
	return images;
}
/**
 * Get the location/size of the generated texture (texture_woi field).
 * @return texture_woi, null if it was not set
 */
public Rectangle getTextureWoi() {
	final Rectangle woi = this.texture_woi;
	return woi;
}
/**
 * Get the maximal tried disparity per tile (max_tried_disparity field).
 * @return max_tried_disparity - [ty][tx], null if it was not set
 */
public double [][] getMaxTriedDisparity() {
	final double [][] max_tried = this.max_tried_disparity;
	return max_tried;
}
public double [][] getTileRBGA(
int num_layers)
int num_layers) // 4 or 12
{
if (texture_img != null) {
System.out.println("FIXME: implement replacement for the GPU-generated textures (using macro mode?)");
}
if (texture_tiles == null) return null;
int tilesY = texture_tiles.length;
int tilesX = 0;
......@@ -163,6 +180,22 @@ public class CLTPass3d{
int tilesY = tileProcessor.getTilesY();
selected = new boolean[tilesY*tilesX];
int minX = tilesX, minY = tilesY, maxX = -1, maxY = -1;
if (texture_img != null) { // using GPU output
//tileProcessor.getTileSize()
if (texture_woi != null) {
int tile_size = tileProcessor.getTileSize();
texture_bounds = new Rectangle(
texture_woi.x/tile_size, texture_woi.y/tile_size, texture_woi.width/tile_size, texture_woi.height/tile_size);
// setting full rectangle as selected, not just textures? Use some other method?
for (int ty = texture_bounds.y; ty < (texture_bounds.y + texture_bounds.height); ty++) {
for (int tx = texture_bounds.x; tx < (texture_bounds.x + texture_bounds.width); tx++) {
selected[ty*tilesX+tx] = true;
}
}
return;
}
}
if (texture_tiles != null) {
for (int ty = 0; ty < tilesY; ty++) for (int tx = 0; tx < tilesX; tx++){
if (texture_tiles[ty][tx] != null) {
selected[ty * tilesX + tx] = true;
......@@ -174,6 +207,7 @@ public class CLTPass3d{
selected[ty * tilesX + tx] = false; // may be omitted
}
}
}
if (maxX < 0) {
texture_bounds = null;
} else {
......
This diff is collapsed.
......@@ -51,7 +51,6 @@ import com.elphel.imagej.common.DoubleGaussianBlur;
import com.elphel.imagej.common.ShowDoubleFloatArrays;
import com.elphel.imagej.correction.CorrectionColorProc;
import com.elphel.imagej.correction.EyesisCorrections;
import com.elphel.imagej.gpu.GPUTileProcessor;
import com.elphel.imagej.jp4.JP46_Reader_camera;
import com.elphel.imagej.tileprocessor.GeometryCorrection.CorrVector;
import com.elphel.imagej.x3d.export.WavefrontExport;
......@@ -7145,7 +7144,8 @@ public class QuadCLTCPU {
debugLevel);
tp.clt_3d_passes.add(bgnd_data);
// if (show_init_refine)
if ((debugLevel > -2) && clt_parameters.show_first_bg) {
// if ((debugLevel > -2) && clt_parameters.show_first_bg) {
if ((debugLevel > -3) && clt_parameters.show_first_bg) {
tp.showScan(
tp.clt_3d_passes.get(0), // CLTPass3d scan,
"bgnd_data-"+tp.clt_3d_passes.size());
......@@ -9412,14 +9412,14 @@ public class QuadCLTCPU {
)
{
final boolean new_mode = false;
boolean dbg_gpu_transition = true;
final int tilesX = tp.getTilesX();
final int tilesY = tp.getTilesY();
ShowDoubleFloatArrays sdfa_instance = null;
if (clt_parameters.debug_filters && (debugLevel > -1))
if ((clt_parameters.debug_filters && (debugLevel > -1)) || dbg_gpu_transition)
// if ((debugLevel > -1))
sdfa_instance = new ShowDoubleFloatArrays(); // just for debugging?
......@@ -9514,7 +9514,7 @@ public class QuadCLTCPU {
for (int tileY = 0; tileY < tilesY; tileY++){
for (int tileX = 0; tileX < tilesX; tileX++){
texture_tiles_bgnd[tileY][tileX]= null;
if ((texture_tiles[tileY][tileX] != null) &&
if ((texture_tiles[tileY][tileX] != null) && // null pointer
bgnd_tiles[tileY * tilesX + tileX]) {
if (bgnd_tiles_grown2[tileY * tilesX + tileX]) {
texture_tiles_bgnd[tileY][tileX]= texture_tiles[tileY][tileX];
......@@ -9628,6 +9628,7 @@ public class QuadCLTCPU {
CLTPass3d scan = tp.clt_3d_passes.get(scanIndex);
boolean [] borderTiles = scan.border_tiles;
double [][][][] texture_tiles = scan.texture_tiles;
// only place that uses updateSelection()
scan.updateSelection(); // update .selected field (all selected, including border) and Rectangle bounds
if (scan.getTextureBounds() == null) { // not used in lwir
System.out.println("getPassImage(): Empty image!");
......@@ -9858,6 +9859,17 @@ public class QuadCLTCPU {
}
final double disparity_corr = (z_correction == 0) ? 0.0 : geometryCorrection.getDisparityFromZ(1.0/z_correction);
double [][][][][] clt_corr_partial = null;
if (clt_parameters.img_dtt.gpu_mode_debug) {
clt_corr_combo = null;
clt_corr_partial = new double [tilesY][tilesX][][][];
for (int i = 0; i < tilesY; i++){
for (int j = 0; j < tilesX; j++){
clt_corr_partial[i][j] = null;
}
}
}
image_dtt.clt_aberrations_quad_corr(
clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
1, // final int macro_scale, // to correlate tile data instead of the pixel data: 1 - pixels, 8 - tiles
......@@ -9867,7 +9879,7 @@ public class QuadCLTCPU {
saturation_imp, // boolean [][] saturation_imp, // (near) saturated pixels or null
// correlation results - final and partial
clt_corr_combo, // [tp.tilesY][tp.tilesX][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
null, // clt_corr_partial, // [tp.tilesY][tp.tilesX][quad]color][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
clt_corr_partial, // null, // clt_corr_partial, // [tp.tilesY][tp.tilesX][quad]color][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
null, // [tp.tilesY][tp.tilesX][pair]{dx,dy,weight}[(2*transform_size-1)*(2*transform_size-1)] // transpose unapplied. null - do not calculate
// Use it with disparity_maps[scan_step]? clt_mismatch, // [tp.tilesY][tp.tilesX][pair]{dx,dy,weight}[(2*transform_size-1)*(2*transform_size-1)] // transpose unapplied. null - do not calculate
disparity_map, // [12][tp.tilesY * tp.tilesX]
......@@ -9897,7 +9909,7 @@ public class QuadCLTCPU {
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
/// image_dtt.transform_size,
/// image_dtt.transform_size,
clt_parameters.clt_window,
shiftXY, //
disparity_corr, // final double disparity_corr, // disparity at infinity
......@@ -9920,6 +9932,32 @@ public class QuadCLTCPU {
scan_rslt.is_measured = true;
scan_rslt.is_combo = false;
scan_rslt.resetProcessed();
if (clt_corr_partial!=null){ // only to debug matching gpu/cpu
if (debugLevel > -3){ // -1
String [] allColorNames = {"red","blue","green","combo"};
String [] titles = new String[clt_corr_partial.length];
for (int i = 0; i < titles.length; i++){
titles[i]=allColorNames[i % allColorNames.length]+"_"+(i / allColorNames.length);
}
double [][] corr_rslt_partial = image_dtt.corr_partial_dbg(
clt_corr_partial,
2*image_dtt.transform_size - 1, //final int corr_size,
4, // final int pairs,
4, // final int colors,
clt_parameters.corr_border_contrast,
threadsMax,
debugLevel);
// titles.length = 15, corr_rslt_partial.length=16!
System.out.println("corr_rslt_partial.length = "+corr_rslt_partial.length+", titles.length = "+titles.length);
(new ShowDoubleFloatArrays()).showArrays( // out of boundary 15
corr_rslt_partial,
tilesX*(2*image_dtt.transform_size),
tilesY*(2*image_dtt.transform_size),
true,
image_name+sAux()+"-PART_CORR-CPU-D"+clt_parameters.disparity);
}
}
return scan_rslt;
}
......