Commit 84eeaf35 authored by Andrey Filippov

Added GPU 2D phase correlation, related and debug functionality

parent dc4e1f60
......@@ -30,6 +30,7 @@ public class CLTParameters {
public int dbg_mode = 0; // 0 - normal, +1 - no DCT/IDCT
public int ishift_x = 0; // debug feature - shift source image by this pixels left
public int ishift_y = 0; // debug feature - shift source image by this pixels down
private double fat_zero = 0.05; // modify phase correlation to prevent division by very small numbers
private double fat_zero_mono = 0.1; // modify phase correlation to prevent division by very small numbers
private double corr_sigma = 0.8; // LPF correlation sigma
......@@ -763,7 +764,18 @@ public class CLTParameters {
public boolean taEnFlaps = true; // Enable cost of using supertile "flaps" (not in the center 8x8 tiles area)
public boolean taEnMismatch = false; // Enable cost of a measurement layer not having same layer in the same location or near
// gpu processing parameters
public int gpu_corr_rad = 7; // size of the correlation to save - initially only 15x15 (radius 7 corresponds to the full 15x15 pixel tile)
public double gpu_weight_r = 0.25; // weight of the red component in the composite 2D correlation
public double gpu_weight_b = 0.25; // weight of the blue component; weight g = 1.0 - gpu_weight_r - gpu_weight_b
public double gpu_sigma_r = 1.1; // LPF sigma (pix) for the red component during aberration correction
public double gpu_sigma_b = 1.1; // LPF sigma (pix) for the blue component during aberration correction
public double gpu_sigma_g = 0.7; // LPF sigma (pix) for the green component during aberration correction
public double gpu_sigma_m = 0.7; // LPF sigma (pix) for monochrome (e.g. LWIR) images during aberration correction
public double gpu_sigma_corr = 0.9; // LPF sigma (pix) applied to the composite 2D correlation, RGB images
public double gpu_sigma_corr_m = 0.15; // LPF sigma (pix) applied to the composite 2D correlation, monochrome images
public double gpu_fatz = 30.0; // absolute fat zero for phase correlation of color images (its square is added to the sum of squared amplitudes)
public double gpu_fatz_m = 30.0; // absolute fat zero for phase correlation of monochrome images
public boolean replaceWeakOutliers = true; // false;
......@@ -817,6 +829,15 @@ public class CLTParameters {
return monochrome ? fat_zero_mono : fat_zero;
}
/**
 * Select the absolute fat zero for GPU phase correlation.
 * @param monochrome true to get the value for monochrome images, false for color images
 * @return gpu_fatz_m when monochrome, gpu_fatz otherwise
 */
public double getGpuFatZero(boolean monochrome) {
	if (monochrome) {
		return gpu_fatz_m;
	}
	return gpu_fatz;
}
/**
 * Select the LPF sigma that is applied to the composite 2D correlation on the GPU.
 * @param monochrome true to get the value for monochrome images, false for color images
 * @return gpu_sigma_corr_m when monochrome, gpu_sigma_corr otherwise
 */
public double getGpuCorrSigma(boolean monochrome) {
	if (monochrome) {
		return gpu_sigma_corr_m;
	}
	return gpu_sigma_corr;
}
/**
 * Select the strength scale for one of the two cameras.
 * @param aux true to get the value for the auxiliary camera, false for the main one
 * @return scale_strength_aux when aux is set, scale_strength_main otherwise
 */
public double getScaleStrength(boolean aux) {
	if (aux) {
		return scale_strength_aux;
	}
	return scale_strength_main;
}
......@@ -1512,6 +1533,18 @@ public class CLTParameters {
properties.setProperty(prefix+"taEnMismatch", this.taEnMismatch +"");
properties.setProperty(prefix+"gpu_corr_rad", this.gpu_corr_rad +"");
properties.setProperty(prefix+"gpu_weight_r", this.gpu_weight_r +"");
properties.setProperty(prefix+"gpu_weight_b", this.gpu_weight_b +"");
properties.setProperty(prefix+"gpu_sigma_r", this.gpu_sigma_r +"");
properties.setProperty(prefix+"gpu_sigma_b", this.gpu_sigma_b +"");
properties.setProperty(prefix+"gpu_sigma_g", this.gpu_sigma_g +"");
properties.setProperty(prefix+"gpu_sigma_m", this.gpu_sigma_m +"");
properties.setProperty(prefix+"gpu_sigma_corr", this.gpu_sigma_corr +"");
properties.setProperty(prefix+"gpu_sigma_corr_m", this.gpu_sigma_corr_m +"");
properties.setProperty(prefix+"gpu_fatz", this.gpu_fatz +"");
properties.setProperty(prefix+"gpu_fatz_m", this.gpu_fatz_m +"");
properties.setProperty(prefix+"debug_initial_discriminate", this.debug_initial_discriminate+"");
properties.setProperty(prefix+"dbg_migrate", this.dbg_migrate+"");
......@@ -2265,6 +2298,17 @@ public class CLTParameters {
if (properties.getProperty(prefix+"taEnFlaps")!=null) this.taEnFlaps=Boolean.parseBoolean(properties.getProperty(prefix+"taEnFlaps"));
if (properties.getProperty(prefix+"taEnMismatch")!=null) this.taEnMismatch=Boolean.parseBoolean(properties.getProperty(prefix+"taEnMismatch"));
if (properties.getProperty(prefix+"gpu_corr_rad")!=null) this.gpu_corr_rad=Integer.parseInt(properties.getProperty(prefix+"gpu_corr_rad"));
if (properties.getProperty(prefix+"gpu_weight_r")!=null) this.gpu_weight_r=Double.parseDouble(properties.getProperty(prefix+"gpu_weight_r"));
if (properties.getProperty(prefix+"gpu_weight_b")!=null) this.gpu_weight_b=Double.parseDouble(properties.getProperty(prefix+"gpu_weight_b"));
if (properties.getProperty(prefix+"gpu_sigma_r")!=null) this.gpu_sigma_r=Double.parseDouble(properties.getProperty(prefix+"gpu_sigma_r"));
if (properties.getProperty(prefix+"gpu_sigma_b")!=null) this.gpu_sigma_b=Double.parseDouble(properties.getProperty(prefix+"gpu_sigma_b"));
if (properties.getProperty(prefix+"gpu_sigma_g")!=null) this.gpu_sigma_g=Double.parseDouble(properties.getProperty(prefix+"gpu_sigma_g"));
if (properties.getProperty(prefix+"gpu_sigma_m")!=null) this.gpu_sigma_m=Double.parseDouble(properties.getProperty(prefix+"gpu_sigma_m"));
if (properties.getProperty(prefix+"gpu_sigma_corr")!=null) this.gpu_sigma_corr=Double.parseDouble(properties.getProperty(prefix+"gpu_sigma_corr"));
if (properties.getProperty(prefix+"gpu_sigma_corr_m")!=null) this.gpu_sigma_corr_m=Double.parseDouble(properties.getProperty(prefix+"gpu_sigma_corr_m"));
if (properties.getProperty(prefix+"gpu_fatz")!=null) this.gpu_fatz=Double.parseDouble(properties.getProperty(prefix+"gpu_fatz"));
if (properties.getProperty(prefix+"gpu_fatz_m")!=null) this.gpu_fatz_m=Double.parseDouble(properties.getProperty(prefix+"gpu_fatz_m"));
if (properties.getProperty(prefix+"debug_initial_discriminate")!=null) this.debug_initial_discriminate=Boolean.parseBoolean(properties.getProperty(prefix+"debug_initial_discriminate"));
if (properties.getProperty(prefix+"dbg_migrate")!=null) this.dbg_migrate=Boolean.parseBoolean(properties.getProperty(prefix+"dbg_migrate"));
......@@ -3159,6 +3203,35 @@ public class CLTParameters {
gd.addCheckbox ("Cost of using supertile \"flaps\" (not in the center 8x8 tiles area)", this.taEnFlaps);
gd.addCheckbox ("Cost of a measurement layer not having same layer in the same location or near", this.taEnMismatch);
gd.addTab ("GPU", "Parameters for GPU development");
gd.addMessage ("--- GPU processing parameters ---");
gd.addNumericField("Correlation radius", this.gpu_corr_rad, 0, 6,"pix",
"Size of the 2D correlation - maximal radius = 7 corresponds to full 15x15 pixel tile");
// Red and blue weights for the composite 2D correlation; green gets the remainder.
// Each tooltip names the component that its own field controls.
gd.addNumericField("Correlation weight R", this.gpu_weight_r, 4, 6,"",
"Weight of R for composite 2D correlation (green weight is 1.0 -gpu_weight_r - gpu_weight_b)");
gd.addNumericField("Correlation weight B", this.gpu_weight_b, 4, 6,"",
"Weight of B for composite 2D correlation (green weight is 1.0 -gpu_weight_r - gpu_weight_b)");
gd.addNumericField("Color LPF sigma R", this.gpu_sigma_r, 4, 6,"pix",
"LPF sigma to process color components during aberration correction");
gd.addNumericField("Color LPF sigma B", this.gpu_sigma_b, 4, 6,"pix",
"LPF sigma to process color components during aberration correction");
gd.addNumericField("Color LPF sigma G", this.gpu_sigma_g, 4, 6,"pix",
"LPF sigma to process color components during aberration correction");
gd.addNumericField("Monochrome LPF sigma", this.gpu_sigma_m, 4, 6,"pix",
"LPF sigma to process monochrome (e.g.LWIR) during aberration correction");
gd.addNumericField("LPF sigma for correlation, color", this.gpu_sigma_corr, 4, 6,"pix",
"LPF sigma to apply to the composite 2D correlation for RGB images");
gd.addNumericField("LPF sigma for correlation, mono", this.gpu_sigma_corr_m, 4, 6,"pix",
"LPF sigma to apply to the composite 2D correlation for monochrome images");
gd.addNumericField("Fat zero (absolute) for phase correlation of color images", this.gpu_fatz, 4, 6,"",
"Add squared fat zero to the sum of squared amplitudes, color images");
gd.addNumericField("Fat zero (absolute) for phase correlation of monochrome images", this.gpu_fatz_m, 4, 6,"",
"Add squared fat zero to the sum of squared amplitudes, monochrome images");
gd.addTab ("LWIR", "parameters for LWIR/EO 8-camera rig");
this.lwir.dialogQuestions(gd);
gd.addTab ("Debug", "Other debug images");
gd.addMessage ("--- Other debug images ---");
// clt_parameters.debug_initial_discriminate, // final boolean debug_initial_discriminate,
......@@ -3190,8 +3263,6 @@ public class CLTParameters {
gd.addMessage ("Unity up vector in camera coordinate system (x - right, y - up, z - to camera): {"+
this.vertical_xyz[0]+","+ this.vertical_xyz[1]+","+ this.vertical_xyz[2]+"}");
gd.addTab ("LWIR", "parameters for LWIR/EO 8-camera rig");
this.lwir.dialogQuestions(gd);
// gd.buildDialog();
gd.showDialog();
......@@ -3886,6 +3957,20 @@ public class CLTParameters {
this.taEnFlaps= gd.getNextBoolean();
this.taEnMismatch= gd.getNextBoolean();
this.gpu_corr_rad = (int) gd.getNextNumber();
this.gpu_weight_r = gd.getNextNumber();
this.gpu_weight_b = gd.getNextNumber();
this.gpu_sigma_r = gd.getNextNumber();
this.gpu_sigma_b = gd.getNextNumber();
this.gpu_sigma_g = gd.getNextNumber();
this.gpu_sigma_m = gd.getNextNumber();
this.gpu_sigma_corr = gd.getNextNumber();
this.gpu_sigma_corr_m = gd.getNextNumber();
this.gpu_fatz = gd.getNextNumber();
this.gpu_fatz_m = gd.getNextNumber();
this.lwir.dialogAnswers(gd);
this.debug_initial_discriminate= gd.getNextBoolean();
this.dbg_migrate= gd.getNextBoolean();
......@@ -3911,8 +3996,6 @@ public class CLTParameters {
this.show_first_clusters= gd.getNextBoolean();
this.show_planes= gd.getNextBoolean();
this.lwir.dialogAnswers(gd);
return true;
}
......
......@@ -231,6 +231,7 @@ public class EyesisCorrectionParameters {
cp.zcorrect= this.zcorrect;
cp.saveSettings= this.saveSettings;
cp.sourceDirectory= this.sourceDirectory;
cp.tile_processor_gpu = this.tile_processor_gpu;
cp.use_set_dirs = this.use_set_dirs;
// cp.sourcePrefix= this.sourcePrefix;
// cp.sourceSuffix= this.sourceSuffix;
......@@ -388,6 +389,8 @@ public class EyesisCorrectionParameters {
properties.setProperty(prefix+"saveSettings",this.saveSettings+"");
properties.setProperty(prefix+"sourceDirectory",this.sourceDirectory);
properties.setProperty(prefix+"tile_processor_gpu",this.tile_processor_gpu);
properties.setProperty(prefix+"use_set_dirs", this.use_set_dirs+"");
properties.setProperty(prefix+"sourcePrefix",this.sourcePrefix);
......@@ -543,6 +546,7 @@ public class EyesisCorrectionParameters {
if (properties.getProperty(prefix+"zcorrect")!=null) this.zcorrect=Boolean.parseBoolean(properties.getProperty(prefix+"zcorrect"));
if (properties.getProperty(prefix+"saveSettings")!=null) this.saveSettings=Boolean.parseBoolean(properties.getProperty(prefix+"saveSettings"));
if (properties.getProperty(prefix+"sourceDirectory")!= null) this.sourceDirectory=properties.getProperty(prefix+"sourceDirectory");
if (properties.getProperty(prefix+"tile_processor_gpu")!= null) this.tile_processor_gpu=properties.getProperty(prefix+"tile_processor_gpu");
if (properties.getProperty(prefix+"firstSubCamera")!= null) this.firstSubCamera=Integer.parseInt(properties.getProperty(prefix+"firstSubCamera"));
if (properties.getProperty(prefix+"firstSubCameraConfig")!= null) this.firstSubCameraConfig=Integer.parseInt(properties.getProperty(prefix+"firstSubCameraConfig"));
if (properties.getProperty(prefix+"numSubCameras")!= null) this.numSubCameras=Integer.parseInt(properties.getProperty(prefix+"numSubCameras"));
......
......@@ -1220,7 +1220,7 @@ public class EyesisDCT {
}
if (this.correctionsParameters.deconvolve) { // process with DCT, otherwise use simple debayer
ImageDtt image_dtt = new ImageDtt(false, 1.0); // Bayer( not monochrome), scale correlation strengths
ImageDtt image_dtt = new ImageDtt(dctParameters.dct_size, false, 1.0); // Bayer( not monochrome), scale correlation strengths
double [][][][] dct_data = image_dtt.mdctStack(
stack,
channel,
......
......@@ -3075,7 +3075,7 @@ private Panel panel1,
}
}
ImageDtt image_dtt = new ImageDtt(false, 1.0); // Bayer( not monochrome), scale correlation strengths
ImageDtt image_dtt = new ImageDtt(DCT_PARAMETERS.dct_size, false, 1.0); // Bayer( not monochrome), scale correlation strengths
double [][][][] dctdc_data = image_dtt.mdctScale(
DBG_IMP.getStack(),
DCT_PARAMETERS.kernel_chn,
......@@ -3173,7 +3173,7 @@ private Panel panel1,
}
}
ImageDtt image_dtt = new ImageDtt(false, 1.0); // Bayer( not monochrome), scale correlation strengths
ImageDtt image_dtt = new ImageDtt(DCT_PARAMETERS.dct_size,false, 1.0); // Bayer( not monochrome), scale correlation strengths
double [][][][] dctdc_data = image_dtt.mdctStack(
DBG_IMP.getStack(),
DCT_PARAMETERS.kernel_chn,
......@@ -5723,6 +5723,7 @@ private Panel panel1,
if (!prepareRigImages()) return false;
String configPath=getSaveCongigPath();
if (configPath.equals("ABORT")) return false;
// if ((CORRECTION_PARAMETERS.tile_processor_gpu != null) &&
if (DEBUG_LEVEL > -2){
System.out.println("++++++++++++++ Calculating combined correlations ++++++++++++++");
......@@ -5740,6 +5741,7 @@ private Panel panel1,
try {
TWO_QUAD_CLT.prepareFilesForGPUDebug(
CORRECTION_PARAMETERS.tile_processor_gpu,// String save_prefix, // absolute path to the cuda project root
QUAD_CLT, // QuadCLT quadCLT_main,
QUAD_CLT_AUX, // QuadCLT quadCLT_aux,
CLT_PARAMETERS, // EyesisCorrectionParameters.DCTParameters dct_parameters,
......@@ -7048,7 +7050,10 @@ private Panel panel1,
}
}
ImageDtt image_dtt = new ImageDtt(false, 1.0); // Bayer( not monochrome), scale correlation strengths
ImageDtt image_dtt = new ImageDtt(
CLT_PARAMETERS.transform_size,
false,
1.0); // Bayer( not monochrome), scale correlation strengths
double [][][][][] clt_data = image_dtt.cltStack(
DBG_IMP.getStack(),
0, // CLT_PARAMETERS.kernel_chn,
......@@ -7082,7 +7087,7 @@ private Panel panel1,
for (int chn = 0; chn < clt_data.length; chn++) {
clt_data[chn] = image_dtt.clt_shiftXY(
clt_data[chn], // final double [][][][] dct_data, // array [tilesY][tilesX][4][dct_size*dct_size]
CLT_PARAMETERS.transform_size, // final int dct_size,
/// CLT_PARAMETERS.transform_size, // final int dct_size,
CLT_PARAMETERS.shift_x, // final double shiftX,
CLT_PARAMETERS.shift_y, // final double shiftY,
(CLT_PARAMETERS.dbg_mode >> 2) & 3, // swap order hor/vert
......@@ -7095,7 +7100,7 @@ private Panel panel1,
for (int chn=0; chn<iclt_data.length;chn++){
iclt_data[chn] = image_dtt.iclt_2d(
clt_data[chn], // scanline representation of dcd data, organized as dct_size x dct_size tiles
CLT_PARAMETERS.transform_size, // final int
/// CLT_PARAMETERS.transform_size, // final int
CLT_PARAMETERS.clt_window, //window_type
CLT_PARAMETERS.iclt_mask, //which of 4 to transform back
CLT_PARAMETERS.dbg_mode, //which of 4 to transform back
......@@ -7178,7 +7183,10 @@ private Panel panel1,
}
String suffix = "-dx_"+(CLT_PARAMETERS.ishift_x+CLT_PARAMETERS.shift_x)+"_dy_"+(CLT_PARAMETERS.ishift_y+CLT_PARAMETERS.shift_y);
ImageDtt image_dtt = new ImageDtt(COLOR_PROC_PARAMETERS.isMonochrome(), CLT_PARAMETERS.getScaleStrength(false)); // Bayer, not monochrome
ImageDtt image_dtt = new ImageDtt(
CLT_PARAMETERS.transform_size,
COLOR_PROC_PARAMETERS.isMonochrome(),
CLT_PARAMETERS.getScaleStrength(false)); // Bayer, not monochrome
String [] titles = {
"redCC", "redSC", "redCS", "redSS",
"blueCC", "blueSC", "blueCS", "blueSS",
......@@ -7235,7 +7243,7 @@ private Panel panel1,
for (int chn = 0; chn < clt_data.length; chn++) {
clt_data2[chn] = image_dtt.clt_shiftXY(
clt_data2[chn], // final double [][][][] dct_data, // array [tilesY][tilesX][4][dct_size*dct_size]
CLT_PARAMETERS.transform_size, // final int dct_size,
/// CLT_PARAMETERS.transform_size, // final int dct_size,
CLT_PARAMETERS.shift_x, // final double shiftX,
CLT_PARAMETERS.shift_y, // final double shiftY,
(CLT_PARAMETERS.dbg_mode >> 2) & 3, // swap order hor/vert
......@@ -7266,7 +7274,7 @@ private Panel panel1,
clt_corr[chn] = image_dtt.clt_correlate(
clt_data[chn], // final double [][][][] data1, // array [tilesY][tilesX][4][dct_size*dct_size]
clt_data2[chn], // final double [][][][] data2, // array [tilesY][tilesX][4][dct_size*dct_size]
CLT_PARAMETERS.transform_size, // final int dct_size,
/// CLT_PARAMETERS.transform_size, // final int dct_size,
CLT_PARAMETERS.getFatZero(image_dtt.isMonochrome()), // final double fat_zero, // add to denominator to modify phase correlation (same units as data1, data2)
CLT_PARAMETERS.tileX, //final int debug_tileX
CLT_PARAMETERS.tileY, //final int debug_tileY
......@@ -7297,7 +7305,7 @@ private Panel panel1,
image_dtt.clt_lpf( // filter in-place
CLT_PARAMETERS.getCorrSigma(image_dtt.isMonochrome()), // final double sigma,
clt_corr[chn], // final double [][][][] clt_data,
CLT_PARAMETERS.transform_size,
/// CLT_PARAMETERS.transform_size,
THREADS_MAX, // maximal number of threads to launch
DEBUG_LEVEL); // globalDebugLevel)
}
......
......@@ -74,12 +74,14 @@ import jcuda.nvrtc.nvrtcProgram;
public class GPUTileProcessor {
static String GPU_KERNEL_FILE = "dtt8x8.cuh";
static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"};
static String GPU_CONVERT_CORRECT_TILES_NAME = "convert_correct_tiles";
static String GPU_IMCLT_RBG_NAME = "imclt_rbg";
static String GPU_CONVERT_CORRECT_TILES_NAME = "convert_correct_tiles"; // name in C code
static String GPU_IMCLT_RBG_NAME = "imclt_rbg"; // name in C code
static String GPU_CORRELATE2D_NAME = "correlate2D"; // name in C code
// pass some defines to gpu source code with #ifdef JCUDA
public static int DTT_SIZE = 8;
static int THREADSX = DTT_SIZE;
public static int NUM_CAMS = 4;
public static int NUM_PAIRS = 6; // top hor, bottom hor, left vert, right vert, main diagonal, other diagonal
static int NUM_COLORS = 3;
public static int IMG_WIDTH = 2592;
public static int IMG_HEIGHT = 1936;
......@@ -88,6 +90,8 @@ public class GPUTileProcessor {
static int KERNELS_LSTEP = 4;
static int THREADS_PER_TILE = 8;
static int TILES_PER_BLOCK = 4; // 8 - slower
static int CORR_THREADS_PER_TILE = 8;
static int CORR_TILES_PER_BLOCK = 4;
static int IMCLT_THREADS_PER_TILE = 16;
static int IMCLT_TILES_PER_BLOCK = 4;
......@@ -95,6 +99,10 @@ public class GPUTileProcessor {
static int CLTEXTRA_SIZE = 8;
static int KERN_TILES = KERNELS_HOR * KERNELS_VERT * NUM_COLORS;
static int KERN_SIZE = KERN_TILES * 4 * 64;
static int CORR_SIZE = (2* DTT_SIZE - 1) * (2* DTT_SIZE - 1); // 15x15
public static int CORR_PAIR_SHIFT = 8;
public static int TASK_CORR_BITS = 4; // start of pair mask
public static int CORR_OUT_RAD = 7; // output radius of the correelations (implement)
int DTTTEST_BLOCK_WIDTH = 32; // may be read from the source code
......@@ -105,6 +113,7 @@ public class GPUTileProcessor {
private CUfunction GPU_CONVERT_CORRECT_TILES_kernel = null;
private CUfunction GPU_IMCLT_RBG_kernel = null;
private CUfunction GPU_CORRELATE2D_kernel = null;
// CPU arrays of pointers to GPU memory
// These arrays may go to method, they are here just to be able to free GPU memory if needed
private CUdeviceptr [] gpu_kernels_h = new CUdeviceptr[NUM_CAMS];
......@@ -119,18 +128,25 @@ public class GPUTileProcessor {
private CUdeviceptr gpu_kernel_offsets = new CUdeviceptr();
private CUdeviceptr gpu_bayer = new CUdeviceptr();
private CUdeviceptr gpu_tasks = new CUdeviceptr();
private CUdeviceptr gpu_tasks = new CUdeviceptr(); // allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.POINTER
private CUdeviceptr gpu_corrs = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER
private CUdeviceptr gpu_clt = new CUdeviceptr();
// private
private CUdeviceptr gpu_corr_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER
// private
CUmodule module; // to access constants memory
// private CUdeviceptr gpu_lpf = new CUdeviceptr();
private int mclt_stride;
private int corr_stride;
private int imclt_stride;
public int num_task_tiles;
public int num_corr_tiles;
public class TpTask {
public int task;
public int task; // [0](+1) - generate 4 images, [4..9]+16..+512 - correlation pairs
public float target_disparity;
public int ty;
......@@ -264,6 +280,7 @@ public class GPUTileProcessor {
"#define DTT_SIZE " + DTT_SIZE+"\n"+
"#define THREADSX " + THREADSX+"\n"+
"#define NUM_CAMS " + NUM_CAMS+"\n"+
"#define NUM_PAIRS " + NUM_PAIRS+"\n"+
"#define NUM_COLORS " + NUM_COLORS+"\n"+
"#define IMG_WIDTH " + IMG_WIDTH+"\n"+
"#define IMG_HEIGHT " + IMG_HEIGHT+"\n"+
......@@ -272,8 +289,13 @@ public class GPUTileProcessor {
"#define KERNELS_LSTEP " + KERNELS_LSTEP+"\n"+
"#define THREADS_PER_TILE " + THREADS_PER_TILE+"\n"+
"#define TILES_PER_BLOCK " + TILES_PER_BLOCK+"\n"+
"#define CORR_THREADS_PER_TILE " + CORR_THREADS_PER_TILE+"\n"+
"#define CORR_TILES_PER_BLOCK " + CORR_TILES_PER_BLOCK+"\n"+
"#define IMCLT_THREADS_PER_TILE " + IMCLT_THREADS_PER_TILE+"\n"+
"#define IMCLT_TILES_PER_BLOCK " + IMCLT_TILES_PER_BLOCK+"\n";
"#define IMCLT_TILES_PER_BLOCK " + IMCLT_TILES_PER_BLOCK+"\n"+
"#define CORR_PAIR_SHIFT " + CORR_PAIR_SHIFT+"\n"+
"#define TASK_CORR_BITS " + TASK_CORR_BITS+"\n"+
"#define CORR_OUT_RAD " + CORR_OUT_RAD+"\n";
for (String src_file:GPU_KERNEL_FILES) {
File file = null;
......@@ -297,17 +319,21 @@ public class GPUTileProcessor {
}
// Create the kernel functions (first - just test)
String [] func_names = {GPU_CONVERT_CORRECT_TILES_NAME, GPU_IMCLT_RBG_NAME};
String [] func_names = {GPU_CONVERT_CORRECT_TILES_NAME, GPU_IMCLT_RBG_NAME, GPU_CORRELATE2D_NAME};
CUfunction[] functions = createFunctions(kernelSource, func_names);
this.GPU_CONVERT_CORRECT_TILES_kernel = functions[0];
this.GPU_IMCLT_RBG_kernel = functions[1];
this.GPU_CORRELATE2D_kernel = functions[2];
System.out.println("GPU kernel functions initialized");
System.out.println("Sizeof.POINTER="+Sizeof.POINTER);
// System.out.println("Sizeof.POINTER="+Sizeof.POINTER);
System.out.println(GPU_CONVERT_CORRECT_TILES_kernel.toString());
System.out.println(GPU_IMCLT_RBG_kernel.toString());
System.out.println(GPU_CORRELATE2D_kernel.toString());
// Init data arrays
// Init data arrays for all kernels
int tilesX = IMG_WIDTH / DTT_SIZE;
int tilesY = IMG_HEIGHT / DTT_SIZE;
long [] device_stride = new long [1];
for (int ncam = 0; ncam < NUM_CAMS; ncam++) {
gpu_kernels_h[ncam] = new CUdeviceptr();
......@@ -315,7 +341,6 @@ public class GPUTileProcessor {
gpu_kernel_offsets_h[ncam] = new CUdeviceptr();
cuMemAlloc(gpu_kernel_offsets_h[ncam],KERN_TILES * CLTEXTRA_SIZE * Sizeof.FLOAT ); // public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
gpu_bayer_h[ncam] = new CUdeviceptr();
long [] device_stride = new long [1];
cuMemAllocPitch (
gpu_bayer_h[ncam], // CUdeviceptr dptr,
device_stride, // long[] pPitch,
......@@ -323,6 +348,7 @@ public class GPUTileProcessor {
IMG_HEIGHT, // long Height,
Sizeof.FLOAT); // int ElementSizeBytes)
mclt_stride = (int)(device_stride[0] / Sizeof.FLOAT);
gpu_corr_images_h[ncam] = new CUdeviceptr();
cuMemAllocPitch (
gpu_corr_images_h[ncam], // CUdeviceptr dptr,
......@@ -365,10 +391,22 @@ public class GPUTileProcessor {
// Set task array
cuMemAlloc(gpu_tasks, tilesX * tilesY * TPTASK_SIZE * Sizeof.POINTER);
// Set corrs array
/// cuMemAlloc(gpu_corrs, tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER);
cuMemAlloc(gpu_corr_indices,tilesX * tilesY * NUM_PAIRS * Sizeof.POINTER);
cuMemAllocPitch (
gpu_corrs, // CUdeviceptr dptr,
device_stride, // long[] pPitch,
CORR_SIZE * Sizeof.FLOAT, // long WidthInBytes,
NUM_PAIRS * tilesX * tilesY, // long Height,
Sizeof.FLOAT); // int ElementSizeBytes)
corr_stride = (int)(device_stride[0] / Sizeof.FLOAT);
}
public void setTasks(TpTask [] tile_tasks, boolean use_aux)
public void setTasks(TpTask [] tile_tasks, boolean use_aux) // while is it in class member? - just to be able to free
{
num_task_tiles = tile_tasks.length;
float [] ftasks = new float [TPTASK_SIZE * num_task_tiles];
......@@ -378,6 +416,17 @@ public class GPUTileProcessor {
cuMemcpyHtoD(gpu_tasks, Pointer.to(ftasks), TPTASK_SIZE * num_task_tiles * Sizeof.FLOAT);
}
/**
 * Copy the list of requested 2D correlations to GPU memory (gpu_corr_indices) and
 * remember their number in num_corr_tiles.
 * @param corr_indices one int per correlation to calculate - presumably the
 *        (tile_number << CORR_PAIR_SHIFT) | pair_number values built by getCorrTasks()
 */
public void setCorrIndices(int [] corr_indices)
{
	num_corr_tiles = corr_indices.length;
	// Transfer the int bit patterns unchanged. Repacking them with Float.intBitsToFloat()
	// is not needed and is not guaranteed to preserve every pattern (NaN encodings).
	cuMemcpyHtoD(gpu_corr_indices, Pointer.to(corr_indices), (long) num_corr_tiles * Sizeof.INT);
}
public void setConvolutionKernel(
float [] kernel, // [tileY][tileX][color][..]
float [] kernel_offsets,
......@@ -479,6 +528,8 @@ public class GPUTileProcessor {
// need to run setTasks(TpTask [] tile_tasks, boolean use_aux) to format/transfer to GPU memory
public TpTask [] setFullFrameImages(
float target_disparity, // apply same disparity to all tiles
int out_image, // from which tiles to generate image (currently 0/1)
int corr_mask, // which correlation pairs to generate (maybe later - reduce size from 15x15)
boolean use_master,
boolean use_aux,
final GeometryCorrection geometryCorrection_main,
......@@ -489,11 +540,19 @@ public class GPUTileProcessor {
int tilesX = IMG_WIDTH / DTT_SIZE;
int tilesY = IMG_HEIGHT / DTT_SIZE;
float [] target_disparities = new float [tilesX * tilesY];
int [] out_images = new int [tilesX * tilesY];
int [] corr_masks = new int [tilesX * tilesY];
if (target_disparity != 0.0) {
for (int i = 0; i <target_disparities.length; i++ ) target_disparities[i] = target_disparity;
}
for (int i = 0; i <out_images.length; i++ ) {
out_images[i] = out_image; // 0xf; // all 4 images
corr_masks[i] = corr_mask; // 0x3f; // all 6 correlations
}
return setFullFrameImages(
target_disparities, // should be tilesX*tilesY long
out_images, // int [] out_images, // from which tiles to generate image (currently 0/1)
corr_masks, // int [] corr_mask, // which correlation pairs to generate (maybe later - reduce size from 15x15)
use_master,
use_aux,
geometryCorrection_main,
......@@ -505,6 +564,8 @@ public class GPUTileProcessor {
public TpTask [] setFullFrameImages(
float [] target_disparities, // should be tilesX*tilesY long
int [] out_images, // from which tiles to generate image (currently 0/1)
int [] corr_mask, // which correlation pairs to generate (maybe later - reduce size from 15x15)
boolean use_master,
boolean use_aux,
final GeometryCorrection geometryCorrection_main,
......@@ -520,7 +581,13 @@ public class GPUTileProcessor {
int indx = 0;
for (int ty = 0; ty < tilesY; ty++) {
for (int tx = 0; tx < tilesX; tx++) {
tp_tasks[indx] = new TpTask(tx,ty, target_disparities[indx], 1); // task == 1 for now
// tp_tasks[indx] = new TpTask(tx,ty, target_disparities[indx], 1); // task == 1 for now
// Only generate for non-empty tasks, use 1 empty empty as a terminator?
tp_tasks[indx] = new TpTask(tx,ty, target_disparities[indx],
((out_images[indx] & 0x0f) << 0) |
((corr_mask [indx] & 0x3f) << 4)
); // task == 1 for now
indx++;
}
}
......@@ -534,6 +601,94 @@ public class GPUTileProcessor {
return tp_tasks;
}
/**
 * Build the flat list of 2D correlations requested by a set of tile tasks.
 * The pair selection of each task is the NUM_PAIRS-bit field of TpTask.task
 * that starts at bit TASK_CORR_BITS; one output element is produced per set bit,
 * in task order and then in ascending pair number.
 * @param tp_tasks array of tasks that contain masks of the required pairs
 * @return each element has (tile_number << CORR_PAIR_SHIFT) | pair_number, where
 *         tile_number = ty * (IMG_WIDTH / DTT_SIZE) + tx
 */
public int [] getCorrTasks(
		TpTask [] tp_tasks) {
	final int tiles_per_row = IMG_WIDTH / DTT_SIZE;
	final int pairs_mask = (1 << NUM_PAIRS) - 1;
	// First pass: total number of selected pairs defines the result length
	int total = 0;
	for (int i = 0; i < tp_tasks.length; i++) {
		total += Integer.bitCount((tp_tasks[i].task >> TASK_CORR_BITS) & pairs_mask);
	}
	// Second pass: emit one packed index per selected pair
	int [] result = new int[total];
	int out = 0;
	for (int i = 0; i < tp_tasks.length; i++) {
		TpTask tt = tp_tasks[i];
		int selected = (tt.task >> TASK_CORR_BITS) & pairs_mask;
		if (selected == 0) {
			continue; // no correlations requested for this tile
		}
		int tile_bits = (tt.ty * tiles_per_row + tt.tx) << CORR_PAIR_SHIFT;
		for (int pair = 0; pair < NUM_PAIRS; pair++) {
			if (((selected >> pair) & 1) != 0) {
				result[out++] = tile_bits | pair;
			}
		}
	}
	return result;
}
/**
 * Names of the correlation pairs, indexed by pair number.
 * @return a new 6-element array of titles on every call
 */
public static String [] getCorrTitles() {
	String [] titles = new String [6];
	int n = 0;
	titles[n++] = "hor-top";
	titles[n++] = "hor-bottom";
	titles[n++] = "vert-left";
	titles[n++] = "vert-right";
	titles[n++] = "diag-main";
	titles[n++] = "diag-other";
	return titles;
}
/**
 * Arrange individual 2D correlation tiles into one mosaic image per correlation pair.
 * Each tile occupies a corr_size x corr_size cell (corr_size is the square root of the
 * length of corr2d[0]); cells are separated and surrounded by one-pixel lines that stay 0.0.
 * Cells without a correlation remain NaN.
 * @param tilesX  number of tiles in a row
 * @param tilesY  number of tile rows
 * @param indices for each correlation: (tile_number << CORR_PAIR_SHIFT) | pair_number
 * @param corr2d  correlation data, corr2d[n] belongs to indices[n]
 * @param wh      if not null, receives {width, height} of the mosaic (left untouched when
 *                corr2d is null or empty)
 * @return array [NUM_PAIRS][height * width]; [NUM_PAIRS][0] when corr2d is null or empty
 */
public static double [][] getCorr2DView(
		int tilesX,
		int tilesY,
		int [] indices,
		float [][] corr2d,
		int [] wh){ // if is [2] - return width, height
	if ((corr2d == null) || (corr2d.length == 0)) {
		return new double [NUM_PAIRS][0];
	}
	final int corr_size = (int)(Math.round(Math.sqrt(corr2d[0].length)));// make smaller later?
	final int step = corr_size + 1; // cell pitch: tile plus one separator pixel
	final int width = tilesX * step + 1;
	final int height = tilesY * step + 1;
	double [][] data = new double [NUM_PAIRS][];
	// Layer 0 is the template: NaN inside every cell, 0.0 on the separators
	data[0] = new double[height*width];
	for (int ty = 0; ty < tilesY; ty++) {
		for (int tx = 0; tx < tilesX; tx++) {
			int cell_origin = (ty * step + 1) * width + (tx * step + 1);
			for (int row = 0; row < corr_size; row++) {
				int line_start = cell_origin + row * width;
				for (int col = 0; col < corr_size; col++) {
					data[0][line_start + col] = Double.NaN;
				}
			}
		}
	}
	for (int layer = 1; layer < NUM_PAIRS; layer++) {
		data[layer] = data[0].clone();
	}
	// Drop every available correlation into its cell of the matching pair layer
	final int pair_mask = (1 << CORR_PAIR_SHIFT) - 1;
	for (int n = 0; n < indices.length; n++) {
		int nt = indices[n] >> CORR_PAIR_SHIFT;
		int np = indices[n] & pair_mask;
		assert np < NUM_PAIRS : "invalid correllation pair";
		int tx = nt % tilesX;
		int ty = nt / tilesX;
		int cell_origin = (ty * step + 1) * width + (tx * step + 1);
		for (int row = 0; row < corr_size; row++) {
			int dst_line = cell_origin + row * width;
			int src_line = row * corr_size;
			for (int col = 0; col < corr_size; col++) {
				data[np][dst_line + col] = corr2d[n][src_line + col];
			}
		}
	}
	if (wh != null) {
		wh[0] = width;
		wh[1] = height;
	}
	return data;
}
// All data is already copied to GPU memory
public void execConverCorrectTiles() {
if (GPU_CONVERT_CORRECT_TILES_kernel == null)
......@@ -542,7 +697,7 @@ public class GPUTileProcessor {
return;
}
// kernel parameters: pointer to pointers
int [] GridFullWarps = {(num_task_tiles + TILES_PER_BLOCK -1 )/TILES_PER_BLOCK, 1, 1};
int [] GridFullWarps = {(num_task_tiles + TILES_PER_BLOCK -1 )/TILES_PER_BLOCK, 1, 1}; // round up
int [] ThreadsFullWarps = {THREADSX, TILES_PER_BLOCK, 1};
Pointer kernelParameters = Pointer.to(
Pointer.to(gpu_kernel_offsets),
......@@ -550,7 +705,11 @@ public class GPUTileProcessor {
Pointer.to(gpu_bayer),
Pointer.to(gpu_tasks),
Pointer.to(gpu_clt),
/* 2020*/// Pointer.to(gpu_corrs),
/* 2020*/// Pointer.to(gpu_corr_indices), // corr indices (tile_num <<8 + pair_index
/* 2020*/// Pointer.to(new int[] { num_corr_tiles }), // total number of 2D correlations to calculate
Pointer.to(new int[] { mclt_stride }),
/* 2020*/// Pointer.to(new int[] { corr_stride }),
Pointer.to(new int[] { num_task_tiles }),
Pointer.to(new int[] { 7 }) // lpf_mask
);
......@@ -604,6 +763,68 @@ public class GPUTileProcessor {
cuCtxSynchronize();
}
/**
 * Launch the 2D correlation GPU kernel (GPU_CORRELATE2D_kernel) for the correlations
 * previously registered with setCorrIndices(). Results go to gpu_corrs.
 * Shows an error message and returns if the kernel is not available; returns without
 * launching when there are no correlations to calculate.
 * @param scales   per-color weights, at least one element; only the first three are used,
 *                 missing ones are passed to the kernel as 0
 * @param fat_zero fat zero value passed to the kernel (converted to float)
 */
public void execCorr2D(
		double [] scales,
		double fat_zero) {
	if (GPU_CORRELATE2D_kernel == null)
	{
		IJ.showMessage("Error", "No GPU kernel: GPU_CORRELATE2D_kernel");
		return;
	}
	if (num_corr_tiles <= 0) {
		// Nothing to calculate. A grid dimension of 0 is not a valid launch configuration.
		return;
	}
	int num_colors = scales.length;
	if (num_colors > 3) num_colors = 3;
	float fscale0 = (float) scales[0];
	float fscale1 = (num_colors >1)?((float) scales[1]):0.0f;
	float fscale2 = (num_colors >2)?((float) scales[2]):0.0f;
	int [] GridFullWarps = {(num_corr_tiles + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1}; // round up
	int [] ThreadsFullWarps = {CORR_THREADS_PER_TILE, CORR_TILES_PER_BLOCK, 1};
	Pointer kernelParameters = Pointer.to(
			Pointer.to(gpu_clt),
			Pointer.to(new int[] { num_colors }),       // number of color components used (1..3)
			Pointer.to(new float[] {fscale0 }),         // weight of color 0
			Pointer.to(new float[] {fscale1 }),         // weight of color 1 (0 if absent)
			Pointer.to(new float[] {fscale2 }),         // weight of color 2 (0 if absent)
			Pointer.to(new float[] {(float) fat_zero }),
			Pointer.to(new int[] { num_corr_tiles }),   // total number of 2D correlations to calculate
			Pointer.to(gpu_corr_indices),               // correlation indices
			Pointer.to(new int[] { corr_stride }),      // pitch of gpu_corrs, in floats
			Pointer.to(gpu_corrs)                       // output: correlation results
			);
	cuCtxSynchronize();
	// Call the kernel function
	cuLaunchKernel(GPU_CORRELATE2D_kernel,
			GridFullWarps[0], GridFullWarps[1], GridFullWarps[2], // Grid dimension
			ThreadsFullWarps[0], ThreadsFullWarps[1],ThreadsFullWarps[2],// Block dimension
			0, null, // Shared memory size and stream (shared - only dynamic, static is in code)
			kernelParameters, null); // Kernel- and extra parameters
	cuCtxSynchronize();
}
/**
 * Read the calculated 2D correlations back from GPU memory (gpu_corrs).
 * The device rows are corr_stride floats apart; they are packed on the host side
 * and then split into one array per correlation.
 * @return array [num_corr_tiles][CORR_SIZE] of correlation data
 */
public float [][] getCorr2D(){
	final int row_bytes = CORR_SIZE * Sizeof.FLOAT;
	float [] packed = new float [ num_corr_tiles * CORR_SIZE];
	CUDA_MEMCPY2D d2h = new CUDA_MEMCPY2D();
	// source: pitched device memory
	d2h.srcMemoryType = CUmemorytype.CU_MEMORYTYPE_DEVICE;
	d2h.srcDevice = gpu_corrs;
	d2h.srcPitch = corr_stride * Sizeof.FLOAT;
	// destination: tightly packed host array
	d2h.dstMemoryType = CUmemorytype.CU_MEMORYTYPE_HOST;
	d2h.dstHost = Pointer.to(packed);
	d2h.dstPitch = row_bytes;
	// extent: one row per correlation
	d2h.WidthInBytes = row_bytes;
	d2h.Height = num_corr_tiles;
	cuMemcpy2D(d2h); // run copy

	float [][] result = new float [num_corr_tiles][ CORR_SIZE];
	int offset = 0;
	for (float [] row : result) {
		System.arraycopy(packed, offset, row, 0, CORR_SIZE);
		offset += CORR_SIZE;
	}
	return result;
}
public float [][] getRBG (int ncam){
int height = (IMG_HEIGHT + DTT_SIZE);
int width = (IMG_WIDTH + DTT_SIZE);
......@@ -791,23 +1012,20 @@ public class GPUTileProcessor {
}
public void setLpfRbg(
float sigma_r,
float sigma_b,
float sigma_g)
float [][] lpf_rbg) // 3 or single 64-el. array(s)
{
int dct_size = DTT_SIZE;
DttRad2 dtt = new DttRad2(dct_size);
double [][] lpf_rbg = {
dtt.dttt_iiie(setCltLpf(sigma_r)),
dtt.dttt_iiie(setCltLpf(sigma_b)),
dtt.dttt_iiie(setCltLpf(sigma_g))};
int l = dct_size*dct_size;
int l = lpf_rbg[0].length; // 64
float [] lpf_flat = new float [3 * l];
for (int i = 0; i < 3;i++) {
// System.arraycopy(lpf_rbg[i], 0, lpf_flat, l* i, l);
int ii = i;
if (ii > lpf_rbg.length) {
ii = 0; // mono
}
for (int j = 0; j < l; j++) {
lpf_flat[j+i*l] = (float) (lpf_rbg[i][j]*2*dct_size);
// lpf_flat[j + ii*l] = (float) (lpf_rbg[i][j]*2*dct_size);
lpf_flat[j + ii*l] = lpf_rbg[i][j];
}
}
......@@ -821,14 +1039,51 @@ public class GPUTileProcessor {
System.out.println("constantMemorySize: " + constantMemorySize);
cuMemcpyHtoD(constantMemoryPointer, Pointer.to(lpf_flat), constantMemorySize);
System.out.println();
}
/**
 * Uploads the correlation low-pass filter to the module global symbol "lpf_corr".
 * The symbol address and size are queried with {@code cuModuleGetGlobal}; both are
 * printed to System.out.
 * The number of bytes copied is limited to the smaller of the symbol size and the
 * size of {@code lpf_flat}, so a host array shorter than the device symbol is never
 * read past its end (the rest of the symbol is then left unchanged).
 * @param lpf_flat filter coefficients to upload
 */
public void setLpfCorr(
        float [] lpf_flat)
{
    CUdeviceptr constantMemoryPointer = new CUdeviceptr();
    long constantMemorySizeArray[] = { 0 };
    cuModuleGetGlobal(constantMemoryPointer, constantMemorySizeArray, module, "lpf_corr");
    int constantMemorySize = (int)constantMemorySizeArray[0];
    System.out.println("constantMemoryPointer: " + constantMemoryPointer);
    System.out.println("constantMemorySize: " + constantMemorySize);
    // Copying constantMemorySize bytes unconditionally would over-read the host
    // array when it holds fewer bytes than the device symbol.
    long hostBytes = (long) lpf_flat.length * Sizeof.FLOAT;
    long copyBytes = Math.min((long) constantMemorySize, hostBytes);
    cuMemcpyHtoD(constantMemoryPointer, Pointer.to(lpf_flat), copyBytes);
    System.out.println();
}
/**
 * Builds a low-pass filter for the given sigma as a float array.
 * The filter returned by {@code setCltLpf(sigma)} is passed through
 * {@code DttRad2.dttt_iiie()} and every element is multiplied by 2*DTT_SIZE.
 * @param sigma filter sigma, passed unchanged to {@code setCltLpf}
 * @return DTT_SIZE*DTT_SIZE scaled filter values, converted to float
 */
public float [] floatSetCltLpfFd(
        double sigma) {
    final int size = DTT_SIZE;
    final double [] fd = new DttRad2(size).dttt_iiie(setCltLpf(sigma));
    final int count = size * size;
    final float [] result = new float [count];
    int idx = 0;
    while (idx < count) {
        result[idx] = (float) (fd[idx]*2*size);
        idx++;
    }
    return result;
}
/**
 * Builds a low-pass filter for the given sigma as a double array.
 * The filter returned by {@code setCltLpf(sigma)} is passed through
 * {@code DttRad2.dttt_iiie()} and every element is multiplied by 2*DTT_SIZE.
 * Values are kept in full double precision (they are not rounded through float
 * as in {@code floatSetCltLpfFd}).
 * @param sigma filter sigma, passed unchanged to {@code setCltLpf}
 * @return DTT_SIZE*DTT_SIZE scaled filter values
 */
public double [] doubleSetCltLpfFd(
        double sigma) {
    int dct_size = DTT_SIZE;
    DttRad2 dtt = new DttRad2(dct_size);
    double [] clt_fd = dtt.dttt_iiie(setCltLpf(sigma));
    int l = dct_size*dct_size;
    double [] lpf_flat = new double [l];
    for (int j = 0; j < l; j++) {
        // no (float) cast here: the destination is double, narrowing would only lose precision
        lpf_flat[j] = clt_fd[j]*2*dct_size;
    }
    return lpf_flat;
}
public double [] setCltLpf(
double sigma)
{
int dct_size = DTT_SIZE;
double [] lpf = new double [dct_size*dct_size];
int dct_len = dct_size * dct_size;
if (sigma == 0.0f) {
......
......@@ -194,6 +194,147 @@ public class Correlation2d {
return this.transpose_all_diagonal;
}
/**
 * Multiply CLT data of two channels, OK with null inputs (missing colors for monochrome images)
 * @param clt_data1 first operand FD CLT data[4][transform_len]
 * @param clt_data2 second operand FD CLT data[4][transform_len]
 * @param tcorr     optional output buffer [4][transform_len]; allocated here when null,
 *                  otherwise overwritten and returned
 * @return [4][transform_len] FD CLT data, or null if either operand is null
 */
public double[][] correlateSingleColorFD(
        double [][] clt_data1,
        double [][] clt_data2,
        double [][] tcorr){ // null or initialized to [4][transform_len]
    // Either operand may be missing (monochrome); check both before touching them.
    if ((clt_data1 == null) || (clt_data2 == null)) return null; // to work with missing colors for monochrome
    if (tcorr == null) tcorr = new double [4][transform_len];
    for (int i = 0; i < transform_len; i++) {
        for (int n = 0; n<4; n++){
            tcorr[n][i] = 0;
            for (int k=0; k<4; k++){
                // ZI[n][k] selects the quadrant of clt_data1 paired with quadrant k of
                // clt_data2; a negative entry means "use quadrant -ZI[n][k] and subtract".
                int zi = ZI[n][k];
                if (zi < 0) {
                    tcorr[n][i] -= clt_data1[-zi][i] * clt_data2[k][i];
                } else {
                    tcorr[n][i] += clt_data1[zi][i] * clt_data2[k][i];
                }
            }
        }
    }
    return tcorr;
}
/**
 * Normalize 2D correlation in FD, LPF (if not null) and convert to pixel domain and trim.
 * The rows of {@code tcorr} are modified in place by normalization and filtering, and
 * each {@code tcorr[quadrant]} is then replaced by its {@code dtt.dttt_iie()} result.
 * @param tcorr FD representation of the correlation [4][transform_len]; null returns null
 * @param lpf LPF [transform_len] multiplied into every quadrant, or null to skip filtering
 * @param afat_zero2 fat zero to add during normalization, units of squared values
 *        (its square is added to the sum of the four squared components before the square root)
 * @param corr_radius if > 0 and < (transform_size - 1) - extract only the central
 *        (2*corr_radius+1) square; any other value (including 0) returns the full
 *        unfolded tile
 * @param debug_gpu print intermediate arrays to System.out
 * @return 2D phase correlation in linescan order
 */
public double[] normalizeConvertCorr(
        double [][] tcorr, // null or initialized to [4][transform_len]
        double [] lpf,
        double afat_zero2, // absolute fat zero, same units as components squared values
        int corr_radius,
        boolean debug_gpu){
    if (tcorr == null) return null;

    // normalize each FD sample by the 4-component amplitude, regularized by fat zero
    double afat_zero4 = afat_zero2*afat_zero2;
    for (int i = 0; i < transform_len; i++) {
        double s = afat_zero4;
        for (int n = 0; n< 4; n++){
            s += tcorr[n][i]*tcorr[n][i];
        }
        double k = 1.0/ Math.sqrt(s);
        for (int n = 0; n< 4; n++){
            tcorr[n][i]*= k;
        }
    }
    if (debug_gpu) {
        System.out.println("=== NORMALIZED CORRELATION , afat_zero2="+afat_zero2+", afat_zero4="+afat_zero4+" ===");
        dbgPrintQuadrants(tcorr);
    }

    // optional low-pass filter, same coefficients for all four quadrants
    if (lpf != null) {
        if (debug_gpu) {
            System.out.println("=== LPF for CORRELATION ===");
            dbgPrintSquare(lpf, transform_size);
        }
        for (int n = 0; n<4; n++) {
            for (int i = 0; i < transform_len; i++) {
                tcorr[n][i] *= lpf[i];
            }
        }
    }
    if (debug_gpu) {
        System.out.println("=== LPF-ed CORRELATION ===");
        dbgPrintQuadrants(tcorr);
    }

    // convert each quadrant to the pixel domain
    for (int quadrant = 0; quadrant < 4; quadrant++){
        int mode = ((quadrant << 1) & 2) | ((quadrant >> 1) & 1); // transpose
        tcorr[quadrant] = dtt.dttt_iie(tcorr[quadrant], mode, transform_size, debug_gpu); // not orthogonal, term[0] is NOT *= 1/sqrt(2)
    }
    if (debug_gpu) {
        System.out.println("=== CONVERTED CORRELATION ===");
        dbgPrintQuadrants(tcorr);
    }

    // convert from 4 quadrants to 15x15 centered tiles (only composite)
    double [] corr_pd = dtt.corr_unfold_tile(tcorr, transform_size);
    if (debug_gpu) {
        int corr_size = 2* transform_size -1;
        System.out.println("=== UNFOLDED CORRELATION ===");
        dbgPrintSquare(corr_pd, corr_size);
    }

    // radius outside (0, transform_size - 1) means "no trimming"
    if ((corr_radius <= 0) || (corr_radius >= (transform_size - 1))) {
        return corr_pd;
    }
    int full_size = 2 * transform_size - 1;
    int trimmed_size = 2 * corr_radius + 1;
    int trim = transform_size - 1 - corr_radius; // rows/columns dropped on each side
    double [] trimmed_pd = new double [trimmed_size * trimmed_size];
    int ioffs = (full_size + 1)*trim; // skip 'trim' full rows plus 'trim' columns
    for (int orow = 0; orow < trimmed_size; orow++) {
        System.arraycopy(corr_pd, orow*full_size + ioffs, trimmed_pd, orow*trimmed_size, trimmed_size);
    }
    return trimmed_pd;
}

/**
 * Debug helper: prints all four quadrants of tcorr as transform_size x transform_size
 * tables, each preceded by a "------dct_mode=" line.
 */
private void dbgPrintQuadrants(double [][] tcorr) {
    for (int dct_mode = 0; dct_mode < 4; dct_mode++) {
        System.out.println("------dct_mode="+dct_mode);
        dbgPrintSquare(tcorr[dct_mode], transform_size);
    }
}

/**
 * Debug helper: prints a size x size array stored in linescan order, one row per line.
 */
private static void dbgPrintSquare(double [] data, int size) {
    for (int i = 0; i < size; i++) {
        for (int j = 0; j < size; j++) {
            System.out.print(String.format("%10.5f ", data[size * i + j]));
        }
        System.out.println();
    }
}
/**
* Multiply CLT data of two channels, normalize amplitude, OK with null inputs (missing colors for monochrome images)
* @param clt_data1 first operand FD CLT data[4][transform_len]
......@@ -343,7 +484,7 @@ public class Correlation2d {
double scale_value, // scale correlation value
double [] col_weights,
double fat_zero) {
double [][][][] clt_data_tile = new double[clt_data.length][][][];
double [][][][] clt_data_tile = new double[clt_data.length][][][]; // [camera][color][quadrant][index]
for (int ncam = 0; ncam < clt_data.length; ncam++) if (clt_data[ncam] != null){
clt_data_tile[ncam] = new double[clt_data[ncam].length][][];
for (int ncol = 0; ncol < clt_data[ncam].length; ncol++) if ((clt_data[ncam][ncol] != null) && (clt_data[ncam][ncol][tileY] != null)){
......
......@@ -613,7 +613,61 @@ public class DttRad2 {
return y;
}
/**
 * Two-pass (separable) transform of an n x n tile stored in linescan order.
 * Each pass transforms n-element lines with either {@code dstiie_direct} or
 * {@code dctiie_direct}: bit 0 of mode selects dstiie_direct for the first pass,
 * bit 1 selects it for the second pass. The first pass writes its results
 * transposed, so the second pass (again over rows of the intermediate array)
 * runs along the other dimension of the input; the result is not transposed back.
 * @param x input tile, at least n*n elements (not modified)
 * @param mode bit 0 - first pass uses dstiie_direct, bit 1 - second pass uses dstiie_direct
 * @param n tile side
 * @param debug_gpu print the intermediate array after each pass to System.out
 * @return new n*n array with the transformed tile
 */
public double [] dttt_iie(double [] x, int mode, int n, boolean debug_gpu){
    final boolean firstPassSine  = (mode & 1) != 0;
    final boolean secondPassSine = (mode & 2) != 0;
    double [] out = new double [n*n];
    double [] buf = new double[n];

    // first (horizontal) pass
    for (int row = 0; row < n; row++){
        System.arraycopy(x, n*row, buf, 0, n);
        if (firstPassSine) {
            buf = dstiie_direct(buf);
        } else {
            buf = dctiie_direct(buf);
        }
        for (int k = 0; k < n; k++) {
            out[k*n + row] = buf[k]; // transpose
        }
    }
    if (debug_gpu) {
        System.out.println("------after hor, mode="+mode);
        dbgPrintSquare(out, n);
    }

    // second (vertical) pass
    for (int row = 0; row < n; row++){
        final int offs = n*row;
        System.arraycopy(out, offs, buf, 0, n);
        if (secondPassSine) {
            buf = dstiie_direct(buf);
        } else {
            buf = dctiie_direct(buf);
        }
        System.arraycopy(buf, 0, out, offs, n);
    }
    if (debug_gpu) {
        System.out.println("------after vert, mode="+mode);
        dbgPrintSquare(out, n);
    }
    return out;
}

/**
 * Debug helper: prints an n x n array stored in linescan order, one row per line.
 */
private static void dbgPrintSquare(double [] data, int n) {
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            System.out.print(String.format("%10.5f ", data[n * i + j]));
        }
        System.out.println();
    }
}
/*
if (debug_gpu) {
System.out.println("=== CONVERTED CORRELATION ===");
for (int dct_mode = 0; dct_mode < 4; dct_mode++) {
System.out.println("------dct_mode="+dct_mode);
for (int i = 0; i < transform_size; i++) {
for (int j = 0; j < transform_size; j++) {
System.out.print(String.format("%10.3f ", tcorr[dct_mode][transform_size * i + j]));
}
System.out.println();
}
}
}
*/
public double [] dttt_iii(double [] x){
......@@ -780,7 +834,7 @@ public class DttRad2 {
}
public double [] dctii_direct(double[] x){
public double [] dctii_direct(double[] x){ // orthogonal, term[0] *= 1/sqrt(2)
int n = x.length;
int t = ilog2(n)-1;
if (CII==null){
......@@ -796,7 +850,7 @@ public class DttRad2 {
return y;
}
public double [] dctiie_direct(double[] x){
public double [] dctiie_direct(double[] x){ // not orthogonal
int n = x.length;
int t = ilog2(n)-1;
if (CIIe==null){
......@@ -928,7 +982,7 @@ public class DttRad2 {
}
}
private void setup_CII(int maxN){
private void setup_CII(int maxN){ // orthogonal, term[0] *= 1/sqrt(2)
if (maxN > N) setup_arrays(maxN);
int l = ilog2(N);
if (!(CII==null) && (CII.length >= l)) return;
......@@ -949,7 +1003,7 @@ public class DttRad2 {
}
}
private void setup_CIIe(int maxN){
private void setup_CIIe(int maxN){ // not orthogonal
if (maxN > N) setup_arrays(maxN);
int l = ilog2(N);
if (!(CIIe==null) && (CIIe.length >= l)) return;
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -307,6 +307,7 @@ public class MacroCorrelation {
// double [][][][] texture_tiles = save_textures ? new double [tilesY][tilesX][][] : null; // ["RGBA".length()][];
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
this.mtp.isMonochrome(),
clt_parameters.getScaleStrength(this.mtp.isAux()));
image_dtt.clt_aberrations_quad_corr(
......@@ -350,7 +351,7 @@ public class MacroCorrelation {
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
null, // clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
/// clt_parameters.transform_size,
clt_parameters.clt_window,
shiftXY, //
0.0, // disparity_corr, // final double disparity_corr, // disparity at infinity
......
......@@ -580,7 +580,10 @@ public class QuadCLT {
double [] kernel= new double[kernelSize*kernelSize];
int centered_len = (2*dtt_size-1) * (2*dtt_size-1);
double [] kernel_centered = new double [centered_len + extra_items];
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
int chn,tileY,tileX;
DttRad2 dtt = new DttRad2(dtt_size);
ShowDoubleFloatArrays sdfa_instance = null;
......@@ -622,8 +625,8 @@ public class QuadCLT {
kernel, // double [] src_kernel, //
kernel_centered, // double [] dst_kernel, // should be (2*dtt_size-1) * (2*dtt_size-1) + extra_items size - kernel and dx, dy to the nearest 1/2 pixels
// also actual full center shifts in sensor pixels
kernelSize, // int src_size, // 64
dtt_size); // 8
kernelSize); // , // int src_size, // 64
/// dtt_size); // 8
if ((globalDebugLevel > 0) && (tileY == clt_parameters.tileY/2) && (tileX == clt_parameters.tileX/2)) {
int length=kernel_centered.length;
int size=(int) Math.sqrt(length);
......@@ -642,7 +645,7 @@ public class QuadCLT {
image_dtt.clt_normalize_kernel( //
kernel_centered, // double [] kernel, // should be (2*dtt_size-1) * (2*dtt_size-1) + extra_items size (last (2*dtt_size-1) are not modified)
norm_sym_weights, // double [] window, // normalizes result kernel * window to have sum of elements == 1.0
dtt_size,
/// dtt_size,
(globalDebugLevel > 0) && (tileY == clt_parameters.tileY/2) && (tileX == clt_parameters.tileX/2)); // 8
if ((globalDebugLevel > 0) && (tileY == clt_parameters.tileY/2) && (tileX == clt_parameters.tileX/2)) {
int length=kernel_centered.length;
......@@ -661,8 +664,8 @@ public class QuadCLT {
}
image_dtt.clt_symmetrize_kernel( //
kernel_centered, // double [] kernel, // should be (2*dtt_size-1) * (2*dtt_size-1) +4 size (last 4 are not modified)
clt_kernels[chn][tileY][tileX], // double [][] sym_kernels, // set of 4 SS, AS, SA, AA kdernels, each dtt_size * dtt_size (may have 5-th with center shift
dtt_size); // 8
clt_kernels[chn][tileY][tileX]); // , // double [][] sym_kernels, // set of 4 SS, AS, SA, AA kdernels, each dtt_size * dtt_size (may have 5-th with center shift
/// dtt_size); // 8
for (int i = 0; i < extra_items; i++){
clt_kernels[chn][tileY][tileX][4][i] = kernel_centered [centered_len + i];
}
......@@ -756,7 +759,7 @@ public class QuadCLT {
if (globalDebugLevel > 1) System.out.println("Threads done at "+IJ.d2s(0.000000001*(System.nanoTime()-startTime),3));
System.out.println("1.Threads done at "+IJ.d2s(0.000000001*(System.nanoTime()-startTime),3));
// Calculate differential offsets to interpolate for tiles between kernel centers
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(clt_parameters.transform_size,isMonochrome(),clt_parameters.getScaleStrength(isAux()));
image_dtt.clt_fill_coord_corr(
clt_parameters.kernel_step, // final int kern_step, // distance between kernel centers, in pixels.
clt_kernels, // final double [][][][] clt_data,
......@@ -1716,7 +1719,10 @@ public class QuadCLT {
sdfa_instance.showArrays(double_stack, imp_src.getWidth(), imp_src.getHeight(), true, "BEFORE_CLT_PROC", rbg_titles);
}
if (this.correctionsParameters.deconvolve) { // process with DCT, otherwise use simple debayer
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
for (int i =0 ; i < double_stack[0].length; i++){
double_stack[2][i]*=0.5; // Scale blue twice to compensate less pixels than green
}
......@@ -1725,7 +1731,7 @@ public class QuadCLT {
imp_src.getWidth(), // final int width,
clt_kernels[channel], // final double [][][][][] clt_kernels, // [color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
// clt_parameters.transform_size,
clt_parameters.clt_window,
clt_parameters.shift_x, // final int shiftX, // shift image horizontally (positive - right) - just for testing
clt_parameters.shift_y, // final int shiftY, // shift image vertically (positive - down)
......@@ -1760,7 +1766,7 @@ public class QuadCLT {
image_dtt.clt_lpf(
clt_parameters.getCorrSigma(image_dtt.isMonochrome()),
clt_data[chn],
clt_parameters.transform_size,
/// clt_parameters.transform_size,
threadsMax,
debugLevel);
}
......@@ -1768,8 +1774,8 @@ public class QuadCLT {
/*
}
*/
int tilesY = imp_src.getHeight()/clt_parameters.transform_size;
int tilesX = imp_src.getWidth()/clt_parameters.transform_size;
int tilesY = imp_src.getHeight()/image_dtt.transform_size;
int tilesX = imp_src.getWidth()/image_dtt.transform_size;
if (debugLevel > 0){
System.out.println("--tp.tilesX="+tilesX);
System.out.println("--tp.tilesY="+tilesY);
......@@ -1786,8 +1792,8 @@ public class QuadCLT {
if (debugLevel > 0){
sdfa_instance.showArrays(clt,
tilesX*clt_parameters.transform_size,
tilesY*clt_parameters.transform_size,
tilesX*image_dtt.transform_size,
tilesY*image_dtt.transform_size,
true,
result.getTitle()+"-CLT");
}
......@@ -1796,7 +1802,7 @@ public class QuadCLT {
for (int chn=0; chn<clt_data.length;chn++){
iclt_data[chn] = image_dtt.iclt_2d(
clt_data[chn], // scanline representation of dcd data, organized as dct_size x dct_size tiles
clt_parameters.transform_size, // final int
// image_dtt.transform_size, // final int
clt_parameters.clt_window, // window_type
15, // clt_parameters.iclt_mask, //which of 4 to transform back
0, // clt_parameters.dbg_mode, //which of 4 to transform back
......@@ -1806,8 +1812,8 @@ public class QuadCLT {
}
if (debugLevel > -1) sdfa_instance.showArrays(
iclt_data,
(tilesX + 1) * clt_parameters.transform_size,
(tilesY + 1) * clt_parameters.transform_size,
(tilesX + 1) * image_dtt.transform_size,
(tilesY + 1) * image_dtt.transform_size,
true,
result.getTitle()+"-rbg_sigma");
/*
......@@ -1815,8 +1821,8 @@ public class QuadCLT {
}
*/
if (debugLevel > 0) sdfa_instance.showArrays(iclt_data,
(tilesX + 1) * clt_parameters.transform_size,
(tilesY + 1) * clt_parameters.transform_size,
(tilesX + 1) * image_dtt.transform_size,
(tilesY + 1) * image_dtt.transform_size,
true,
result.getTitle()+"-ICLT-RGB");
......@@ -1824,8 +1830,8 @@ public class QuadCLT {
String [] sliceNames = {"red", "blue", "green"};
stack = sdfa_instance.makeStack(
iclt_data,
(tilesX + 1) * clt_parameters.transform_size,
(tilesY + 1) * clt_parameters.transform_size,
(tilesX + 1) * image_dtt.transform_size,
(tilesY + 1) * image_dtt.transform_size,
sliceNames); // or use null to get chn-nn slice names
......@@ -2318,7 +2324,10 @@ public class QuadCLT {
sdfa_instance.showArrays(double_stack, imp_src.getWidth(), imp_src.getHeight(), true, "BEFORE_CLT_PROC", rbg_titles);
}
if (this.correctionsParameters.deconvolve) { // process with DCT, otherwise use simple debayer
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
for (int i =0 ; i < double_stack[0].length; i++){
double_stack[2][i]*=0.5; // Scale blue twice to compensate less pixels than green
}
......@@ -2327,7 +2336,7 @@ public class QuadCLT {
imp_src.getWidth(), // final int width,
clt_kernels[channel], // final double [][][][][] clt_kernels, // [color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
// image_dtt.transform_size,
clt_parameters.clt_window,
clt_parameters.shift_x, // final int shiftX, // shift image horizontally (positive - right) - just for testing
clt_parameters.shift_y, // final int shiftY, // shift image vertically (positive - down)
......@@ -2362,7 +2371,7 @@ public class QuadCLT {
image_dtt.clt_lpf(
clt_parameters.getCorrSigma(image_dtt.isMonochrome()),
clt_data[chn],
clt_parameters.transform_size,
/// image_dtt.transform_size,
threadsMax,
debugLevel);
}
......@@ -2370,8 +2379,8 @@ public class QuadCLT {
/*
}
*/
int tilesY = imp_src.getHeight()/clt_parameters.transform_size;
int tilesX = imp_src.getWidth()/clt_parameters.transform_size;
int tilesY = imp_src.getHeight()/image_dtt.transform_size;
int tilesX = imp_src.getWidth()/image_dtt.transform_size;
if (debugLevel > 0){
System.out.println("--tilesX="+tilesX);
System.out.println("--tilesY="+tilesY);
......@@ -2388,8 +2397,8 @@ public class QuadCLT {
if (debugLevel > 0){
sdfa_instance.showArrays(clt,
tilesX*clt_parameters.transform_size,
tilesY*clt_parameters.transform_size,
tilesX*image_dtt.transform_size,
tilesY*image_dtt.transform_size,
true,
result.getTitle()+"-CLT");
}
......@@ -2398,7 +2407,7 @@ public class QuadCLT {
for (int chn=0; chn<clt_data.length;chn++){
iclt_data[chn] = image_dtt.iclt_2d(
clt_data[chn], // scanline representation of dcd data, organized as dct_size x dct_size tiles
clt_parameters.transform_size, // final int
/// image_dtt.transform_size, // final int
clt_parameters.clt_window, // window_type
15, // clt_parameters.iclt_mask, //which of 4 to transform back
0, // clt_parameters.dbg_mode, //which of 4 to transform back
......@@ -2410,8 +2419,8 @@ public class QuadCLT {
// if (debugLevel > -1) System.out.println("Applyed LPF, sigma = "+dct_parameters.dbg_sigma);
if (debugLevel > 0) sdfa_instance.showArrays(
iclt_data,
(tilesX + 1) * clt_parameters.transform_size,
(tilesY + 1) * clt_parameters.transform_size,
(tilesX + 1) * image_dtt.transform_size,
(tilesY + 1) * image_dtt.transform_size,
true,
result.getTitle()+"-rbg_sigma");
/*
......@@ -2419,8 +2428,8 @@ public class QuadCLT {
}
*/
if (debugLevel > 0) sdfa_instance.showArrays(iclt_data,
(tilesX + 0) * clt_parameters.transform_size,
(tilesY + 0) * clt_parameters.transform_size,
(tilesX + 0) * image_dtt.transform_size,
(tilesY + 0) * image_dtt.transform_size,
true,
result.getTitle()+"-ICLT-RGB");
......@@ -2428,8 +2437,8 @@ public class QuadCLT {
String [] sliceNames = {"red", "blue", "green"};
stack = sdfa_instance.makeStack(
iclt_data,
(tilesX + 0) * clt_parameters.transform_size,
(tilesY + 0) * clt_parameters.transform_size,
(tilesX + 0) * image_dtt.transform_size,
(tilesY + 0) * image_dtt.transform_size,
sliceNames); // or use null to get chn-nn slice names
......@@ -2883,7 +2892,10 @@ public class QuadCLT {
// String [] rbg_titles = {"Red", "Blue", "Green"};
ImageStack stack;
// =================
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
for (int i = 0; i < double_stacks.length; i++){
for (int j =0 ; j < double_stacks[i][0].length; j++){
double_stacks[i][2][j]*=0.5; // Scale green 0.5 to compensate more pixels than R,B
......@@ -2896,7 +2908,7 @@ public class QuadCLT {
geometryCorrection, // final GeometryCorrection geometryCorrection,
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
// image_dtt.transform_size,
clt_parameters.clt_window,
clt_parameters.shift_x, // final int shiftX, // shift image horizontally (positive - right) - just for testing
clt_parameters.shift_y, // final int shiftY, // shift image vertically (positive - down)
......@@ -2924,14 +2936,14 @@ public class QuadCLT {
image_dtt.clt_lpf(
clt_parameters.getCorrSigma(image_dtt.isMonochrome()),
clt_data[iQuad][chn],
clt_parameters.transform_size,
/// image_dtt.transform_size,
threadsMax,
debugLevel);
}
}
int tilesY = imp_quad[iQuad].getHeight()/clt_parameters.transform_size;
int tilesX = imp_quad[iQuad].getWidth()/clt_parameters.transform_size;
int tilesY = imp_quad[iQuad].getHeight()/image_dtt.transform_size;
int tilesX = imp_quad[iQuad].getWidth()/image_dtt.transform_size;
if (debugLevel > 0){
System.out.println("--tp.tilesX="+tilesX);
System.out.println("--tp.tilesY="+tilesY);
......@@ -2948,8 +2960,8 @@ public class QuadCLT {
if (debugLevel > 0){
sdfa_instance.showArrays(clt,
tilesX*clt_parameters.transform_size,
tilesY*clt_parameters.transform_size,
tilesX*image_dtt.transform_size,
tilesY*image_dtt.transform_size,
true,
results[iQuad].getTitle()+"-CLT");
}
......@@ -2958,7 +2970,7 @@ public class QuadCLT {
for (int chn=0; chn<iclt_data.length;chn++){
iclt_data[chn] = image_dtt.iclt_2d(
clt_data[iQuad][chn], // scanline representation of dcd data, organized as dct_size x dct_size tiles
clt_parameters.transform_size, // final int
/// image_dtt.transform_size, // final int
clt_parameters.clt_window, // window_type
15, // clt_parameters.iclt_mask, //which of 4 to transform back
0, // clt_parameters.dbg_mode, //which of 4 to transform back
......@@ -2968,13 +2980,13 @@ public class QuadCLT {
}
if (debugLevel > 0) sdfa_instance.showArrays(
iclt_data,
(tilesX + 0) * clt_parameters.transform_size,
(tilesY + 0) * clt_parameters.transform_size,
(tilesX + 0) * image_dtt.transform_size,
(tilesY + 0) * image_dtt.transform_size,
true,
results[iQuad].getTitle()+"-rbg_sigma");
if (debugLevel > 0) sdfa_instance.showArrays(iclt_data,
(tilesX + 0) * clt_parameters.transform_size,
(tilesY + 0) * clt_parameters.transform_size,
(tilesX + 0) * image_dtt.transform_size,
(tilesY + 0) * image_dtt.transform_size,
true,
results[iQuad].getTitle()+"-ICLT-RGB");
......@@ -2982,8 +2994,8 @@ public class QuadCLT {
String [] sliceNames = {"red", "blue", "green"};
stack = sdfa_instance.makeStack(
iclt_data,
(tilesX + 0) * clt_parameters.transform_size,
(tilesY + 0) * clt_parameters.transform_size,
(tilesX + 0) * image_dtt.transform_size,
(tilesY + 0) * image_dtt.transform_size,
sliceNames); // or use null to get chn-nn slice names
if (debugLevel > -1){
......@@ -4115,7 +4127,10 @@ public class QuadCLT {
this.is_mono);
}
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
for (int i = 0; i < double_stacks.length; i++){
if ( double_stacks[i].length > 2) {
for (int j =0 ; j < double_stacks[i][0].length; j++){
......@@ -4232,7 +4247,7 @@ public class QuadCLT {
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
/// image_dtt.transform_size,
clt_parameters.clt_window,
shiftXY, //
disparity_corr, // final double disparity_corr, // disparity at infinity
......@@ -4265,15 +4280,15 @@ public class QuadCLT {
if (clt_parameters.show_nonoverlap){// not used in lwir
texture_nonoverlap = image_dtt.combineRBGATiles(
texture_tiles, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
/// image_dtt.transform_size,
false, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
debugLevel);
sdfa_instance.showArrays(
texture_nonoverlap,
tilesX * (2 * clt_parameters.transform_size),
tilesY * (2 * clt_parameters.transform_size),
tilesX * (2 * image_dtt.transform_size),
tilesY * (2 * image_dtt.transform_size),
true,
name+sAux() + "-TXTNOL-D"+clt_parameters.disparity,
(clt_parameters.keep_weights?rbga_weights_titles:rbga_titles));
......@@ -4283,7 +4298,7 @@ public class QuadCLT {
int alpha_index = 3;
texture_overlap = image_dtt.combineRBGATiles(
texture_tiles, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
/// image_dtt.transform_size,
true, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
......@@ -4302,8 +4317,8 @@ public class QuadCLT {
if (!batch_mode && clt_parameters.show_overlap) {// not used in lwir
sdfa_instance.showArrays( // all but r-rms, b-rms
texture_overlap,
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
true,
name+sAux() + "-TXTOL-D"+clt_parameters.disparity,
(clt_parameters.keep_weights?rbga_weights_titles:rbga_titles));
......@@ -4324,8 +4339,8 @@ public class QuadCLT {
true, // boolean saveShowIntermediate, // save/show if set globally
true, // boolean saveShowFinal, // save/show result (color image?)
((clt_parameters.alpha1 > 0)? texture_rgba: texture_rgb),
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
1.0, // double scaleExposure, // is it needed?
debugLevel );
}
......@@ -4482,7 +4497,7 @@ public class QuadCLT {
for (int i = 0; i<corr_rslt.length; i++) {
corr_rslt[i] = image_dtt.corr_dbg(
clt_corr_combo[i],
2*clt_parameters.transform_size - 1,
2*image_dtt.transform_size - 1,
clt_parameters.corr_border_contrast,
threadsMax,
debugLevel);
......@@ -4490,8 +4505,8 @@ public class QuadCLT {
// all zeros
sdfa_instance.showArrays(
corr_rslt,
tilesX*(2*clt_parameters.transform_size),
tilesY*(2*clt_parameters.transform_size),
tilesX*(2*image_dtt.transform_size),
tilesY*(2*image_dtt.transform_size),
true,
name+sAux()+"-CORR-D"+clt_parameters.disparity,
titles );
......@@ -4506,7 +4521,7 @@ public class QuadCLT {
}
double [][] corr_rslt_partial = image_dtt.corr_partial_dbg(
clt_corr_partial,
2*clt_parameters.transform_size - 1, //final int corr_size,
2*image_dtt.transform_size - 1, //final int corr_size,
4, // final int pairs,
4, // final int colors,
clt_parameters.corr_border_contrast,
......@@ -4516,8 +4531,8 @@ public class QuadCLT {
System.out.println("corr_rslt_partial.length = "+corr_rslt_partial.length+", titles.length = "+titles.length);
sdfa_instance.showArrays( // out of boundary 15
corr_rslt_partial,
tilesX*(2*clt_parameters.transform_size),
tilesY*(2*clt_parameters.transform_size),
tilesX*(2*image_dtt.transform_size),
tilesY*(2*image_dtt.transform_size),
true,
name+sAux()+"-PART_CORR-D"+clt_parameters.disparity);
// titles);
......@@ -4537,7 +4552,7 @@ public class QuadCLT {
image_dtt.clt_lpf(
clt_parameters.getCorrSigma(image_dtt.isMonochrome()),
clt_data[iQuad][chn],
clt_parameters.transform_size,
/// image_dtt.transform_size,
threadsMax,
debugLevel);
}
......@@ -4559,8 +4574,8 @@ public class QuadCLT {
if (debugLevel > 0){
sdfa_instance.showArrays(clt,
tilesX*clt_parameters.transform_size,
tilesY*clt_parameters.transform_size,
tilesX*image_dtt.transform_size,
tilesY*image_dtt.transform_size,
true,
results[iQuad].getTitle()+"-CLT-D"+clt_parameters.disparity);
}
......@@ -4570,7 +4585,7 @@ public class QuadCLT {
for (int ncol=0; ncol<iclt_data[iQuad].length;ncol++) if (clt_data[iQuad][ncol] != null) {
iclt_data[iQuad][ncol] = image_dtt.iclt_2d(
clt_data[iQuad][ncol], // scanline representation of dcd data, organized as dct_size x dct_size tiles
clt_parameters.transform_size, // final int
/// image_dtt.transform_size, // final int
clt_parameters.clt_window, // window_type
15, // clt_parameters.iclt_mask, //which of 4 to transform back
0, // clt_parameters.dbg_mode, //which of 4 to transform back
......@@ -4581,8 +4596,8 @@ public class QuadCLT {
if (clt_parameters.gen_chn_stacks) sdfa_instance.showArrays(
// if (clt_parameters.gen_chn_stacks || true) sdfa_instance.showArrays(
iclt_data[iQuad],
(tilesX + 0) * clt_parameters.transform_size,
(tilesY + 0) * clt_parameters.transform_size,
(tilesX + 0) * image_dtt.transform_size,
(tilesY + 0) * image_dtt.transform_size,
true,
results[iQuad].getTitle()+"-ICLT-RGB-D"+clt_parameters.disparity);
} // end of generating shifted channel images
......@@ -4626,8 +4641,8 @@ public class QuadCLT {
!batch_mode, // true, // boolean saveShowIntermediate, // save/show if set globally
false, // boolean saveShowFinal, // save/show result (color image?)
iclt_data[iQuad],
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
scaleExposures[iQuad], // double scaleExposure, // is it needed?
debugLevel );
}
......@@ -4862,16 +4877,6 @@ public class QuadCLT {
final boolean updateStatus,
final int debugLevel){
final boolean batch_mode = clt_parameters.batch_run; //disable any debug images
// boolean advanced= this.correctionsParameters.zcorrect || this.correctionsParameters.equirectangular;
// boolean toRGB= advanced? true: this.correctionsParameters.toRGB;
// if (!batch_mode) return null;
// may use this.StartTime to report intermediate steps execution times
// String aux = isAux()?"-AUX":"";
String name=this.correctionsParameters.getModelName((String) imp_quad[0].getProperty("name"));
// int channel= Integer.parseInt((String) imp_src.getProperty("channel"));
String path= (String) imp_quad[0].getProperty("path");
......@@ -4890,7 +4895,7 @@ public class QuadCLT {
this.is_mono);
}
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(clt_parameters.transform_size,isMonochrome(),clt_parameters.getScaleStrength(isAux()));
for (int i = 0; i < double_stacks.length; i++){
if ( double_stacks[i].length > 2) {
for (int j =0 ; j < double_stacks[i][0].length; j++){
......@@ -4992,62 +4997,32 @@ public class QuadCLT {
z_correction +=clt_parameters.z_corr_map.get(name);// not used in lwir
}
final double disparity_corr = (z_correction == 0) ? 0.0 : geometryCorrection.getDisparityFromZ(1.0/z_correction);
// double [][][][][][] clt_data = image_dtt.clt_aberrations_quad_corr_min(
double [][] lazy_eye_data = image_dtt.cltMeasureLazyEye(
clt_parameters.img_dtt, // final ImageDttParameters imgdtt_params, // Now just extra correlation parameters, later will include, most others
// 1, // final int macro_scale, // to correlate tile data instead of the pixel data: 1 - pixels, 8 - tiles
tile_op, // per-tile operation bit codes
disparity_array, // final double disparity,
double_stacks, // final double [][][] imade_data, // first index - number of image in a quad
saturation_imp, // boolean [][] saturation_imp, // (near) saturated pixels or null
// correlation results - final and partial
// clt_corr_combo, // [tp.tilesY][tp.tilesX][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
// clt_corr_partial, // [tp.tilesY][tp.tilesX][pair][color][(2*transform_size-1)*(2*transform_size-1)] // if null - will not calculate
clt_mismatch, // [12][tp.tilesY * tp.tilesX] // transpose unapplied. null - do not calculate
disparity_map, // [2][tp.tilesY * tp.tilesX]
// texture_tiles, // [tp.tilesY][tp.tilesX]["RGBA".length()][];
imp_quad[0].getWidth(), // final int width,
clt_parameters.getFatZero(isMonochrome()), // add to denominator to modify phase correlation (same units as data1, data2). <0 - pure sum
// clt_parameters.corr_sym,
// clt_parameters.corr_offset,
clt_parameters.corr_red,
clt_parameters.corr_blue,
clt_parameters.getCorrSigma(image_dtt.isMonochrome()),
// clt_parameters.corr_normalize, // normalize correlation results by rms
min_corr_selected, // 0.0001; // minimal correlation value to consider valid
// clt_parameters.max_corr_sigma,// 1.5; // weights of points around global max to find fractional
// clt_parameters.max_corr_radius,
// clt_parameters.max_corr_double, // Double pass when masking center of mass to reduce preference for integer values
// clt_parameters.corr_mode, // Correlation mode: 0 - integer max, 1 - center of mass, 2 - polynomial
// clt_parameters.min_shot, // 10.0; // Do not adjust for shot noise if lower than
// clt_parameters.scale_shot, // 3.0; // scale when dividing by sqrt ( <0 - disable correction)
// clt_parameters.diff_sigma, // 5.0;//RMS difference from average to reduce weights (~ 1.0 - 1/255 full scale image)
// clt_parameters.diff_threshold, // 5.0; // RMS difference from average to discard channel (~ 1.0 - 1/255 full scale image)
// clt_parameters.diff_gauss, // true; // when averaging images, use gaussian around average as weight (false - sharp all/nothing)
// clt_parameters.min_agree, // 3.0; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
// clt_parameters.dust_remove, // Do not reduce average weight when only one image differes much from the average
// clt_parameters.keep_weights, // Add port weights to RGBA stack (debug feature)
geometryCorrection, // final GeometryCorrection geometryCorrection,
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
clt_parameters.clt_window,
shiftXY, //
disparity_corr, // final double disparity_corr, // disparity at infinity
// (clt_parameters.fcorr_ignore? null: this.fine_corr),
// clt_parameters.corr_magic_scale, // still not understood coefficient that reduces reported disparity value. Seems to be around 0.85
clt_parameters.shift_x, // final int shiftX, // shift image horizontally (positive - right) - just for testing
clt_parameters.shift_y, // final int shiftY, // shift image vertically (positive - down)
clt_parameters.tileStep, // final int tileStep, // process tileStep x tileStep cluster of tiles when adjusting lazy eye parameters
clt_parameters.tileX, // -1234, // clt_parameters.tileX, // final int debug_tileX,
clt_parameters.tileY, // final int debug_tileY, -1234 will cause port coordinates debug images
// (clt_parameters.dbg_mode & 64) != 0, // no fract shift
// (clt_parameters.dbg_mode & 128) != 0, // no convolve
// (clt_parameters.dbg_mode & 256) != 0, // transpose convolve
threadsMax,
debugLevel);
......@@ -6087,7 +6062,10 @@ public class QuadCLT {
this.is_mono);
}
// =================
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
for (int i = 0; i < double_stacks.length; i++){
for (int j =0 ; j < double_stacks[i][0].length; j++){
double_stacks[i][2][j]*=0.5; // Scale green 0.5 to compensate more pixels than R,B
......@@ -6175,7 +6153,7 @@ public class QuadCLT {
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
/// image_dtt.transform_size,
clt_parameters.clt_window,
shiftXY, //
disparity_corr, // final double disparity_corr, // disparity at infinity
......@@ -9135,10 +9113,6 @@ public class QuadCLT {
}
for (int scanIndex = next_pass; scanIndex < tp.clt_3d_passes.size(); scanIndex++){
if (debugLevel > 0){
System.out.println("FPGA processing scan #"+scanIndex);
......@@ -9464,10 +9438,13 @@ public class QuadCLT {
if (num_bgnd < clt_parameters.min_bgnd_tiles){
return null; // no background to generate // not used in lwir
}
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
double [][] texture_overlap = image_dtt.combineRBGATiles(
texture_tiles_bgnd, // texture_tiles, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
/// image_dtt.transform_size,
true, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
......@@ -9498,8 +9475,8 @@ public class QuadCLT {
true, // boolean saveShowIntermediate, // save/show if set globally
false, //true, // boolean saveShowFinal, // save/show result (color image?)
((clt_parameters.alpha1 > 0)? texture_rgba: texture_rgb),
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
1.0, // double scaleExposure, // is it needed?
debugLevel);
// resize for backdrop here!
......@@ -9562,14 +9539,18 @@ public class QuadCLT {
System.out.println("getPassImage(): Empty image!");
return null;
}
double [][]alphaFade = tp.getAlphaFade(clt_parameters.transform_size);
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
double [][]alphaFade = tp.getAlphaFade(image_dtt.transform_size);
if ((debugLevel > 0) && (scanIndex == 1)) { // not used in lwir
String [] titles = new String[16];
for (int i = 0; i<titles.length;i++) titles[i]=""+i;
sdfa_instance.showArrays(alphaFade, 2*clt_parameters.transform_size,2*clt_parameters.transform_size,true,"alphaFade",titles);
sdfa_instance.showArrays(alphaFade, 2*image_dtt.transform_size,2*image_dtt.transform_size,true,"alphaFade",titles);
}
double [][][][] texture_tiles_cluster = new double[tilesY][tilesX][][];
double [] alpha_zero = new double [4*clt_parameters.transform_size*clt_parameters.transform_size];
double [] alpha_zero = new double [4*image_dtt.transform_size*image_dtt.transform_size];
int alpha_index = 3;
for (int i = 0; i < alpha_zero.length; i++) alpha_zero[i]=0.0;
for (int tileY = 0; tileY < tilesY; tileY++){
......@@ -9598,10 +9579,9 @@ public class QuadCLT {
}
}
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
double [][] texture_overlap = image_dtt.combineRBGATiles(
texture_tiles_cluster, // texture_tiles, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
/// image_dtt.transform_size,
true, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
......@@ -9625,14 +9605,14 @@ public class QuadCLT {
if (resize) {
texture_rgbx = resizeGridTexture(
texture_rgbx,
clt_parameters.transform_size,
image_dtt.transform_size,
tilesX,
tilesY,
scan.getTextureBounds());
}
int width = resize ? (clt_parameters.transform_size * scan.getTextureBounds().width): (clt_parameters.transform_size * tilesX);
int height = resize ? (clt_parameters.transform_size * scan.getTextureBounds().height): (clt_parameters.transform_size * tilesY);
int width = resize ? (image_dtt.transform_size * scan.getTextureBounds().width): (image_dtt.transform_size * tilesX);
int height = resize ? (image_dtt.transform_size * scan.getTextureBounds().height): (image_dtt.transform_size * tilesY);
if ((width <= 0) || (height <= 0)) {
System.out.println("***** BUG in getPassImage(): width="+width+", height="+height+", resize="+resize+" ****"); // not used in lwir
}
......@@ -9648,8 +9628,8 @@ public class QuadCLT {
true, // boolean saveShowIntermediate, // save/show if set globally
false, //true, // boolean saveShowFinal, // save/show result (color image?)
texture_rgbx,
width, //tp.tilesX * clt_parameters.transform_size,
height, //tp.tilesY * clt_parameters.transform_size,
width, //tp.tilesX * image_dtt.transform_size,
height, //tp.tilesY * image_dtt.transform_size,
1.0, // double scaleExposure, // is it needed?
debugLevel);
......@@ -9773,7 +9753,10 @@ public class QuadCLT {
}
double [][][][] texture_tiles = new double [tilesY][tilesX][][]; // ["RGBA".length()][];
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
double z_correction = clt_parameters.z_correction;
if (clt_parameters.z_corr_map.containsKey(image_name)){ // not used in lwir
z_correction +=clt_parameters.z_corr_map.get(image_name);
......@@ -9794,7 +9777,7 @@ public class QuadCLT {
// Use it with disparity_maps[scan_step]? clt_mismatch, // [tp.tilesY][tp.tilesX][pair]{dx,dy,weight}[(2*transform_size-1)*(2*transform_size-1)] // transpose unapplied. null - do not calculate
disparity_map, // [12][tp.tilesY * tp.tilesX]
texture_tiles, // [tp.tilesY][tp.tilesX]["RGBA".length()][];
tilesX * clt_parameters.transform_size, // imp_quad[0].getWidth(), // final int width,
tilesX * image_dtt.transform_size, // imp_quad[0].getWidth(), // final int width,
clt_parameters.getFatZero(isMonochrome()), // add to denominator to modify phase correlation (same units as data1, data2). <0 - pure sum
clt_parameters.corr_sym,
clt_parameters.corr_offset,
......@@ -9819,7 +9802,7 @@ public class QuadCLT {
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
/// image_dtt.transform_size,
clt_parameters.clt_window,
shiftXY, //
disparity_corr, // final double disparity_corr, // disparity at infinity
......@@ -10008,8 +9991,10 @@ public class QuadCLT {
}
double [][][][] texture_tiles = save_textures ? new double [tilesY][tilesX][][] : null; // ["RGBA".length()][];
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
// final double disparity_corr = (clt_parameters.z_correction == 0) ? 0.0 : geometryCorrection.getDisparityFromZ(1.0/clt_parameters.z_correction);
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
double z_correction = clt_parameters.z_correction;
if (clt_parameters.z_corr_map.containsKey(image_name)){ // not used in lwir
z_correction +=clt_parameters.z_corr_map.get(image_name);
......@@ -10029,7 +10014,7 @@ public class QuadCLT {
// Use it with disparity_maps[scan_step]? clt_mismatch, // [tp.tilesY][tp.tilesX][pair]{dx,dy,weight}[(2*transform_size-1)*(2*transform_size-1)] // transpose unapplied. null - do not calculate
disparity_map, // [12][tp.tilesY * tp.tilesX]
texture_tiles, // [tp.tilesY][tp.tilesX]["RGBA".length()][];
tilesX * clt_parameters.transform_size, // imp_quad[0].getWidth(), // final int width,
tilesX * image_dtt.transform_size, // imp_quad[0].getWidth(), // final int width,
clt_parameters.getFatZero(isMonochrome()), // add to denominator to modify phase correlation (same units as data1, data2). <0 - pure sum
clt_parameters.corr_sym,
clt_parameters.corr_offset,
......@@ -10056,7 +10041,7 @@ public class QuadCLT {
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
/// image_dtt.transform_size,
clt_parameters.clt_window,
shiftXY, //
disparity_corr, // final double disparity_corr, // disparity at infinity
......@@ -10141,7 +10126,10 @@ public class QuadCLT {
}
double [][][][] texture_tiles = save_textures ? new double [tilesY][tilesX][][] : null; // ["RGBA".length()][];
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
double z_correction = clt_parameters.z_correction;
if (clt_parameters.z_corr_map.containsKey(image_name)){ // not used in lwir
z_correction +=clt_parameters.z_corr_map.get(image_name);
......@@ -10162,7 +10150,7 @@ public class QuadCLT {
// Use it with disparity_maps[scan_step]? clt_mismatch, // [tp.tilesY][tp.tilesX][pair]{dx,dy,weight}[(2*transform_size-1)*(2*transform_size-1)] // transpose unapplied. null - do not calculate
disparity_map, // [12][tp.tilesY * tp.tilesX]
texture_tiles, // [tp.tilesY][tp.tilesX]["RGBA".length()][];
tilesX * clt_parameters.transform_size, // imp_quad[0].getWidth(), // final int width,
tilesX * image_dtt.transform_size, // imp_quad[0].getWidth(), // final int width,
clt_parameters.getFatZero(isMonochrome()), // add to denominator to modify phase correlation (same units as data1, data2). <0 - pure sum
clt_parameters.corr_sym,
clt_parameters.corr_offset,
......@@ -10187,7 +10175,7 @@ public class QuadCLT {
geometryCorrection_main, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
/// image_dtt.transform_size,
clt_parameters.clt_window,
shiftXY, //
disparity_corr, // final double disparity_corr, // disparity at infinity
......@@ -10313,7 +10301,10 @@ public class QuadCLT {
shiftXY = shiftXY0;
}
ImageDtt image_dtt = new ImageDtt(isMonochrome(),clt_parameters.getScaleStrength(isAux()));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(isAux()));
double z_correction = clt_parameters.z_correction;
if (clt_parameters.z_corr_map.containsKey(image_name)){ // not used in lwir
z_correction +=clt_parameters.z_corr_map.get(image_name);
......@@ -10335,7 +10326,7 @@ public class QuadCLT {
null, // final double [][] clt_mismatch, // [12][tilesY * tilesX] // ***** transpose unapplied ***** ?. null - do not calculate
// values in the "main" directions have disparity (*_CM) subtracted, in the perpendicular - as is
null, // disparity_map, // [12][tp.tilesY * tp.tilesX]
tilesX * clt_parameters.transform_size, // imp_quad[0].getWidth(), // final int width,
tilesX * image_dtt.transform_size, // imp_quad[0].getWidth(), // final int width,
clt_parameters.getFatZero(isMonochrome()), // add to denominator to modify phase correlation (same units as data1, data2). <0 - pure sum
clt_parameters.corr_red,
clt_parameters.corr_blue,
......@@ -10345,7 +10336,7 @@ public class QuadCLT {
null, // final GeometryCorrection geometryCorrection_main, // if not null correct this camera (aux) to the coordinates of the main
clt_kernels, // final double [][][][][][] clt_kernels, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.kernel_step,
clt_parameters.transform_size,
// image_dtt.transform_size,
clt_parameters.clt_window,
shiftXY, //
disparity_corr, // final double disparity_corr, // disparity at infinity
......
......@@ -5806,7 +5806,10 @@ public class TileProcessor {
// show texture_tiles
double [][][][] texture_tiles = scan_prev.getTextureTiles();
ImageDtt image_dtt = new ImageDtt(isMonochrome(), clt_parameters.getScaleStrength(is_aux));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
isMonochrome(),
clt_parameters.getScaleStrength(is_aux));
double [][][] dispStrength = st.getDisparityStrengths(
clt_parameters.stMeasSel); // int stMeasSel) // = 1; // Select measurements for supertiles : +1 - combo, +2 - quad +4 - hor +8 - vert)
......@@ -5830,15 +5833,15 @@ public class TileProcessor {
if (!batch_mode && show_nonoverlap){
texture_nonoverlap = image_dtt.combineRBGATiles(
texture_tiles, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
/// image_dtt.transform_size,
false, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
debugLevel);
sdfa_instance.showArrays(
texture_nonoverlap,
tilesX * (2 * clt_parameters.transform_size),
tilesY * (2 * clt_parameters.transform_size),
tilesX * (2 * image_dtt.transform_size),
tilesY * (2 * image_dtt.transform_size),
true,
name + "-TXTNOL-D",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
......@@ -5849,7 +5852,7 @@ public class TileProcessor {
int alpha_index = 3;
texture_overlap = image_dtt.combineRBGATiles(
texture_tiles, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
/// image_dtt.transform_size,
true, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
......@@ -5868,8 +5871,8 @@ public class TileProcessor {
if (show_overlap) {
sdfa_instance.showArrays(
texture_overlap,
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
true,
name + "-TXTOL-D",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
......@@ -7182,7 +7185,9 @@ public class TileProcessor {
CLTPass3d scan_prev = clt_3d_passes.get(clt_3d_passes.size() -1); // get last one
boolean [] these_tiles = scan_prev.getSelected();
DisparityProcessor dp = new DisparityProcessor(this, clt_parameters.transform_size * geometryCorrection.getScaleDzDx());
DisparityProcessor dp = new DisparityProcessor(
this,
clt_parameters.transform_size * geometryCorrection.getScaleDzDx());
boolean [] grown = these_tiles.clone();
growTiles(
2, // grow tile selection by 1 over non-background tiles 1: 4 directions, 2 - 8 directions, 3 - 8 by 1, 4 by 1 more
......
......@@ -347,6 +347,7 @@ public class TwoQuadCLT {
}
public void prepareFilesForGPUDebug(
String save_prefix, // absolute path to the cuda project root
QuadCLT quadCLT_main,
QuadCLT quadCLT_aux,
CLTParameters clt_parameters,
......@@ -407,8 +408,9 @@ public class TwoQuadCLT {
saturation_imp_aux, //output // boolean [][] saturation_imp,
debugLevel); // int debugLevel);
// Tempporarily processing individaully with the old code
// Temporarily processing individually with the old code
processCLTQuadCorrPairForGPU(
save_prefix, // String save_prefix,
quadCLT_main, // QuadCLT quadCLT_main,
quadCLT_aux, // QuadCLT quadCLT_aux,
imp_srcs_main, // ImagePlus [] imp_quad_main,
......@@ -638,7 +640,10 @@ public class TwoQuadCLT {
double [][] disparity_bimap = new double [ImageDtt.BIDISPARITY_TITLES.length][]; //[0] -residual disparity, [1] - orthogonal (just for debugging) last 4 - max pixel differences
ImageDtt image_dtt = new ImageDtt(quadCLT_main.isMonochrome(),clt_parameters.getScaleStrength(false));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
quadCLT_main.isMonochrome(),
clt_parameters.getScaleStrength(false));
double [][] ml_data = null;
// int [][] woi_tops = {quadCLT_main.woi_tops,quadCLT_aux.woi_tops};
......@@ -694,30 +699,30 @@ public class TwoQuadCLT {
if (clt_parameters.show_nonoverlap){
texture_nonoverlap_main = image_dtt.combineRBGATiles(
texture_tiles_main, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
// image_dtt.transform_size,
false, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
debugLevel);
sdfa_instance.showArrays(
texture_nonoverlap_main,
tilesX * (2 * clt_parameters.transform_size),
tilesY * (2 * clt_parameters.transform_size),
tilesX * (2 * image_dtt.transform_size),
tilesY * (2 * image_dtt.transform_size),
true,
name + "-TXTNOL-D"+clt_parameters.disparity+"-MAIN",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
texture_nonoverlap_aux = image_dtt.combineRBGATiles(
texture_tiles_aux, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
// image_dtt.transform_size,
false, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
debugLevel);
sdfa_instance.showArrays(
texture_nonoverlap_aux,
tilesX * (2 * clt_parameters.transform_size),
tilesY * (2 * clt_parameters.transform_size),
tilesX * (2 * image_dtt.transform_size),
tilesY * (2 * image_dtt.transform_size),
true,
name + "-TXTNOL-D"+clt_parameters.disparity+"-AUX",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
......@@ -726,7 +731,7 @@ public class TwoQuadCLT {
int alpha_index = 3;
texture_overlap_main = image_dtt.combineRBGATiles(
texture_tiles_main, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
// image_dtt.transform_size,
true, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
......@@ -744,7 +749,7 @@ public class TwoQuadCLT {
texture_overlap_aux = image_dtt.combineRBGATiles(
texture_tiles_aux, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
// image_dtt.transform_size,
true, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
......@@ -763,8 +768,8 @@ public class TwoQuadCLT {
if (!batch_mode && clt_parameters.show_overlap) {
sdfa_instance.showArrays(
texture_overlap_main,
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
true,
name + "-TXTOL-D"+clt_parameters.disparity+"-MAIN",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
......@@ -772,8 +777,8 @@ public class TwoQuadCLT {
if (!batch_mode && clt_parameters.show_overlap) {
sdfa_instance.showArrays(
texture_overlap_aux,
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
true,
name + "-TXTOL-D"+clt_parameters.disparity+"-AUX",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
......@@ -797,8 +802,8 @@ public class TwoQuadCLT {
false, // true, // boolean saveShowIntermediate, // save/show if set globally
false, // true, // boolean saveShowFinal, // save/show result (color image?)
((clt_parameters.alpha1 > 0)? texture_rgba_main: texture_rgb_main),
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
1.0, // double scaleExposure, // is it needed?
debugLevel );
ImagePlus imp_texture_aux = quadCLT_aux.linearStackToColor(
......@@ -812,8 +817,8 @@ public class TwoQuadCLT {
false, // true, // boolean saveShowIntermediate, // save/show if set globally
false, // true, // boolean saveShowFinal, // save/show result (color image?)
((clt_parameters.alpha1 > 0)? texture_rgba_aux: texture_rgb_aux),
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
1.0, // double scaleExposure, // is it needed?
debugLevel );
int width = imp_texture_main.getWidth();
......@@ -870,7 +875,7 @@ public class TwoQuadCLT {
for (int i = 0; i<corr_rslt.length; i++) {
corr_rslt[i] = image_dtt.corr_dbg(
clt_corr_combo[i],
2*clt_parameters.transform_size - 1,
2*image_dtt.transform_size - 1,
clt_parameters.corr_border_contrast,
threadsMax,
debugLevel);
......@@ -878,8 +883,8 @@ public class TwoQuadCLT {
sdfa_instance.showArrays(
corr_rslt,
tilesX*(2*clt_parameters.transform_size),
tilesY*(2*clt_parameters.transform_size),
tilesX*(2*image_dtt.transform_size),
tilesY*(2*image_dtt.transform_size),
true,
name + "-CORR-D"+clt_parameters.disparity,
titles );
......@@ -904,7 +909,7 @@ public class TwoQuadCLT {
image_dtt.clt_lpf(
clt_parameters.getCorrSigma(image_dtt.isMonochrome()),
clt_bidata[iAux][iSubCam][chn],
clt_parameters.transform_size,
// image_dtt.transform_size,
threadsMax,
debugLevel);
}
......@@ -926,8 +931,8 @@ public class TwoQuadCLT {
if (debugLevel > 0){
sdfa_instance.showArrays(clt,
tilesX*clt_parameters.transform_size,
tilesY*clt_parameters.transform_size,
tilesX*image_dtt.transform_size,
tilesY*image_dtt.transform_size,
true,
results[iQuadComb].getTitle()+"-CLT-D"+clt_parameters.disparity);
}
......@@ -936,7 +941,7 @@ public class TwoQuadCLT {
for (int chn=0; chn<iclt_data.length;chn++){
iclt_data[chn] = image_dtt.iclt_2d(
clt_bidata[iAux][iSubCam][chn], // scanline representation of dcd data, organized as dct_size x dct_size tiles
clt_parameters.transform_size, // final int
// image_dtt.transform_size, // final int
clt_parameters.clt_window, // window_type
15, // clt_parameters.iclt_mask, //which of 4 to transform back
0, // clt_parameters.dbg_mode, //which of 4 to transform back
......@@ -946,8 +951,8 @@ public class TwoQuadCLT {
}
if (clt_parameters.gen_chn_stacks) sdfa_instance.showArrays(iclt_data,
(tilesX + 0) * clt_parameters.transform_size,
(tilesY + 0) * clt_parameters.transform_size,
(tilesX + 0) * image_dtt.transform_size,
(tilesY + 0) * image_dtt.transform_size,
true,
results[iQuadComb].getTitle()+"-ICLT-RGB-D"+clt_parameters.disparity);
if (!clt_parameters.gen_chn_img) continue;
......@@ -963,8 +968,8 @@ public class TwoQuadCLT {
!batch_mode, // true, // boolean saveShowIntermediate, // save/show if set globally
false, // boolean saveShowFinal, // save/show result (color image?)
iclt_data,
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
scaleExposures[iAux][iSubCam], // double scaleExposure, // is it needed?
debugLevel );
} // end of generating shifted channel images
......@@ -1162,7 +1167,12 @@ public class TwoQuadCLT {
bb.clear();
for (int i = 0; i < image_data[chn][0].length; i++) {
// dos.writeFloat((float) (image_data[chn][0][i] + image_data[chn][1][i] + image_data[chn][2][i]));
bb.putFloat((float) (image_data[chn][0][i] + image_data[chn][1][i] + image_data[chn][2][i]));
double d = 0;
for (int c = 0; c < image_data[chn].length; c++) {
d += image_data[chn][c][i];
}
// bb.putFloat((float) (image_data[chn][0][i] + image_data[chn][1][i] + image_data[chn][2][i]));
bb.putFloat((float) d);
}
bb.flip();
channel.write(bb);
......@@ -1270,6 +1280,7 @@ public class TwoQuadCLT {
public ImagePlus [] processCLTQuadCorrPairForGPU(
String save_prefix,
QuadCLT quadCLT_main,
QuadCLT quadCLT_aux,
ImagePlus [] imp_quad_main,
......@@ -1355,21 +1366,14 @@ public class TwoQuadCLT {
}
}
double [][] disparity_bimap = new double [ImageDtt.BIDISPARITY_TITLES.length][]; //[0] -residual disparity, [1] - orthogonal (just for debugging) last 4 - max pixel differences
ImageDtt image_dtt = new ImageDtt(quadCLT_main.isMonochrome(),clt_parameters.getScaleStrength(false));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
quadCLT_main.isMonochrome(),
clt_parameters.getScaleStrength(false));
double [][] ml_data = null;
// int [][] woi_tops = {quadCLT_main.woi_tops,quadCLT_aux.woi_tops};
double [][][] ers_delay = get_ers?(new double [2][][]):null;
/// double [][][][][][] clt_kernels_main = quadCLT_main.getCLTKernels(); // [4][3][123][164]{[64],[64],[64],[64],[8]}
/// double [][][][][][] clt_kernels_aux = quadCLT_aux.getCLTKernels();
//[4][3][123][164][5][]
/// double [][] dbg_kern = clt_kernels_main[0][0][0][0];
// here all data is ready (images, kernels) to try GPU code
float [][] main_bayer = new float [quadCLT_main.image_data.length][quadCLT_main.image_data[0][0].length];
float [][] dst_bayer = new float [quadCLT_main.image_data.length][quadCLT_main.image_data[0][0].length];
for (int nc = 0; nc < main_bayer.length; nc++) {
......@@ -1382,6 +1386,7 @@ public class TwoQuadCLT {
double [][][] port_xy_main_dbg = new double [tilesX*tilesY][][];
double [][][] port_xy_aux_dbg = new double [tilesX*tilesY][][];
// double [][][] corr2ddata = new double [1][][];
final double [][][][][][][] clt_bidata = // new double[2][quad][nChn][tilesY][tilesX][][]; // first index - main/aux
image_dtt.clt_bi_quad_dbg (
......@@ -1412,24 +1417,69 @@ public class TwoQuadCLT {
quadCLT_aux.getCLTKernels(), // final double [][][][][][] clt_kernels_aux, // [channel_in_quad][color][tileY][tileX][band][pixel] , size should match image (have 1 tile around)
clt_parameters.corr_magic_scale, // final double corr_magic_scale, // still not understood coefficient that reduces reported disparity value. Seems to be around 0.85
true, // final boolean keep_clt_data,
// woi_tops, // final int [][] woi_tops,
ers_delay, // final double [][][] ers_delay, // if not null - fill with tile center acquisition delay
threadsMax, // final int threadsMax, // maximal number of threads to launch
debugLevel, // final int globalDebugLevel);
port_xy_main_dbg, // final double [][][] port_xy_main_dbg, // for each tile/port save x,y pixel coordinates (gpu code development)
port_xy_aux_dbg); // final double [][][] port_xy_aux_dbg) // for each tile/port save x,y pixel coordinates (gpu code development)
/*
if (debugLevel < 1000) {
// Create list of all correlation pairs
double [][][][][][] clt_data = clt_bidata[0];
int numTiles = tilesX * tilesY;
int numPairs = GPUTileProcessor.NUM_PAIRS;
int [] corr_indices = new int [numTiles * numPairs];
int indx=0;
for (int i = 0; i < numTiles; i++) {
for (int j = 0; j < numPairs; j++) {
corr_indices[indx++] = (i << GPUTileProcessor.CORR_PAIR_SHIFT) + j;
}
}
double [][] corrs2d = image_dtt.get2DCorrs(
clt_parameters, // final CLTParameters clt_parameters,
clt_data, // final double [][][][][][] clt_data, // [channel_in_quad][color][tileY][tileX][band][pixel];
corr_indices, // final int [] pairs_list,
threadsMax, // final int threadsMax, // maximal number of threads to launch
debugLevel); // final int debugLevel
float [][] fcorrs2d = new float [corrs2d.length][corrs2d[0].length];
// for compatibility with the actual GPUI output
for (int n = 0; n < corrs2d.length; n++) {
for (int i = 0; i < corrs2d[0].length; i++) {
fcorrs2d[n][i] = (float) corrs2d[n][i];
}
}
int [] wh = new int[2];
double [][] dbg_corr = GPUTileProcessor.getCorr2DView(
tilesX,
tilesY,
corr_indices,
fcorrs2d,
wh);
(new ShowDoubleFloatArrays()).showArrays(
dbg_corr,
wh[0],
wh[1],
true,
"CORR2D_CPU",
GPUTileProcessor.getCorrTitles());
if ((save_prefix != null) && (save_prefix != "")) {
if (debugLevel < -1000) {
return null;
}
String kernel_dir = "/home/eyesis/workspace-python3/nvidia_dct8x8/clt/";
// boolean [][] what_to_save = {{false,false,true}, {false,false,true}};
String kernel_dir = save_prefix+"clt/";
File kdir = new File(kernel_dir);
kdir.mkdir();
// boolean [][] what_to_save = {{false,false,true}, {false,false,true}};
boolean [][] what_to_save = {{true,true,true}, {true,true,true}};
try {
saveFloatKernels(
kernel_dir +"main", // String file_prefix,
(what_to_save[0][0]?clt_kernels_main:null), // double [][][][][][] clt_kernels, // null
// (what_to_save[0][0]?clt_kernels_main:null), // double [][][][][][] clt_kernels, // null
(what_to_save[0][0]?quadCLT_main.getCLTKernels():null), // double [][][][][][] clt_kernels, // null
(what_to_save[0][1]?quadCLT_main.image_data:null),
(what_to_save[0][2]?port_xy_main_dbg:null), // double [][][] port_xy,
true);
......@@ -1442,7 +1492,8 @@ public class TwoQuadCLT {
try {
saveFloatKernels(
kernel_dir +"aux", // String file_prefix,
(what_to_save[1][0]?clt_kernels_aux:null), // double [][][][][][] clt_kernels, // null
// (what_to_save[1][0]?clt_kernels_aux:null), // double [][][][][][] clt_kernels, // null
(what_to_save[1][0]?quadCLT_aux.getCLTKernels():null), // double [][][][][][] clt_kernels, // null
(what_to_save[1][1]?quadCLT_aux.image_data:null),
(what_to_save[1][2]?port_xy_aux_dbg:null), // double [][][] port_xy,
true);
......@@ -1452,15 +1503,14 @@ public class TwoQuadCLT {
e.printStackTrace();
} // boolean transpose);
if (debugLevel < 1000) {
if (debugLevel < -1000) {
return null;
}
if (ers_delay !=null) {
showERSDelay(ers_delay);
}
*/
}
double [][] texture_nonoverlap_main = null;
double [][] texture_nonoverlap_aux = null;
double [][] texture_overlap_main = null;
......@@ -1471,30 +1521,30 @@ public class TwoQuadCLT {
if (clt_parameters.show_nonoverlap){
texture_nonoverlap_main = image_dtt.combineRBGATiles(
texture_tiles_main, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
// image_dtt.transform_size,
false, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
debugLevel);
sdfa_instance.showArrays(
texture_nonoverlap_main,
tilesX * (2 * clt_parameters.transform_size),
tilesY * (2 * clt_parameters.transform_size),
tilesX * (2 * image_dtt.transform_size),
tilesY * (2 * image_dtt.transform_size),
true,
name + "-TXTNOL-D"+clt_parameters.disparity+"-MAIN",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
texture_nonoverlap_aux = image_dtt.combineRBGATiles(
texture_tiles_aux, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
// image_dtt.transform_size,
false, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
debugLevel);
sdfa_instance.showArrays(
texture_nonoverlap_aux,
tilesX * (2 * clt_parameters.transform_size),
tilesY * (2 * clt_parameters.transform_size),
tilesX * (2 * image_dtt.transform_size),
tilesY * (2 * image_dtt.transform_size),
true,
name + "-TXTNOL-D"+clt_parameters.disparity+"-AUX",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
......@@ -1503,7 +1553,7 @@ public class TwoQuadCLT {
int alpha_index = 3;
texture_overlap_main = image_dtt.combineRBGATiles(
texture_tiles_main, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
// image_dtt.transform_size,
true, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
......@@ -1521,7 +1571,7 @@ public class TwoQuadCLT {
texture_overlap_aux = image_dtt.combineRBGATiles(
texture_tiles_aux, // array [tp.tilesY][tp.tilesX][4][4*transform_size] or [tp.tilesY][tp.tilesX]{null}
clt_parameters.transform_size,
// image_dtt.transform_size,
true, // when false - output each tile as 16x16, true - overlap to make 8x8
clt_parameters.sharp_alpha, // combining mode for alpha channel: false - treat as RGB, true - apply center 8x8 only
threadsMax, // maximal number of threads to launch
......@@ -1540,8 +1590,8 @@ public class TwoQuadCLT {
if (!batch_mode && clt_parameters.show_overlap) {
sdfa_instance.showArrays(
texture_overlap_main,
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
true,
name + "-TXTOL-D"+clt_parameters.disparity+"-MAIN",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
......@@ -1549,8 +1599,8 @@ public class TwoQuadCLT {
if (!batch_mode && clt_parameters.show_overlap) {
sdfa_instance.showArrays(
texture_overlap_aux,
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
true,
name + "-TXTOL-D"+clt_parameters.disparity+"-AUX",
(clt_parameters.keep_weights?rgba_weights_titles:rgba_titles));
......@@ -1574,8 +1624,8 @@ public class TwoQuadCLT {
false, // true, // boolean saveShowIntermediate, // save/show if set globally
false, // true, // boolean saveShowFinal, // save/show result (color image?)
((clt_parameters.alpha1 > 0)? texture_rgba_main: texture_rgb_main),
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
1.0, // double scaleExposure, // is it needed?
debugLevel );
ImagePlus imp_texture_aux = quadCLT_aux.linearStackToColor(
......@@ -1589,8 +1639,8 @@ public class TwoQuadCLT {
false, // true, // boolean saveShowIntermediate, // save/show if set globally
false, // true, // boolean saveShowFinal, // save/show result (color image?)
((clt_parameters.alpha1 > 0)? texture_rgba_aux: texture_rgb_aux),
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
1.0, // double scaleExposure, // is it needed?
debugLevel );
int width = imp_texture_main.getWidth();
......@@ -1647,7 +1697,7 @@ public class TwoQuadCLT {
for (int i = 0; i<corr_rslt.length; i++) {
corr_rslt[i] = image_dtt.corr_dbg(
clt_corr_combo[i],
2*clt_parameters.transform_size - 1,
2*image_dtt.transform_size - 1,
clt_parameters.corr_border_contrast,
threadsMax,
debugLevel);
......@@ -1655,8 +1705,8 @@ public class TwoQuadCLT {
sdfa_instance.showArrays(
corr_rslt,
tilesX*(2*clt_parameters.transform_size),
tilesY*(2*clt_parameters.transform_size),
tilesX*(2*image_dtt.transform_size),
tilesY*(2*image_dtt.transform_size),
true,
name + "-CORR-D"+clt_parameters.disparity,
titles );
......@@ -1681,7 +1731,7 @@ public class TwoQuadCLT {
image_dtt.clt_lpf(
clt_parameters.getCorrSigma(image_dtt.isMonochrome()),
clt_bidata[iAux][iSubCam][chn],
clt_parameters.transform_size,
// image_dtt.transform_size,
threadsMax,
debug_lpf);
}
......@@ -1703,8 +1753,8 @@ public class TwoQuadCLT {
if (debugLevel > 0){
sdfa_instance.showArrays(clt,
tilesX*clt_parameters.transform_size,
tilesY*clt_parameters.transform_size,
tilesX*image_dtt.transform_size,
tilesY*image_dtt.transform_size,
true,
results[iQuadComb].getTitle()+"-CLT-D"+clt_parameters.disparity);
}
......@@ -1713,7 +1763,7 @@ public class TwoQuadCLT {
for (int chn=0; chn<iclt_data.length;chn++){
iclt_data[chn] = image_dtt.iclt_2d_debug_gpu(
clt_bidata[iAux][iSubCam][chn], // scanline representation of dcd data, organized as dct_size x dct_size tiles
clt_parameters.transform_size, // final int
// image_dtt.transform_size, // final int
clt_parameters.clt_window, // window_type
15, // clt_parameters.iclt_mask, //which of 4 to transform back
0, // clt_parameters.dbg_mode, //which of 4 to transform back
......@@ -1725,8 +1775,8 @@ public class TwoQuadCLT {
}
if (clt_parameters.gen_chn_stacks) sdfa_instance.showArrays(iclt_data,
(tilesX + 0) * clt_parameters.transform_size,
(tilesY + 0) * clt_parameters.transform_size,
(tilesX + 0) * image_dtt.transform_size,
(tilesY + 0) * image_dtt.transform_size,
true,
results[iQuadComb].getTitle()+"-ICLT-RGB-D"+clt_parameters.disparity);
if (!clt_parameters.gen_chn_img) continue;
......@@ -1742,8 +1792,8 @@ public class TwoQuadCLT {
!batch_mode, // true, // boolean saveShowIntermediate, // save/show if set globally
false, // boolean saveShowFinal, // save/show result (color image?)
iclt_data,
tilesX * clt_parameters.transform_size,
tilesY * clt_parameters.transform_size,
tilesX * image_dtt.transform_size,
tilesY * image_dtt.transform_size,
scaleExposures[iAux][iSubCam], // double scaleExposure, // is it needed?
debugLevel );
} // end of generating shifted channel images
......@@ -1836,21 +1886,42 @@ public class TwoQuadCLT {
final int threadsMax, // maximal number of threads to launch
final boolean updateStatus,
final int debugLevel){
// get fat_zero (absolute) and color scales
boolean is_mono = quadCLT_main.isMonochrome();
double fat_zero = clt_parameters.getGpuFatZero(is_mono); // 30.0;
double [] scales = (is_mono) ? (new double [] {1.0}) :(new double [] {
clt_parameters.gpu_weight_r, // 0.25
clt_parameters.gpu_weight_b, // 0.25
1.0 - clt_parameters.gpu_weight_r - clt_parameters.gpu_weight_b}); // 0.5
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
is_mono,
1.0);
float [][] lpf_rgb;
if (is_mono) {
lpf_rgb = new float[1][];
lpf_rgb[0] = image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_m);
} else {
lpf_rgb = new float[3][];
lpf_rgb[0] = image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_r);
lpf_rgb[1] = image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_b);
lpf_rgb[2] = image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_g);
}
gPUTileProcessor.setLpfRbg(
1.1f, // float sigma_r,
1.1f, // float sigma_b,
0.7f); // float sigma_g)
lpf_rgb);
float [] lpf_flat = image_dtt.floatGetCltLpfFd(clt_parameters.getGpuCorrSigma(is_mono));
gPUTileProcessor.setLpfCorr(
lpf_flat);
final boolean use_aux = false; // currently GPU is configured for a single quad camera
final boolean batch_mode = clt_parameters.batch_run; //disable any debug images
// final boolean get_ers = !batch_mode;
// boolean infinity_corr = false;
// double [][] scaleExposures= {scaleExposures_main, scaleExposures_aux};
boolean toRGB= quadCLT_main.correctionsParameters.toRGB;
// showDoubleFloatArrays sdfa_instance = new showDoubleFloatArrays(); // just for debugging? - TODO - move where it belongs
// may use this.StartTime to report intermediate steps execution times
String name=quadCLT_main.correctionsParameters.getModelName((String) imp_quad_main[0].getProperty("name"));
String path= (String) imp_quad_main[0].getProperty("path"); // Only for debug output
......@@ -1888,6 +1959,8 @@ public class TwoQuadCLT {
// Set task clt_parameters.disparity
GPUTileProcessor.TpTask [] tp_tasks = gPUTileProcessor.setFullFrameImages(
(float) clt_parameters.disparity, // float target_disparity, // apply same disparity to all tiles
0xf, // int out_image, // from which tiles to generate image (currently 0/1)
0x3f, // int corr_mask, // which correlation pairs to generate (maybe later - reduce size from 15x15)
!use_aux, // boolean use_master,
use_aux, // boolean use_aux,
quadCLT_main.getGeometryCorrection(), // final GeometryCorrection geometryCorrection_main,
......@@ -1895,44 +1968,55 @@ public class TwoQuadCLT {
null, // final double [][][] ers_delay, // if not null - fill with tile center acquisition delay
threadsMax, // final int threadsMax, // maximal number of threads to launch
debugLevel); // final int debugLevel)
gPUTileProcessor.setTasks(
tp_tasks, // TpTask [] tile_tasks,
use_aux); // boolean use_aux)
int [] corr_indices = gPUTileProcessor.getCorrTasks(
tp_tasks);
// corr_indices array of integers to be passed to GPU
gPUTileProcessor.setCorrIndices(corr_indices);
// All set, run kernel (correct and convert)
int NREPEAT = 1; // 00;
System.out.println("\n------------ Running GPU "+NREPEAT+" times ----------------");
long startGPU=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execConverCorrectTiles();
// run imclt;
long firstGPUTime= (System.nanoTime() - startGPU)/NREPEAT;
long startIMCLT=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execImcltRbg();
long runGPUTime = (System.nanoTime() - startGPU)/NREPEAT;
long endImcltTime = System.nanoTime();
long startCorr2d=System.nanoTime(); // System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execCorr2D(
scales,// double [] scales,
fat_zero); // double fat_zero);
long endCorr2d = System.nanoTime();
long endGPUTime = System.nanoTime();
long firstGPUTime= (startIMCLT- startGPU)/NREPEAT;
long runImcltTime = (endImcltTime - startIMCLT)/NREPEAT;
long runCorr2DTime = (endCorr2d - startCorr2d)/NREPEAT;
long runGPUTime = (endGPUTime - startGPU)/NREPEAT;
// run corr2d
System.out.println("\n------------ End of running GPU "+NREPEAT+" times ----------------");
System.out.println("GPU run time ="+(runGPUTime * 1.0e-6)+"ms, (direct conversion: "+(firstGPUTime*1.0e-6)+"ms, imclt: "+
((runGPUTime - firstGPUTime)*1.0e-6)+"ms)");
(runImcltTime*1.0e-6)+"ms), corr2D: "+(runCorr2DTime*1.0e-6)+"ms");
// get data back from GPU
float [][][] iclt_fimg = new float [GPUTileProcessor.NUM_CAMS][][];
for (int ncam = 0; ncam < iclt_fimg.length; ncam++) {
iclt_fimg[ncam] = gPUTileProcessor.getRBG(ncam);
}
// get data back from GPU
String [] rgb_titles = {"red","blue","green"};
int out_width = GPUTileProcessor.IMG_WIDTH + GPUTileProcessor.DTT_SIZE;
int out_height = GPUTileProcessor.IMG_HEIGHT + GPUTileProcessor.DTT_SIZE;
/*
for (int ncam = 0; ncam < iclt_fimg.length; ncam++) {
String title=name+"-RBG"+String.format("%02d", ncam);
(new ShowDoubleFloatArrays()).showArrays(
iclt_fimg[ncam],
out_width,
out_height,
true,
title,
rgb_titles);
}
*/
ImagePlus [] imps_RGB = new ImagePlus[iclt_fimg.length];
for (int ncam = 0; ncam < iclt_fimg.length; ncam++) {
String title=name+"-"+String.format("%02d", ncam);
......@@ -1953,6 +2037,25 @@ public class TwoQuadCLT {
debugLevel );
}
float [][] corr2D = gPUTileProcessor.getCorr2D();
// convert to 6-layer image using tasks
int tilesX = GPUTileProcessor.IMG_WIDTH / GPUTileProcessor.DTT_SIZE;
int tilesY = GPUTileProcessor.IMG_HEIGHT / GPUTileProcessor.DTT_SIZE;
int [] wh = new int[2];
double [][] dbg_corr = gPUTileProcessor.getCorr2DView(
tilesX,
tilesY,
corr_indices,
corr2D,
wh);
(new ShowDoubleFloatArrays()).showArrays(
dbg_corr,
wh[0],
wh[1],
true,
"CORR2D",
gPUTileProcessor.getCorrTitles());
if (clt_parameters.gen_chn_img) {
// combine to a sliced color image
// assuming total number of images to be multiple of 4
......@@ -2255,7 +2358,12 @@ public class TwoQuadCLT {
"MACRO-INPUT");
}
int macro_scale = clt_parameters.transform_size;
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
quadCLT_main.isMonochrome(),
clt_parameters.getScaleStrength(false));
int macro_scale = image_dtt.transform_size;
int mTilesX = tilesX/macro_scale;
int mTilesY = tilesY/macro_scale;
int [][] mtile_op = new int [mTilesY][mTilesX];
......@@ -2266,7 +2374,6 @@ public class TwoQuadCLT {
}
double [][] mdisparity_array = new double [mTilesY][mTilesX]; // keep all zeros
double [][] mdisparity_bimap = new double [ImageDtt.BIDISPARITY_TITLES.length][];
ImageDtt image_dtt = new ImageDtt(quadCLT_main.isMonochrome(),clt_parameters.getScaleStrength(false));
image_dtt.clt_bi_macro(
clt_parameters, // final EyesisCorrectionParameters.CLTParameters clt_parameters,
clt_parameters.getFatZero(image_dtt.isMonochrome()), // final double fatzero, // May use correlation fat zero from 2 different parameters - fat_zero and rig.ml_fatzero
......@@ -3155,7 +3262,10 @@ if (debugLevel > -100) return true; // temporarily !
}
}
}
ImageDtt image_dtt = new ImageDtt(quadCLT_main.isMonochrome(),clt_parameters.getScaleStrength(false));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
quadCLT_main.isMonochrome(),
clt_parameters.getScaleStrength(false));
image_dtt.clt_bi_quad (
clt_parameters, // final EyesisCorrectionParameters.CLTParameters clt_parameters,
clt_parameters.getFatZero(image_dtt.isMonochrome()), // final double fatzero, // May use correlation fat zero from 2 different parameters - fat_zero and rig.ml_fatzero
......@@ -3175,7 +3285,7 @@ if (debugLevel > -100) return true; // temporarily !
null, // ml_data, // final double [][] ml_data, // data for ML - 10 layers - 4 center areas (3x3, 5x5,..) per camera-per direction, 1 - composite, and 1 with just 1 data (target disparity)
texture_tiles[0], // final double [][][][] texture_tiles_main, // [tilesY][tilesX]["RGBA".length()][]; null - will skip images combining
texture_tiles[1], // final double [][][][] texture_tiles_aux, // [tilesY][tilesX]["RGBA".length()][]; null - will skip images combining
quadCLT_main.tp.getTilesX()*clt_parameters.transform_size, // final int width,
quadCLT_main.tp.getTilesX()*image_dtt.transform_size, // final int width,
quadCLT_main.getGeometryCorrection(), // final GeometryCorrection geometryCorrection_main,
quadCLT_aux.getGeometryCorrection(), // final GeometryCorrection geometryCorrection_aux,
......@@ -7232,7 +7342,10 @@ if (debugLevel > -100) return true; // temporarily !
final int threadsMax, // maximal number of threads to launch
final boolean updateStatus,
final int debugLevel){
ImageDtt image_dtt = new ImageDtt(quadCLT_main.isMonochrome(),clt_parameters.getScaleStrength(false));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
quadCLT_main.isMonochrome(),
clt_parameters.getScaleStrength(false));
double [][] disparity_bimap = new double [ImageDtt.BIDISPARITY_TITLES.length][]; //[0] -residual disparity, [1] - orthogonal (just for debugging) last 4 - max pixel differences
......@@ -7255,7 +7368,7 @@ if (debugLevel > -100) return true; // temporarily !
ml_data, // final double [][] ml_data, // data for ML - 10 layers - 4 center areas (3x3, 5x5,..) per camera-per direction, 1 - composite, and 1 with just 1 data (target disparity)
null, // final double [][][][] texture_tiles_main, // [tilesY][tilesX]["RGBA".length()][]; null - will skip images combining
null, // final double [][][][] texture_tiles_aux, // [tilesY][tilesX]["RGBA".length()][]; null - will skip images combining
quadCLT_main.tp.getTilesX()*clt_parameters.transform_size, // final int width,
quadCLT_main.tp.getTilesX()*image_dtt.transform_size, // final int width,
quadCLT_main.getGeometryCorrection(), // final GeometryCorrection geometryCorrection_main,
quadCLT_aux.getGeometryCorrection(), // final GeometryCorrection geometryCorrection_aux,
......@@ -7397,7 +7510,10 @@ if (debugLevel > -100) return true; // temporarily !
final int threadsMax, // maximal number of threads to launch
final boolean updateStatus,
final int debugLevel){
ImageDtt image_dtt = new ImageDtt(quadCLT_aux.isMonochrome(),clt_parameters.getScaleStrength(true));
ImageDtt image_dtt = new ImageDtt(
clt_parameters.transform_size,
quadCLT_aux.isMonochrome(),
clt_parameters.getScaleStrength(true));
double [][] disparity_bimap = new double [ImageDtt.BIDISPARITY_TITLES.length][]; //[0] -residual disparity, [1] - orthogonal (just for debugging) last 4 - max pixel differences
image_dtt.clt_bi_quad (
clt_parameters, // final EyesisCorrectionParameters.CLTParameters clt_parameters,
......@@ -7417,8 +7533,8 @@ if (debugLevel > -100) return true; // temporarily !
ml_data, // final double [][] ml_data, // data for ML - 10 layers - 4 center areas (3x3, 5x5,..) per camera-per direction, 1 - composite, and 1 with just 1 data (target disparity)
null, // final double [][][][] texture_tiles_main, // [tilesY][tilesX]["RGBA".length()][]; null - will skip images combining
null, // final double [][][][] texture_tiles_aux, // [tilesY][tilesX]["RGBA".length()][]; null - will skip images combining
// quadCLT_main.tp.getTilesX()*clt_parameters.transform_size, // final int width,
quadCLT_aux.tp.getTilesX()*clt_parameters.transform_size, // final int width,
// quadCLT_main.tp.getTilesX()*image_dtt.transform_size, // final int width,
quadCLT_aux.tp.getTilesX()*image_dtt.transform_size, // final int width,
null, // quadCLT_main.getGeometryCorrection(), // final GeometryCorrection geometryCorrection_main,
quadCLT_aux.getGeometryCorrection(), // final GeometryCorrection geometryCorrection_aux,
......
......@@ -46,12 +46,19 @@
// Compile-time geometry and launch-configuration constants.
// NOTE(review): 164 x 123 matches the [123][164] kernel-array shape mentioned in the
// Java host code comments - presumably the convolution-kernel grid size; confirm.
#define KERNELS_HOR 164
#define KERNELS_VERT 123
// Number of camera channels; correlate2D receives one CLT pointer per camera (gpu_clt[NUM_CAMS]).
#define NUM_CAMS 4
// Number of camera pairs correlated per tile; must equal the number of rows in pairs[][].
#define NUM_PAIRS 6
// Color components stored per tile in the CLT data (used as the per-tile stride in correlate2D).
#define NUM_COLORS 3
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK 4
// NOTE(review): presumably the threadIdx.x extent for correlate2D - not referenced in the visible code; confirm at the launch site.
#define CORR_THREADS_PER_TILE 8
// Correlation tiles handled by one thread block: correlate2D computes
// corr_num = blockIdx.x * CORR_TILES_PER_BLOCK + threadIdx.y and sizes its shared arrays by it.
#define CORR_TILES_PER_BLOCK 4
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#define CORR_PAIR_SHIFT 8 // 8 lower bits - number of a pair, other bits tile number
#define TASK_CORR_BITS 4
// NOTE(review): radius of the saved correlation output; 2 * 7 + 1 = 15 agrees with the
// 15x15 correlation size mentioned elsewhere - confirm this is the intended relation.
#define CORR_OUT_RAD 7
#endif
//#define IMCLT14
......@@ -106,6 +113,11 @@
// Sizes derived from DTT_SIZE (the transform tile side).
// Row pitch of a tile row padded by one extra column ("+1 to alternate column ports").
#define DTT_SIZE1 (DTT_SIZE + 1)
// Side of a doubled tile (2 * DTT_SIZE).
#define DTT_SIZE2 (2 * DTT_SIZE)
// Doubled tile side plus one.
#define DTT_SIZE21 (DTT_SIZE2 + 1)
// Total rows in the 4 stacked quadrants of a CLT tile (loop bound when copying a tile in correlate2D).
#define DTT_SIZE4 (4 * DTT_SIZE)
// Side of the unfolded correlation result, mlt_corrs[..][DTT_SIZE2M1][DTT_SIZE2M1] (15x15 for DTT_SIZE = 8).
#define DTT_SIZE2M1 (DTT_SIZE2 - 1)
// Use CORR_OUT_RAD for the correlation output
#define BAYER_RED 0
#define BAYER_BLUE 1
......@@ -117,15 +129,16 @@
//#define BAYER_BLUE_COL (1 - BAYER_RED_COL)
#define DBG_TILE_X 174
#define DBG_TILE_Y 118
#define DBG_TILE_X 40
#define DBG_TILE_Y 80
//#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
//#define DEBUG1 1
//#define DEBUG2 1
//#define DEBUG3 1
//#define DEBUG4 1
//#define DEBUG5 1
#define DEBUG6 1
//56494
// struct tp_task
//#define TASK_SIZE 12
......@@ -311,6 +324,24 @@ __constant__ float lpf_data[3][64]={
0.05616371f, 0.04888546f, 0.03703642f, 0.02442406f, 0.01402412f, 0.00703062f, 0.00315436f, 0.00153247f,
0.02728573f, 0.02374977f, 0.01799322f, 0.01186582f, 0.00681327f, 0.00341565f, 0.00153247f, 0.00074451f
}};
// Low-pass filter coefficients for the 2D correlation, stored row-major as 8 rows of 8
// values (64 total). correlate2D multiplies every one of the 4 quadrants of the
// correlation tile element-wise by this table, stepping DTT_SIZE entries per row.
// The table is symmetric (entry [r][c] equals entry [c][r]) and starts at 1.0 in [0][0].
// NOTE(review): the host side calls setLpfCorr(), which presumably overwrites these
// defaults at run time - confirm.
__constant__ float lpf_corr[64]={ // modify if needed
1.00000000f, 0.87041007f, 0.65943687f, 0.43487258f, 0.24970076f, 0.12518080f, 0.05616371f, 0.02728573f,
0.87041007f, 0.75761368f, 0.57398049f, 0.37851747f, 0.21734206f, 0.10895863f, 0.04888546f, 0.02374977f,
0.65943687f, 0.57398049f, 0.43485698f, 0.28677101f, 0.16466189f, 0.08254883f, 0.03703642f, 0.01799322f,
0.43487258f, 0.37851747f, 0.28677101f, 0.18911416f, 0.10858801f, 0.05443770f, 0.02442406f, 0.01186582f,
0.24970076f, 0.21734206f, 0.16466189f, 0.10858801f, 0.06235047f, 0.03125774f, 0.01402412f, 0.00681327f,
0.12518080f, 0.10895863f, 0.08254883f, 0.05443770f, 0.03125774f, 0.01567023f, 0.00703062f, 0.00341565f,
0.05616371f, 0.04888546f, 0.03703642f, 0.02442406f, 0.01402412f, 0.00703062f, 0.00315436f, 0.00153247f,
0.02728573f, 0.02374977f, 0.01799322f, 0.01186582f, 0.00681327f, 0.00341565f, 0.00153247f, 0.00074451f
};
// Camera index pairs for 2D correlation: pairs[p][0] and pairs[p][1] are the first and
// second camera correlated for pair number p (read by correlate2D as cam1 and cam2).
// The six rows cover every combination of two of the cameras 0..3; the row count must
// stay equal to NUM_PAIRS, because correlate2D indexes this table with the pair number.
// NOTE(review): the last row is {2, 1} rather than {1, 2}; the order presumably sets the
// correlation direction for that pair - confirm before reordering.
__constant__ int pairs[6][2]={
{0, 1},
{2, 3},
{0, 2},
{1, 3},
{0, 3},
{2, 1}};
//#endif
__device__ void convertCorrectTile(
struct CltExtra * gpu_kernel_offsets, // [tileY][tileX][color]
......@@ -333,32 +364,297 @@ __device__ void convertCorrectTile(
float window_hor_sin [2*DTT_SIZE],
float window_vert_cos [2*DTT_SIZE]);
__device__ void debug_print_lpf(
float * lpf_tile);
__device__ void debug_print_clt1(
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports)
const int color,
int mask);
__device__ void debug_print_mclt(
float * mclt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports)
const int color);
__device__ void debug_print_corr_15x15(
float * mclt_tile, //DTT_SIZE2M1 x DTT_SIZE2M1
const int color);
// Fractional pixel shift (phase rotation), horizontal. In-place.
__device__ void shiftTileHor(
__device__ void shiftTileHor( // implemented, used
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float residual_shift );
// Fractional pixel shift (phase rotation), vertical. In-place.
__device__ void shiftTileVert(
__device__ void shiftTileVert( // implemented, used
float *clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float residual_shift );
__device__ void convolveTiles(
__device__ void convolveTiles( // implemented, used
float* clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data, rows extended to optimize shared ports
float* kernel); // [4][DTT_SIZE][DTT_SIZE1]) // 4 quadrants of the CLT kernel (DTT3 converted)
__device__ void imclt(
__device__ void correlateAccumulateTiles(
float scale, // scale correlation
float* clt_tile1, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data 1, rows extended to optimize shared ports
float* clt_tile2, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data 2, rows extended to optimize shared ports
float* corr_tile); // [4][DTT_SIZE][DTT_SIZE1]) // 4 quadrants of the correlation result
__device__ void resetCorrelation(
float* corr_tile); // [4][DTT_SIZE][DTT_SIZE1]) // 4 quadrants of the correlation result
__device__ void normalizeTileAmplitude(
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
float fat_zero); // fat zero is absolute, scale it outside
__device__ void corrUnfoldTile(
float* qdata0, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data, rows extended to optimize shared ports
float* rslt); // [DTT_SIZE2M1][DTT_SIZE2M1]) // 15x15
__device__ void imclt( // implemented, used // why is it twice?
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float * mclt_tile ); // [2* DTT_SIZE][DTT_SIZE1+ DTT_SIZE], // +1 to alternate column ports[16][17]
__device__ void imclt(
__device__ void imclt( // implemented, used // why is it twice?
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports [4][8][9]
float * mclt_tile ); // [2* DTT_SIZE][DTT_SIZE1+ DTT_SIZE], // +1 to alternate column ports[16][17]
__device__ void imclt_plane(
__device__ void imclt_plane( // not implemented, not used
int color,
float * gpu_clt, // [TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float * gpu_rbg, // WIDTH, HEIGHT
const size_t dstride); // in floats (pixels)
extern "C"
__global__ void correlate2D(
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// int tilesX, // make it variable
int colors, // number of colors (3/1)
float scale0, // scale for R
float scale1, // scale for B
float scale2, // scale for G
float fat_zero, // here - absolute
size_t num_corr_tiles, // number of correlation tiles to process
int * gpu_corr_indices, // packed tile+pair
const size_t corr_stride, // in floats
float * gpu_corrs) // correlation output data
{
/// int thr3 = threadIdx.x >> 3; // now zero?
/// int column = threadIdx.x; // modify to use 2 * 8 threads, if needed.
float scales[3] = {scale0, scale1, scale2};
int corr_in_block = threadIdx.y;
int corr_num = blockIdx.x * CORR_TILES_PER_BLOCK + corr_in_block;
if (corr_num >= num_corr_tiles){
return; // nothing to do
}
// get number of pair and number of tile
#define ALLTILES 1
#ifdef ALLTILES
int corr_pair = corr_num % NUM_PAIRS;
int tile_num = corr_num / NUM_PAIRS;
#else
int corr_pair = gpu_corr_indices[corr_num];
int tile_num = corr_pair >> CORR_PAIR_SHIFT;
#endif
corr_pair &= (corr_pair & ((1 << CORR_PAIR_SHIFT) - 1));
if (corr_pair > NUM_PAIRS){
return; // BUG - should not happen
}
int cam1 = pairs[corr_pair][0]; // number of the first camera in a pair
int cam2 = pairs[corr_pair][1]; // number of the first camera in a pair
__syncthreads();// __syncwarp();
__shared__ float clt_tiles1 [CORR_TILES_PER_BLOCK][4][DTT_SIZE][DTT_SIZE1];
__shared__ float clt_tiles2 [CORR_TILES_PER_BLOCK][4][DTT_SIZE][DTT_SIZE1];
__shared__ float clt_corrs [CORR_TILES_PER_BLOCK][4][DTT_SIZE][DTT_SIZE1];
__shared__ float mlt_corrs [CORR_TILES_PER_BLOCK][DTT_SIZE2M1][DTT_SIZE2M1]; // result correlation
// set clt_corr to all zeros
float * clt_corr = ((float *) clt_corrs) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1); // top left quadrant0
float * mclt_corr = ((float *) mlt_corrs) + corr_in_block * (DTT_SIZE2M1*DTT_SIZE2M1);
resetCorrelation(clt_corr);
for (int color = 0; color < colors; color++){
// copy clt (frequency domain data)
float * clt_tile1 = ((float *) clt_tiles1) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
float * clt_tile2 = ((float *) clt_tiles2) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
int offs = (tile_num * NUM_COLORS + color) * (4 * DTT_SIZE * DTT_SIZE) + threadIdx.x;
float * gpu_tile1 = ((float *) gpu_clt[cam1]) + offs;
float * gpu_tile2 = ((float *) gpu_clt[cam2]) + offs;
float * clt_tile1i = clt_tile1 + threadIdx.x;
float * clt_tile2i = clt_tile2 + threadIdx.x;
#pragma unroll
for (int i = 0; i < DTT_SIZE4; i++){ // copy 32 rows (4 quadrants of 8 rows)
*clt_tile1i= *gpu_tile1;
*clt_tile2i= *gpu_tile2;
clt_tile1i += DTT_SIZE1;
clt_tile2i += DTT_SIZE1;
gpu_tile1 += DTT_SIZE;
gpu_tile2 += DTT_SIZE;
}
__syncthreads();
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D tile = %d, pair=%d, color = %d CAMERA1\n",tile_num, corr_pair,color);
debug_print_clt1(clt_tile1, color, 0xf); //
printf("\ncorrelate2D tile = %d, pair=%d, color = %d CAMERA22\n",tile_num, corr_pair,color);
debug_print_clt1(clt_tile2, color, 0xf); //
}
__syncthreads();// __syncwarp();
#endif
#endif
// each thread should get the same pointers here, offsets are inside
correlateAccumulateTiles(
scales[color], // float scale, // scale correlation
clt_tile1, // float* clt_tile1, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data 1, rows extended to optimize shared ports
clt_tile2, // float* clt_tile2, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data 2, rows extended to optimize shared ports
clt_corr); // float* corr_tile) // [4][DTT_SIZE][DTT_SIZE1]) // 4 quadrants of the correlation result
__syncthreads();
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D, color = %d CORRELATION\n", color);
debug_print_clt1(clt_corr, color, 0xf);
}
__syncthreads();// __syncwarp();
#endif
#endif
}
normalizeTileAmplitude(
clt_corr, // float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
fat_zero); // float fat_zero ) // fat zero is absolute, scale it outside
// Low Pass Filter from constant area (is it possible to replace?)
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D CORRELATION NORMALIZED, fat_zero=%f\n",fat_zero);
debug_print_clt1(clt_corr, -1, 0xf);
}
__syncthreads();// __syncwarp();
#endif
#endif
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D LPF\n");
debug_print_lpf(lpf_corr);
}
__syncthreads();// __syncwarp();
#endif
#endif
float *clt = clt_corr + threadIdx.x;
#pragma unroll
for (int q = 0; q < 4; q++){
float *lpf = lpf_corr + threadIdx.x;
#pragma unroll
for (int i = 0; i < DTT_SIZE; i++){
(*clt) *= (*lpf);
clt += DTT_SIZE1;
lpf += DTT_SIZE;
}
}
__syncthreads();// __syncwarp();
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D CORRELATION LPF-ed\n");
debug_print_clt1(clt_corr, -1, 0xf);
}
__syncthreads();// __syncwarp();
#endif
#endif
// now new part - need to transform with DCT-II and make 15x15
/*
// quadrant 0 dct_ii hor, dct_ii vert,
// quadrant 1 dct_ii hor, dst_ii vert,
// quadrant 2 dst_ii hor, dct_ii vert,
// quadrant 3 dst_ii hor, dst_ii vert,
Java code:
for (int quadrant = 0; quadrant < 4; quadrant++){
int mode = ((quadrant << 1) & 2) | ((quadrant >> 1) & 1); // transpose
tcorr[first_col][quadrant] = dtt.dttt_iie(tcorr[first_col][quadrant], mode, transform_size);
}
*/
// change to 16-32 threads?? in next iteration
// hor pass
for (int q = 0; q < 4; q++){
int is_sin = (q >> 1) & 1;
// int is_sin = q & 1;
// dttii_shared_mem(clt_corr + (q * DTT_SIZE + threadIdx.x) * DTT_SIZE1 , 1, is_sin); // horizontal pass, tread is row
// dttii_shared_mem(clt_corr + q * (DTT_SIZE1 * DTT_SIZE) + threadIdx.x , DTT_SIZE1, is_sin); // vertical pass, thread is column
dttii_shared_mem_nonortho(clt_corr + q * (DTT_SIZE1 * DTT_SIZE) + threadIdx.x , DTT_SIZE1, is_sin); // vertical pass, thread is column
}
__syncthreads();
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D AFTER VERTICAL (HORIZONTAL) PASS\n");
debug_print_clt1(clt_corr, -1, 0xf);
}
__syncthreads();// __syncwarp();
#endif
#endif
// vert pass
for (int q = 0; q < 4; q++){
int is_sin = q & 1;
// int is_sin = (q >> 1) & 1;
// dttii_shared_mem(clt_corr + q * (DTT_SIZE1 * DTT_SIZE) + threadIdx.x , DTT_SIZE1, is_sin); // vertical pass, thread is column
// dttii_shared_mem(clt_corr + (q * DTT_SIZE + threadIdx.x) * DTT_SIZE1 , 1, is_sin); // horizontal pass, tread is row
dttii_shared_mem_nonortho(clt_corr + (q * DTT_SIZE + threadIdx.x) * DTT_SIZE1 , 1, is_sin); // horizontal pass, tread is row
}
__syncthreads();
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D AFTER HOSIZONTAL (VERTICAL) PASS\n");
debug_print_clt1(clt_corr, -1, 0xf);
}
__syncthreads();// __syncwarp();
#endif
#endif
corrUnfoldTile(
(float *) clt_corr, // float* qdata0, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data, rows extended to optimize shared ports
(float *) mclt_corr); // float* rslt) // [DTT_SIZE2M1][DTT_SIZE2M1]) // 15x15
__syncthreads();
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D after UNFOLD\n");
debug_print_corr_15x15(mclt_corr, -1);
}
__syncthreads();// __syncwarp();
#endif
#endif
// copy 15x15 tile to main memory
int corr_tile_offset = + corr_stride * corr_num;
float *mem_corr = gpu_corrs + corr_tile_offset;
//CORR_THREADS_PER_TILE
// int offs = threadIdx.x;
#pragma unroll
for (int offs = threadIdx.x; offs < DTT_SIZE2M1*DTT_SIZE2M1; offs+=CORR_THREADS_PER_TILE){ // variable number of cycles per thread
mem_corr[offs] = mclt_corr[offs];
}
__syncthreads();
#ifdef DBG_TILE
#ifdef DEBUG6
if ((tile_num == DBG_TILE) && (corr_pair == 0) && (threadIdx.x == 0)){
printf("\ncorrelate2D after copy to main memory\n");
// debug_print_clt1(clt_corr, -1, 0xf);
}
__syncthreads();// __syncwarp();
#endif
#endif
}
extern "C"
__global__ void convert_correct_tiles(
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct paraeters
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
float ** gpu_kernel_offsets, // [NUM_CAMS],
float ** gpu_kernels, // [NUM_CAMS],
float ** gpu_images, // [NUM_CAMS],
......@@ -367,9 +663,7 @@ __global__ void convert_correct_tiles(
size_t dstride, // in floats (pixels)
int num_tiles, // number of tiles in task
int lpf_mask) // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
{
// struct CltExtra* gpu_kernel_offsets = (struct CltExtra*) vgpu_kernel_offsets;
dim3 t = threadIdx;
int tile_in_block = threadIdx.y;
int task_num = blockIdx.x * TILES_PER_BLOCK + tile_in_block;
......@@ -379,8 +673,6 @@ __global__ void convert_correct_tiles(
__shared__ struct tp_task tt [TILES_PER_BLOCK];
// Copy task data to shared memory
tt[tile_in_block].task = gpu_task -> task;
// tt[tile_in_block].tx = gpu_task -> tx;
// tt[tile_in_block].ty = gpu_task -> ty;
tt[tile_in_block].txy = gpu_task -> txy;
int thread0 = threadIdx.x & 1;
int thread12 = threadIdx.x >>1;
......@@ -426,7 +718,6 @@ __global__ void convert_correct_tiles(
lpf_mask, // const int lpf_mask,
tt[tile_in_block].xy[ncam][0], // const float centerX,
tt[tile_in_block].xy[ncam][1], // const float centerY,
// tt[tile_in_block].tx | (tt[tile_in_block].ty <<16), // const int txy,
tt[tile_in_block].txy, // const int txy,
dstride, // size_t dstride, // in floats (pixels)
(float * )(clt_tile [tile_in_block]), // float clt_tile [TILES_PER_BLOCK][NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE])
......@@ -556,6 +847,191 @@ __device__ void convolveTiles(
}
}
/**
 * Accumulate the scaled product of two 4-quadrant CLT tiles into a correlation tile.
 *
 * Each thread processes one row of every quadrant (the row is selected by threadIdx.x)
 * and walks over the DTT_SIZE columns of that row. For every column the four quadrant
 * values of clt_tile1 are combined with the four quadrant values of clt_tile2 using the
 * fixed sign pattern below, and the four results, multiplied by scale, are added to the
 * corresponding elements of corr_tile.
 *
 * @param scale     factor applied to the products before they are accumulated
 * @param clt_tile1 [4][DTT_SIZE][DTT_SIZE1] first tile, 4 quadrants, padded rows
 * @param clt_tile2 [4][DTT_SIZE][DTT_SIZE1] second tile, 4 quadrants, padded rows
 * @param corr_tile [4][DTT_SIZE][DTT_SIZE1] accumulator, 4 quadrants (read-modify-write)
 */
__device__ void correlateAccumulateTiles(
		float   scale,     // scale correlation
		float * clt_tile1, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data 1, rows extended to optimize shared ports
		float * clt_tile2, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data 2, rows extended to optimize shared ports
		float * corr_tile) // [4][DTT_SIZE][DTT_SIZE1]) // 4 quadrants of the correlation result
{
	const int quad_stride = DTT_SIZE1 * DTT_SIZE;     // floats between the same element of adjacent quadrants
	const int row_offs    = threadIdx.x * DTT_SIZE1;  // start of this thread's row inside a quadrant

	// this thread's row in each quadrant of the first tile
	const float * a0 = clt_tile1 + row_offs;
	const float * a1 = a0 + quad_stride;
	const float * a2 = a1 + quad_stride;
	const float * a3 = a2 + quad_stride;
	// ... of the second tile
	const float * b0 = clt_tile2 + row_offs;
	const float * b1 = b0 + quad_stride;
	const float * b2 = b1 + quad_stride;
	const float * b3 = b2 + quad_stride;
	// ... and of the accumulator
	float * c0 = corr_tile + row_offs;
	float * c1 = c0 + quad_stride;
	float * c2 = c1 + quad_stride;
	float * c3 = c2 + quad_stride;

	for (int col = 0; col < DTT_SIZE; col++){
		// quadrant 0 of the second tile
		float b = b0[col];
		float r0 =  a0[col] * b;
		float r1 = -a1[col] * b;
		float r2 = -a2[col] * b;
		float r3 =  a3[col] * b;
		// quadrant 1 of the second tile
		b = b1[col];
		r0 += a1[col] * b;
		r1 += a0[col] * b;
		r2 -= a3[col] * b;
		r3 -= a2[col] * b;
		// quadrant 2 of the second tile
		b = b2[col];
		r0 += a2[col] * b;
		r1 -= a3[col] * b;
		r2 += a0[col] * b;
		r3 -= a1[col] * b;
		// quadrant 3 of the second tile
		b = b3[col];
		r0 += a3[col] * b;
		r1 += a2[col] * b;
		r2 += a1[col] * b;
		r3 += a0[col] * b;
		// accumulate (the caller resets corr_tile before the first color)
		c0[col] += scale * r0;
		c1[col] += scale * r1;
		c2[col] += scale * r2;
		c3[col] += scale * r3;
	}
}
/**
 * Zero the correlation accumulator tile.
 *
 * Each thread clears the DTT_SIZE elements of its own row (selected by threadIdx.x)
 * in each of the four quadrants; the padding element of the DTT_SIZE1-long row is
 * not touched.
 *
 * @param corr_tile [4][DTT_SIZE][DTT_SIZE1] correlation tile to clear
 */
__device__ void resetCorrelation(
		float * corr_tile) // [4][DTT_SIZE][DTT_SIZE1]) // 4 quadrants of the correlation result
{
	float * row = corr_tile + threadIdx.x * DTT_SIZE1; // this thread's row in quadrant 0
	for (int quad = 0; quad < 4; quad++){
		for (int col = 0; col < DTT_SIZE; col++){
			row[col] = 0;
		}
		row += (DTT_SIZE1*DTT_SIZE); // same row, next quadrant
	}
}
/**
 * Normalize the amplitude of a 4-quadrant tile element by element.
 *
 * Each thread handles one row (selected by threadIdx.x). For every column the sum of
 * squares of the four quadrant values plus fat_zero squared is formed, and all four
 * values are multiplied by the reciprocal square root of that sum.
 *
 * @param clt_tile [4][DTT_SIZE][DTT_SIZE1] tile to normalize in place, padded rows
 * @param fat_zero absolute offset; its square is added to the sum of squares
 *                 (any scaling of fat_zero is expected to be done by the caller)
 */
__device__ void normalizeTileAmplitude(
		float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports
		float   fat_zero) // fat zero is absolute, scale it outside
{
	const int quad_stride = DTT_SIZE1 * DTT_SIZE; // floats between the same element of adjacent quadrants
	float * q0 = clt_tile + threadIdx.x * DTT_SIZE1; // this thread's row, quadrant 0
	float * q1 = q0 + quad_stride;                   // ... quadrant 1
	float * q2 = q1 + quad_stride;                   // ... quadrant 2
	float * q3 = q2 + quad_stride;                   // ... quadrant 3
#pragma unroll
	for (int col = 0; col < DTT_SIZE; col++) {
		float s2 = fat_zero * fat_zero +
				q0[col] * q0[col] +
				q1[col] * q1[col] +
				q2[col] * q2[col] +
				q3[col] * q3[col];
		float scale = rsqrtf(s2); // 1.0/sqrt(s2)
		q0[col] *= scale;
		q1[col] *= scale;
		q2[col] *= scale;
		q3[col] *= scale;
	}
}
/*
Converted from DttRad2.java:443
public double [] corr_unfold_tile(
double [][] qdata, // [4][transform_size*transform_size] data after DCT2 (pixel domain)
int transform_size
)
*/
// Unfold four quadrants of correlation data into a single DTT_SIZE2M1 x DTT_SIZE2M1 tile.
// Thread i (= threadIdx.x) reads source row i of quadrants 0 and 1 and, for i > 0, row (i-1)
// of quadrants 2 and 3. It writes two result rows: i rows below and i rows above the row
// that holds rslt_base_index (for i == 0 both are that same row).
// Partial sums are stored to rslt first and then read back and updated in the second loop,
// so the order of the statements below matters.
// NOTE(review): the visible caller surrounds this call with __syncthreads(); confirm for other callers.
__device__ void corrUnfoldTile(
float* qdata0, // [4][DTT_SIZE][DTT_SIZE1], // 4 quadrants of the clt data, rows extended to optimize shared ports
float* rslt) // [DTT_SIZE2M1][DTT_SIZE2M1]) // 15x15
{
// index of the reference (center) element of the result: row (DTT_SIZE-1), column (DTT_SIZE-1)
// when DTT_SIZE2M1 == 2 * DTT_SIZE - 1 (presumably 15 and 8 - see the 15x15 note above)
const int rslt_base_index = DTT_SIZE2M1 * (DTT_SIZE) - DTT_SIZE;
// start of quadrants 1..3, each DTT_SIZE rows of DTT_SIZE1 floats
float * qdata1 = qdata0 + (DTT_SIZE * DTT_SIZE1);
float * qdata2 = qdata1 + (DTT_SIZE * DTT_SIZE1);
float * qdata3 = qdata2 + (DTT_SIZE * DTT_SIZE1);
int i = threadIdx.x;
float corr_pixscale = 0.25f; // every quadrant contribution is scaled by this factor
int i_transform_size = i * DTT_SIZE1; // used to address source rows which are 9 long
// NOTE(review): this value is recomputed identically after the i == 0 early return and is not read before that
int im1_transform_size = i_transform_size - DTT_SIZE1; // negative for i = 0, use only after divergence
int rslt_row_offs = i * DTT_SIZE2M1;
int rslt_base_index_p = rslt_base_index + rslt_row_offs; // i * DTT_SIZE2M1;
int rslt_base_index_m = rslt_base_index - rslt_row_offs; // i * DTT_SIZE2M1;
// center column: quadrant 0 contribution only; final for thread i=0, completed below for i > 0
rslt[rslt_base_index_p] = corr_pixscale * qdata0[i_transform_size]; // incomplete, will only be used for thread i=0
rslt[rslt_base_index_m] = rslt[rslt_base_index_p]; // nop for i=0 incomplete, will only be used for thread i=0
// columns +/-j of the "plus" row: contributions of quadrants 0 and 1
for (int j = 1; j < DTT_SIZE; j++) {
int rslt_base_index_pp = rslt_base_index_p + j;
int rslt_base_index_pm = rslt_base_index_p - j;
/// int rslt_base_index_mp = rslt_base_index_m + j;
/// int rslt_base_index_mm = rslt_base_index_m - j;
rslt[rslt_base_index_pp] = corr_pixscale * (
qdata0[i_transform_size + j] +
qdata1[i_transform_size + j -1]); // incomplete, will only be used for thread i=0
rslt[rslt_base_index_pm] = corr_pixscale * (
qdata0[i_transform_size + j] +
-qdata1[i_transform_size + j -1]); // incomplete, will only be used for thread i=0
}
// thread 0 has no quadrant 2/3 contribution: its single (center) row is complete
if (i == 0) {
return;
}
/// int im1 = i-1;
im1_transform_size = i_transform_size - DTT_SIZE1; // source row (i - 1) of quadrants 2 and 3
// center column: add quadrant 2 to the "plus" row, subtract it from the "minus" row
float d = corr_pixscale * qdata2[im1_transform_size];
rslt[rslt_base_index_p] += d;
rslt[rslt_base_index_m] -= d;
for (int j = 1; j < DTT_SIZE; j++) {
int rslt_base_index_pp = rslt_base_index_p + j;
int rslt_base_index_pm = rslt_base_index_p - j;
int rslt_base_index_mp = rslt_base_index_m + j;
int rslt_base_index_mm = rslt_base_index_m - j;
float d2 = corr_pixscale * qdata2[im1_transform_size + j];
float d3 = corr_pixscale * qdata3[im1_transform_size + j -1];
// rslt[rslt_base_index_pp] and rslt[rslt_base_index_pm] were partially calculated in the loop shared with i=0;
// the "minus" row must be derived from them before they are updated themselves
rslt[rslt_base_index_mp] = rslt[rslt_base_index_pp] - d2 - d3;
rslt[rslt_base_index_mm] = rslt[rslt_base_index_pm] - d2 + d3;
rslt[rslt_base_index_pp] += d2 + d3;
rslt[rslt_base_index_pm] += d2 - d3;
}
}
/**
 * Debug helper: print a DTT_SIZE x DTT_SIZE array of floats (rows are DTT_SIZE long,
 * no padding), one text line per row.
 *
 * @param lpf_tile array of DTT_SIZE * DTT_SIZE floats to print
 */
__device__ void debug_print_lpf(
		float * lpf_tile)
{
	const float * row = lpf_tile; // start of the row being printed
	for (int r = 0; r < DTT_SIZE; r++){
		for (int c = 0; c < DTT_SIZE; c++){
			printf ("%10.5f ", row[c]);
		}
		printf("\n");
		row += DTT_SIZE;
	}
}
__device__ void debug_print_clt1(
float * clt_tile, // [4][DTT_SIZE][DTT_SIZE1], // +1 to alternate column ports)
const int color,
......@@ -591,6 +1067,23 @@ __device__ void debug_print_mclt(
printf("\n");
}
/**
 * Debug helper: print a DTT_SIZE2M1 x DTT_SIZE2M1 correlation tile, one text line per
 * row, followed by an empty line.
 *
 * @param mclt_tile array of DTT_SIZE2M1 * DTT_SIZE2M1 floats to print
 * @param color     when non-negative, a header line with this number is printed first
 */
__device__ void debug_print_corr_15x15(
		float * mclt_tile, //DTT_SIZE2M1 x DTT_SIZE2M1
		const int color)
{
	if (color >= 0) {
		printf("----------- Color = %d -----------\n",color);
	}
	const float * row = mclt_tile; // start of the row being printed
	for (int r = 0; r < DTT_SIZE2M1; r++){
		for (int c = 0; c < DTT_SIZE2M1; c++){
			printf ("%10.5f ", row[c]);
		}
		printf("\n");
		row += DTT_SIZE2M1;
	}
	printf("\n");
}
__device__ void convertCorrectTile(
struct CltExtra * gpu_kernel_offsets, // [tileY][tileX][color]
float * gpu_kernels, // [tileY][tileX][color]
......@@ -1361,7 +1854,7 @@ __device__ void imclt_plane(
//
// Uses 16 threads, gets 4*8*8 clt tiles, performs idtt-iv (swapping 1 and 2 quadrants) and then unfolds with window,
// adding to the output 16x16 tile (to use Read-modify-write with 4 passes over the frame. Shuld be zeroed before the
// adding to the output 16x16 tile (to use Read-modify-write with 4 passes over the frame. Should be zeroed before the
// first pass
//__constant__ int imclt_indx9[16] = {0x28,0x31,0x3a,0x43,0x43,0x3a,0x31,0x28,0x1f,0x16,0x0d,0x04,0x04,0x0d,0x16,0x1f};
__device__ void imclt(
......
......@@ -36,10 +36,10 @@
* \brief DCT-II, DST-II, DCT-IV and DST-IV for Complex Lapped Transform of 16x16 (stride 8)
* in GPU
* This file contains building blocks for the 16x16 stride 8 Complex Lapped Transform (CLT)
* imlementation. DTT-IV are used for forward and inverse 2D CLT, DTT-II - to convert correlation
* implementation. DTT-IV are used for forward and inverse 2D CLT, DTT-II - to convert correlation
* results from the frequency to pixel domain. DTT-III (inverse of DTT-II) is not implemented
* here it is used to convert convolution kernels and LPF to the frequency domain - done in
* softwaer.
* software.
*
* This file is compatible with both runtime and driver API, runtime is used for development
* with Nvidia Nsight, driver API when calling these kernels from Java
......@@ -84,23 +84,24 @@ __constant__ float SINN1[] = {0.195090f,0.555570f};
__constant__ float SINN2[] = {0.098017f,0.290285f,0.471397f,0.634393f};
inline __device__ void dttii_shared_mem(float * x0, int inc, int dst_not_dct);
inline __device__ void dttiv_shared_mem(float * x0, int inc, int dst_not_dct);
inline __device__ void dttiv_nodiverg(float * x, int inc, int dst_not_dct);
inline __device__ void dctiv_nodiverg(float * x0, int inc);
inline __device__ void dstiv_nodiverg(float * x0, int inc);
inline __device__ void dttii_shared_mem_nonortho(float * x0, int inc, int dst_not_dct); // does not scale by y[0] (y[7]) by 1/sqrt[0]
inline __device__ void dttii_shared_mem(float * x0, int inc, int dst_not_dct); // used in GPU_DTT24_DRV
inline __device__ void dttiv_shared_mem(float * x0, int inc, int dst_not_dct); // used in GPU_DTT24_DRV
inline __device__ void dttiv_nodiverg (float * x, int inc, int dst_not_dct); // not used
inline __device__ void dctiv_nodiverg (float * x0, int inc); // used in TP
inline __device__ void dstiv_nodiverg (float * x0, int inc); // used in TP
inline __device__ void dct_ii8 ( float x[8], float y[8]); // x,y point to 8-element arrays each
inline __device__ void dct_iv8 ( float x[8], float y[8]); // x,y point to 8-element arrays each
inline __device__ void dst_iv8 ( float x[8], float y[8]); // x,y point to 8-element arrays each
inline __device__ void _dctii_nrecurs8 ( float x[8], float y[8]); // x,y point to 8-element arrays each
inline __device__ void _dctiv_nrecurs8 ( float x[8], float y[8]); // x,y point to 8-element arrays each
inline __device__ void dct_ii8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
inline __device__ void dct_iv8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
inline __device__ void dst_iv8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
inline __device__ void _dctii_nrecurs8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
inline __device__ void _dctiv_nrecurs8 ( float x[8], float y[8]); // x,y point to 8-element arrays each // not used
/**
**************************************************************************
* Converts 2D image (in the GPU memory) using 8x8 DTT 8x8 tiles.
* Mostly for testing and profiling individual converions
* Mostly for testing and profiling individual conversions
*
* \param dst [OUT] - Coefficients as 8x8 tiles
* \param src [IN] - Source image of floats
......@@ -376,6 +377,88 @@ inline __device__ void dttii_shared_mem(float * x0, int inc, int dst_not_dct)
}
}
/**
 * In-place 8-point DCT-II / DST-II on 8 floats spaced inc apart, starting at x0.
 *
 * All 8 inputs are read before any output is written. In this variant the terms
 * (w00 + w01), (w20 + w21) and (w30 + w31) are scaled by 0.5f and the remaining
 * outputs by SQRT1_8 (the (w00 + w01) term carries no extra 1/sqrt(2)).
 *
 * @param x0          pointer to the first of the 8 samples
 * @param inc         distance in floats between consecutive samples
 * @param dst_not_dct non-zero: DST-II (odd input samples are negated and the output
 *                    sequence is stored in reverse order); zero: DCT-II
 */
inline __device__ void dttii_shared_mem_nonortho(float * x0, int inc, int dst_not_dct)
{
	float * const p1 = x0 + inc;
	float * const p2 = p1 + inc;
	float * const p3 = p2 + inc;
	float * const p4 = p3 + inc;
	float * const p5 = p4 + inc;
	float * const p6 = p5 + inc;
	float * const p7 = p6 + inc;

	// load the whole input sequence first
	float s0 = *x0;
	float s1 = *p1;
	float s2 = *p2;
	float s3 = *p3;
	float s4 = *p4;
	float s5 = *p5;
	float s6 = *p6;
	float s7 = *p7;
	if (dst_not_dct) { // DSTII: invert odd input samples, then proceed as DCTII
		s1 = -s1;
		s3 = -s3;
		s5 = -s5;
		s7 = -s7;
	}
	// first butterfly stage: sums feed the even part, differences feed the odd part
	const float u00 = s0 + s7;
	const float u10 = s0 - s7;
	const float u01 = s1 + s6;
	const float u11 = s1 - s6;
	const float u02 = s2 + s5;
	const float u12 = s2 - s5;
	const float u03 = s3 + s4;
	const float u13 = s3 - s4;

	// even part (was _dctii_nrecurs4(u00, u01, u02, u03, ...))
	const float w00 = u00 + u03;
	const float w10 = u00 - u03;
	const float w01 = (u01 + u02);
	const float w11 = (u01 - u02);
	const float v01 = COSPI_1_8_SQRT2 * w10 + COSPI_3_8_SQRT2 * w11;
	const float v03 = COSPI_3_8_SQRT2 * w10 - COSPI_1_8_SQRT2 * w11;

	// odd part (was _dctiv_nrecurs4(u10, u11, u12, u13, ...))
	const float w20 =  ( COSN1[0] * u10 + SINN1[0] * u13);
	const float w30 =  (-SINN1[1] * u11 + COSN1[1] * u12);
	const float w21 =  ( COSN1[1] * u11 + SINN1[1] * u12);
	const float w31 = -(-SINN1[0] * u10 + COSN1[0] * u13);
	const float v11 = w20 - w21 - w30 + w31;
	const float v12 = w20 - w21 + w30 - w31;

	// outputs in DCT-II order
	const float y0 = (w00 + w01) * 0.5f;     // no 1/sqrt(2) here
	const float y1 = (w20 + w21) * 0.5f;
	const float y2 = v01 * SQRT1_8;
	const float y3 = v11 * SQRT1_8;
	const float y4 = (w00 - w01) * SQRT1_8;
	const float y5 = v12 * SQRT1_8;
	const float y6 = v03 * SQRT1_8;
	const float y7 = (w30 + w31)* 0.5f;

	if (dst_not_dct) { // DSTII: invert output sequence
		*x0 = y7;
		*p1 = y6;
		*p2 = y5;
		*p3 = y4;
		*p4 = y3;
		*p5 = y2;
		*p6 = y1;
		*p7 = y0;
	} else {
		*x0 = y0;
		*p1 = y1;
		*p2 = y2;
		*p3 = y3;
		*p4 = y4;
		*p5 = y5;
		*p6 = y6;
		*p7 = y7;
	}
}
inline __device__ void dttiv_shared_mem(float * x0, int inc, int dst_not_dct)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment