Commit 514057c6 authored by Andrey Filippov

Implemented/tested per-tile geometric distortions setup

parent 50630abc
@@ -96,7 +96,8 @@ public class GPUTileProcessor {
{"*","dtt8x8.h","dtt8x8.cu"},
{"*","dtt8x8.h","geometry_correction.h","TileProcessor.h","TileProcessor.cuh"}};
*/
static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","TileProcessor.h","TileProcessor.cuh"}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.cuh","TileProcessor.cuh"}}; // static String [][] GPU_SRC_FILES = {{"*","dtt8x8.cuh","TileProcessor.cuh"}};
static String GPU_CONVERT_CORRECT_TILES_NAME = "convert_correct_tiles"; // name in C code static String GPU_CONVERT_CORRECT_TILES_NAME = "convert_correct_tiles"; // name in C code
static String GPU_IMCLT_RBG_NAME = "imclt_rbg"; // name in C code static String GPU_IMCLT_RBG_NAME = "imclt_rbg"; // name in C code
...@@ -104,6 +105,8 @@ public class GPUTileProcessor { ...@@ -104,6 +105,8 @@ public class GPUTileProcessor {
// static String GPU_TEXTURES_NAME = "textures_gen"; // name in C code // static String GPU_TEXTURES_NAME = "textures_gen"; // name in C code
static String GPU_TEXTURES_NAME = "textures_accumulate"; // name in C code static String GPU_TEXTURES_NAME = "textures_accumulate"; // name in C code
static String GPU_RBGA_NAME = "generate_RBGA"; // name in C code static String GPU_RBGA_NAME = "generate_RBGA"; // name in C code
static String GPU_ROT_DERIV = "calc_rot_deriv"; // calculate rotation matrices and derivatives
static String SET_TILES_OFFSETS = "get_tiles_offsets"; // calculate pixel offsets and disparity distortions
// pass some defines to gpu source code with #ifdef JCUDA
@@ -148,7 +151,7 @@ public class GPUTileProcessor {
public static int RBYRDIST_LEN = 5001; //for double, 10001 - float; // length of rByRDist to allocate shared memory
public static double RBYRDIST_STEP = 0.0004; // for double, 0.0002 - for float; // to fit into GPU shared memory (was 0.001);
public static int TILES_PER_BLOCK_GEOM = 32/NUM_CAMS; // blockDim.x = NUM_CAMS; blockDim.y = TILES_PER_BLOCK_GEOM
public static int TASK_TEXTURE_BITS = ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT));
@@ -163,15 +166,18 @@ public class GPUTileProcessor {
private CUfunction GPU_CORRELATE2D_kernel = null;
private CUfunction GPU_TEXTURES_kernel = null;
private CUfunction GPU_RBGA_kernel = null;
private CUfunction GPU_ROT_DERIV_kernel = null;
private CUfunction SET_TILES_OFFSETS_kernel = null;
// CPU arrays of pointers to GPU memory
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
private CUdeviceptr [] gpu_kernels_h = new CUdeviceptr[NUM_CAMS];
private CUdeviceptr [] gpu_kernel_offsets_h = new CUdeviceptr[NUM_CAMS];
private CUdeviceptr [] gpu_bayer_h = new CUdeviceptr[NUM_CAMS];
private CUdeviceptr [] gpu_clt_h = new CUdeviceptr[NUM_CAMS];
private CUdeviceptr [] gpu_corr_images_h= new CUdeviceptr[NUM_CAMS];
// GPU pointers to array of GPU pointers
private CUdeviceptr gpu_kernels = new CUdeviceptr();
private CUdeviceptr gpu_kernel_offsets = new CUdeviceptr();
@@ -187,6 +193,11 @@ public class GPUTileProcessor {
private CUdeviceptr gpu_num_texture_tiles = new CUdeviceptr(); // 8 ints
private CUdeviceptr gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER
private CUdeviceptr gpu_correction_vector= new CUdeviceptr();
private CUdeviceptr gpu_rot_deriv= new CUdeviceptr(); // used internally by device, may be read to CPU for testing
private CUdeviceptr gpu_geometry_correction= new CUdeviceptr();
private CUdeviceptr gpu_rByRDist= new CUdeviceptr(); // calculated once for the camera distortion model in CPU (move to GPU?)
CUmodule module; // to access constants memory
private int mclt_stride;
private int corr_stride;
@@ -227,15 +238,22 @@ public class GPUTileProcessor {
flt[indx++] = Float.intBitsToFloat(tx + (ty << 16));
float [][] offsets = use_aux? this.xy_aux: this.xy;
for (int i = 0; i < NUM_CAMS; i++) {
if (offsets != null) {
flt[indx++] = offsets[i][0];
flt[indx++] = offsets[i][1];
} else {
indx+= 2;
}
}
flt[indx++] = this.target_disparity;
for (int i = 0; i < NUM_CAMS; i++) { // actually disp_dist will be initialized by the GPU
indx+= 4;
/*
flt[indx++] = disp_dist[i][0];
flt[indx++] = disp_dist[i][1];
flt[indx++] = disp_dist[i][2];
flt[indx++] = disp_dist[i][3];
*/
}
return flt;
}
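For reference (not part of this commit): the TpTask serialization above packs the tile indices tx and ty into a single float by reinterpreting the int bits, so the GPU can recover them exactly from the same 32 bits. A minimal round-trip sketch; the class name TxTyPackDemo and the example values are made up for illustration:

public class TxTyPackDemo {
    public static void main(String[] args) {
        int tx = 123, ty = 45;                                 // example tile indices, each assumed to fit in 16 bits
        float packed = Float.intBitsToFloat(tx + (ty << 16));  // same packing as in the task serialization above
        int bits = Float.floatToRawIntBits(packed);            // the raw bits survive the float round trip
        int tx2 = bits & 0xffff;                               // low 16 bits -> tx
        int ty2 = bits >>> 16;                                 // high 16 bits -> ty
        System.out.println("tx=" + tx2 + ", ty=" + ty2);       // prints tx=123, ty=45
    }
}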
@@ -446,16 +464,22 @@ public class GPUTileProcessor {
GPU_IMCLT_RBG_NAME,
GPU_CORRELATE2D_NAME,
GPU_TEXTURES_NAME,
GPU_RBGA_NAME,
GPU_ROT_DERIV,
SET_TILES_OFFSETS
};
CUfunction[] functions = createFunctions(kernelSources,
func_names,
capability); // on my - 75
GPU_CONVERT_CORRECT_TILES_kernel = functions[0];
GPU_IMCLT_RBG_kernel = functions[1];
GPU_CORRELATE2D_kernel = functions[2];
GPU_TEXTURES_kernel= functions[3];
GPU_RBGA_kernel= functions[4];
GPU_ROT_DERIV_kernel = functions[5];
SET_TILES_OFFSETS_kernel = functions[6];
System.out.println("GPU kernel functions initialized"); System.out.println("GPU kernel functions initialized");
System.out.println(GPU_CONVERT_CORRECT_TILES_kernel.toString()); System.out.println(GPU_CONVERT_CORRECT_TILES_kernel.toString());
...@@ -463,6 +487,8 @@ public class GPUTileProcessor { ...@@ -463,6 +487,8 @@ public class GPUTileProcessor {
System.out.println(GPU_CORRELATE2D_kernel.toString()); System.out.println(GPU_CORRELATE2D_kernel.toString());
System.out.println(GPU_TEXTURES_kernel.toString()); System.out.println(GPU_TEXTURES_kernel.toString());
System.out.println(GPU_RBGA_kernel.toString()); System.out.println(GPU_RBGA_kernel.toString());
System.out.println(GPU_ROT_DERIV_kernel.toString());
System.out.println(SET_TILES_OFFSETS_kernel.toString());
// Init data arrays for all kernels
int tilesX = IMG_WIDTH / DTT_SIZE;
@@ -522,9 +548,15 @@ public class GPUTileProcessor {
for (int ncam = 0; ncam < NUM_CAMS; ncam++) gpu_clt_l[ncam] = getPointerAddress(gpu_clt_h[ncam]);
cuMemcpyHtoD(gpu_clt, Pointer.to(gpu_clt_l), NUM_CAMS * Sizeof.POINTER);
// Set GeometryCorrection data
cuMemAlloc(gpu_geometry_correction, GeometryCorrection.arrayLength(NUM_CAMS) * Sizeof.FLOAT);
cuMemAlloc(gpu_rByRDist, RBYRDIST_LEN * Sizeof.FLOAT);
cuMemAlloc(gpu_rot_deriv, 5*NUM_CAMS*3*3 * Sizeof.FLOAT);
cuMemAlloc(gpu_correction_vector, GeometryCorrection.CorrVector.LENGTH * Sizeof.FLOAT);
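// Note (from RBYRDIST_LEN/RBYRDIST_STEP above): the rByRDist table is sampled at 0.0004, so 5001
// samples cover relative (to distortionRadius) radii from 0 to 5000*0.0004 = 2.0.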
// Set task array
cuMemAlloc(gpu_tasks, tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT);
//=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============
// Set corrs array
/// cuMemAlloc(gpu_corrs, tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER);
cuMemAlloc(gpu_corr_indices, tilesX * tilesY * NUM_PAIRS * Sizeof.POINTER);
@@ -568,6 +600,27 @@ public class GPUTileProcessor {
}
public void setGeometryCorrection(GeometryCorrection gc) {
float [] fgc = gc.toFloatArray();
double [] rByRDist = gc.getRByRDist();
float [] fFByRDist = new float [rByRDist.length];
for (int i = 0; i < rByRDist.length; i++) {
fFByRDist[i] = (float) rByRDist[i];
}
cuMemcpyHtoD(gpu_geometry_correction, Pointer.to(fgc), fgc.length * Sizeof.FLOAT);
cuMemcpyHtoD(gpu_rByRDist, Pointer.to(fFByRDist), fFByRDist.length * Sizeof.FLOAT);
cuMemAlloc (gpu_rot_deriv, 5 * NUM_CAMS *3 *3 * Sizeof.FLOAT); // NCAM of 3x3 rotation matrices, plus 4 derivative matrices for each camera
}
public void setExtrinsicsVector(GeometryCorrection.CorrVector cv) {
double [] dcv = cv.toFullRollArray();
float [] fcv = new float [dcv.length];
for (int i = 0; i < dcv.length; i++) {
fcv[i] = (float) dcv[i];
}
cuMemcpyHtoD(gpu_correction_vector, Pointer.to(fcv), fcv.length * Sizeof.FLOAT);
}
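For context (not in this commit): a rough sketch of the intended call order, based on the TwoQuadCLT changes further down; the helper name runPerTileGeometry and its parameter list are made up for illustration:

static void runPerTileGeometry(GPUTileProcessor gpu,
        GeometryCorrection gc,
        GPUTileProcessor.TpTask [] tp_tasks,
        boolean use_aux) {
    gpu.setGeometryCorrection(gc);                // once: static camera geometry and rByRDist table
    gpu.setExtrinsicsVector(gc.getCorrVector());  // for each new image: extrinsic correction vector
    gpu.setTasks(tp_tasks, use_aux);              // upload the tile task list
    gpu.execRotDerivs();                          // rotation matrices and their derivatives on the GPU
    gpu.execSetTilesOffsets();                    // per-tile pixel offsets and disparity distortions
    gpu.execConverCorrectTiles();                 // existing direct conversion, now using GPU-computed offsets
}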
public void setTasks(TpTask [] tile_tasks, boolean use_aux) // why is it a class member? - just to be able to free
{
@@ -576,7 +629,7 @@ public class GPUTileProcessor {
for (int i = 0; i < num_task_tiles; i++) {
tile_tasks[i].asFloatArray(ftasks, i* TPTASK_SIZE, use_aux);
}
cuMemcpyHtoD(gpu_tasks, Pointer.to(ftasks), TPTASK_SIZE * num_task_tiles * Sizeof.FLOAT);
}
public void setCorrIndices(int [] corr_indices)
@@ -700,6 +753,7 @@ public class GPUTileProcessor {
// prepare tasks for full frame, same disparity.
// need to run setTasks(TpTask [] tile_tasks, boolean use_aux) to format/transfer to GPU memory
public TpTask [] setFullFrameImages(
boolean calc_offsets, // old way, now not needed with GPU calculation
Rectangle woi,
boolean round_woi,
float target_disparity, // apply same disparity to all tiles
@@ -725,6 +779,7 @@ public class GPUTileProcessor {
corr_masks[i] = corr_mask; // 0x3f; // all 6 correlations
}
return setFullFrameImages(
calc_offsets, // boolean calc_offsets, // old way, now not needed with GPU calculation
woi, // Rectangle woi,
round_woi, // boolean round_woi,
target_disparities, // should be tilesX*tilesY long
@@ -740,6 +795,7 @@ public class GPUTileProcessor {
}
public TpTask [] setFullFrameImages(
boolean calc_offsets, // old way, now not needed with GPU calculation
Rectangle woi, // or null
boolean round_woi,
float [] target_disparities, // should be tilesX*tilesY long
@@ -838,13 +894,15 @@ public class GPUTileProcessor {
indx++;
}
}
if (calc_offsets) {
getTileSubcamOffsets(
tp_tasks, // final TpTask[] tp_tasks, // will use // modify to have offsets for 8 cameras
(use_master? geometryCorrection_main: null), // final GeometryCorrection geometryCorrection_main,
(use_aux? geometryCorrection_aux: null), // final GeometryCorrection geometryCorrection_aux, // if null, will only calculate offsets for the main camera
ers_delay, // final double [][][] ers_delay, // if not null - fill with tile center acquisition delay
threadsMax, // final int threadsMax, // maximal number of threads to launch
debugLevel); // final int debugLevel)
}
return tp_tasks;
}
@@ -966,6 +1024,58 @@ public class GPUTileProcessor {
// All data is already copied to GPU memory
public void execRotDerivs() {
if (GPU_ROT_DERIV_kernel == null)
{
IJ.showMessage("Error", "No GPU kernel: GPU_ROT_DERIV_kernel");
return;
}
// kernel parameters: pointer to pointers
int [] GridFullWarps = {NUM_CAMS, 1, 1}; // round up
int [] ThreadsFullWarps = {3, 3, 3};
Pointer kernelParameters = Pointer.to(
Pointer.to(gpu_correction_vector),
Pointer.to(gpu_rot_deriv)
);
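// Note (inferred from the 5*NUM_CAMS*3*3 float allocation and the trot_deriv parameter type used
// below): gpu_rot_deriv receives NUM_CAMS combined 3x3 rotation matrices plus 4 derivative matrices
// per camera (presumably by azimuth, tilt, roll and zoom); the exact layout is defined by the
// trot_deriv structure on the CUDA side.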
cuCtxSynchronize();
// Call the kernel function
cuLaunchKernel(GPU_ROT_DERIV_kernel,
GridFullWarps[0], GridFullWarps[1], GridFullWarps[2], // Grid dimension
ThreadsFullWarps[0], ThreadsFullWarps[1],ThreadsFullWarps[2],// Block dimension
0, null, // Shared memory size and stream (shared - only dynamic, static is in code)
kernelParameters, null); // Kernel- and extra parameters
cuCtxSynchronize(); // remove later
}
public void execSetTilesOffsets() {
if (SET_TILES_OFFSETS_kernel == null)
{
IJ.showMessage("Error", "No GPU kernel: SET_TILES_OFFSETS_kernel");
return;
}
// kernel parameters: pointer to pointers
int [] GridFullWarps = {(num_task_tiles + TILES_PER_BLOCK_GEOM - 1)/TILES_PER_BLOCK_GEOM, 1, 1}; // round up
int [] ThreadsFullWarps = {NUM_CAMS, TILES_PER_BLOCK_GEOM, 1}; // 4,8,1
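// Launch-geometry note (assuming NUM_CAMS == 4 for the quad camera, per the "4,8,1" comment):
// TILES_PER_BLOCK_GEOM = 32/4 = 8, so each block is 4 x 8 x 1 = 32 threads (one warp), with one
// camera per thread.x and one tile per thread.y; the grid is num_task_tiles/8 blocks, rounded up above.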
Pointer kernelParameters = Pointer.to(
Pointer.to(gpu_tasks), // struct tp_task * gpu_tasks,
Pointer.to(new int[] { num_task_tiles }),// int num_tiles, // number of tiles in task list
Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
Pointer.to(gpu_correction_vector), // struct corr_vector * gpu_correction_vector,
Pointer.to(gpu_rByRDist), // float * gpu_rByRDist) // length should match RBYRDIST_LEN
Pointer.to(gpu_rot_deriv)); // trot_deriv * gpu_rot_deriv);
cuCtxSynchronize();
cuLaunchKernel(SET_TILES_OFFSETS_kernel,
GridFullWarps[0], GridFullWarps[1], GridFullWarps[2], // Grid dimension
ThreadsFullWarps[0], ThreadsFullWarps[1],ThreadsFullWarps[2],// Block dimension
0, null, // Shared memory size and stream (shared - only dynamic, static is in code)
kernelParameters, null); // Kernel- and extra parameters
cuCtxSynchronize(); // remove later
}
public void execConverCorrectTiles() {
if (GPU_CONVERT_CORRECT_TILES_kernel == null)
{
@@ -1437,6 +1547,7 @@ public class GPUTileProcessor {
// for (String sourceCode: sourceCodeUnits) {
for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
String sourceCode = sourceCodeUnits[cunit];
//System.out.print(sourceCode);
// Use the NVRTC to create a program by compiling the source code
nvrtcProgram program = new nvrtcProgram();
nvrtcCreateProgram( program, sourceCode, null, 0, null, null);
...
@@ -156,6 +156,11 @@ public class GeometryCorrection {
(float) disparityRadius //=150.0; // distance between cameras to normalize disparity units to. sqrt(2)*disparityRadius for quad
};
}
public static int arrayLength(int ncam) {
return 21+8*ncam;
}
public double [] toDoubleArray() { // for GPU comparison
return new double[] {
pixelCorrectionWidth, // =2592; // virtual camera center is at (pixelCorrectionWidth/2, pixelCorrectionHeight/2)
@@ -322,6 +327,11 @@ cameraRadius, // average distance from the "mass center" of the sensors to t
return wh;
}
public GeometryCorrection() {
// just to get the length of toFloatArray()
resetCorrVector();
}
public GeometryCorrection(double [] extrinsic_corr)
{
this.extrinsic_corr = new CorrVector(extrinsic_corr);
@@ -1328,7 +1338,7 @@ cameraRadius, // average distance from the "mass center" of the sensors to t
public class CorrVector{
public static final int LENGTH = 19; // 10;
static final int LENGTH_ANGLES =10;
static final int TILT_INDEX = 0;
static final int AZIMUTH_INDEX = 3;
@@ -3307,6 +3317,245 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
return pXY;
}
public double [][] getPortsCoordinatesAndDerivativesDbg( // To print intermediate results for comparison with the GPU
GeometryCorrection gc_main,
boolean use_rig_offsets,
Matrix [] rots,
Matrix [][] deriv_rots,
double [][] pXYderiv, // if not null, should be double[8][] - not used here
double [][] disp_dist, //
double px,
double py,
double disparity)
{
// String dbg_s = corr_vector.toString();
/* Starting with required tile center X, Y and nominal distortion, for each sensor port:
* 1) unapply common distortion (maybe for different - master camera)
* 2) apply disparity
* 3) apply rotations and zoom
* 4) re-apply distortion
* 5) return port center X and Y
* line_time
*/
// moved here so disp_dist and imu will always be created
if (disp_dist == null) {
disp_dist = new double [numSensors][4];
}
double [] imu = null;
if (disp_dist != null) {
imu = extrinsic_corr.getIMU(); // currently it is common for all channels
if ((deriv_rots == null) && ((imu[0] != 0.0) || (imu[1] != 0.0) ||(imu[2] != 0.0))){
deriv_rots = extrinsic_corr.getRotDeriveMatrices();
}
}
/// if ((disp_dist == null) && (pXYderiv != null)) {
/// disp_dist = new double [numSensors][4];
/// }
double [][] rXY = getRXY(use_rig_offsets); // may include rig offsets
double [][] pXY = new double [numSensors][2];
double pXcd = px - 0.5 * gc_main.pixelCorrectionWidth;
double pYcd = py - 0.5 * gc_main.pixelCorrectionHeight;
double rD = Math.sqrt(pXcd*pXcd + pYcd*pYcd)*0.001*gc_main.pixelSize; // distorted radius in a virtual center camera
double rND2R=gc_main.getRByRDist(rD/gc_main.distortionRadius, (debugLevel > -1));
double pXc = pXcd * rND2R; // non-distorted coordinates relative to the (0.5 * this.pixelCorrectionWidth, 0.5 * this.pixelCorrectionHeight)
double pYc = pYcd * rND2R; // in pixels
System.out.println("px="+px+", py="+py);
System.out.println("pXcd="+pXcd+", pYcd="+pYcd);
System.out.println("rD="+rD+", rND2R="+rND2R);
System.out.println("pXc="+pXc+", pYc="+pYc);
// next radial distortion coefficients are for this, not master camera (may be the same)
double [] rad_coeff={this.distortionC,this.distortionB,this.distortionA,this.distortionA5,this.distortionA6,this.distortionA7,this.distortionA8};
double fl_pix = focalLength/(0.001*pixelSize); // focal length in pixels - this camera
double ri_scale = 0.001 * this.pixelSize / this.distortionRadius;
System.out.println("fl_pix="+fl_pix+", ri_scale="+ri_scale);
double [] xyz = (disparity > 0) ? getWorldCoordinates( // USED in lwir
px, // double px,
py, // double py,
disparity, // double disparity,
true) : null; // boolean correctDistortions)
System.out.println("xyz[0]="+xyz[0]+", xyz[1]="+xyz[1]+", xyz[2]="+xyz[2]);
for (int i = 0; i < numSensors; i++){
// non-distorted XY of the shifted location of the individual sensor
double pXci0 = pXc - disparity * rXY[i][0]; // in pixels
double pYci0 = pYc - disparity * rXY[i][1];
// rectilinear, end of dealing with possibly other (master) camera, below all is for this camera distortions
System.out.println("ncam="+i+": pXci0="+pXci0+", pYci0="+pYci0);
// Convert a 2-d non-distorted vector to 3d at fl_pix distance in z direction
double [][] avi = {{pXci0}, {pYci0},{fl_pix}};
Matrix vi = new Matrix(avi); // non-distorted sensor channel view vector in pixels (z -along the common axis)
System.out.println("ncam="+i+": vi=");
vi.print(10, 5);
// Apply port-individual combined rotation/zoom matrix
Matrix rvi = rots[i].times(vi);
System.out.println("ncam="+i+": rvi="); rvi.print(10, 5);
// get back to the projection plane by normalizing vector
double norm_z = fl_pix/rvi.get(2, 0);
double pXci = rvi.get(0, 0) * norm_z;
double pYci = rvi.get(1, 0) * norm_z;
System.out.println("ncam="+i+": norm_z="+norm_z+", pXci="+pXci+", pYci="+pYci);
// Re-apply distortion
double rNDi = Math.sqrt(pXci*pXci + pYci*pYci); // in pixels
// Rdist/R=A8*R^7+A7*R^6+A6*R^5+A5*R^4+A*R^3+B*R^2+C*R+(1-A8-A7-A6-A5-A-B-C)
double ri = rNDi* ri_scale; // relative to distortion radius
// double rD2rND = (1.0 - distortionA8 - distortionA7 - distortionA6 - distortionA5 - distortionA - distortionB - distortionC);
System.out.println("ncam="+i+": rNDi="+rNDi+", ri="+ri);
double rD2rND = 1.0;
double rri = 1.0;
for (int j = 0; j < rad_coeff.length; j++){
rri *= ri;
rD2rND += rad_coeff[j]*(rri - 1.0); // Fixed
}
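// The loop above evaluates the same polynomial as in the comment: starting from 1.0 and adding
// coeff_j*(ri^(j+1) - 1) for coeff_j in {C,B,A,A5,A6,A7,A8} gives
// rD2rND = (1-C-B-A-A5-A6-A7-A8) + C*ri + B*ri^2 + A*ri^3 + A5*ri^4 + A6*ri^5 + A7*ri^6 + A8*ri^7.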
System.out.println("ncam="+i+": rri="+rri+", rD2rND="+rD2rND);
// Get port pixel coordinates by scaling the 2d vector with the Rdistorted/Rnondistorted coefficient
double pXid = pXci * rD2rND;
double pYid = pYci * rD2rND;
System.out.println("ncam="+i+": pXid="+pXid+", pYid="+pYid);
pXY[i][0] = pXid + this.pXY0[i][0];
pXY[i][1] = pYid + this.pXY0[i][1];
System.out.println("pXY["+i+"][0]="+pXY[i][0]+", pXY["+i+"][1]="+pXY[i][1]);
// used when calculating derivatives, TODO: combine calculations !
double drD2rND_dri = 0.0;
Matrix drvi_daz = null;
Matrix drvi_dtl = null;
Matrix drvi_drl = null;
double dpXci_dazimuth = 0.0;
double dpYci_dazimuth = 0.0;
double dpXci_dtilt = 0.0;
double dpYci_dtilt = 0.0;
double dpXci_droll = 0.0;
double dpYci_droll = 0.0;
if ((disp_dist != null) || (pXYderiv != null)) {
rri = 1.0;
for (int j = 0; j < rad_coeff.length; j++){
drD2rND_dri += rad_coeff[j] * (j+1) * rri;
rri *= ri;
}
if (deriv_rots != null) {
// needed for derivatives and IMU
drvi_daz = deriv_rots[i][0].times(vi);
drvi_dtl = deriv_rots[i][1].times(vi);
drvi_drl = deriv_rots[i][2].times(vi);
System.out.println("ncam="+i+": drvi_daz="); drvi_daz.print(10, 5);
System.out.println("ncam="+i+": drvi_dtl="); drvi_dtl.print(10, 5);
System.out.println("ncam="+i+": drvi_drl="); drvi_drl.print(10, 5);
dpXci_dazimuth = drvi_daz.get(0, 0) * norm_z - pXci * drvi_daz.get(2, 0) / rvi.get(2, 0);
dpYci_dazimuth = drvi_daz.get(1, 0) * norm_z - pYci * drvi_daz.get(2, 0) / rvi.get(2, 0);
dpXci_dtilt = drvi_dtl.get(0, 0) * norm_z - pXci * drvi_dtl.get(2, 0) / rvi.get(2, 0);
dpYci_dtilt = drvi_dtl.get(1, 0) * norm_z - pYci * drvi_dtl.get(2, 0) / rvi.get(2, 0);
dpXci_droll = drvi_drl.get(0, 0) * norm_z - pXci * drvi_drl.get(2, 0) / rvi.get(2, 0);
dpYci_droll = drvi_drl.get(1, 0) * norm_z - pYci * drvi_drl.get(2, 0) / rvi.get(2, 0);
System.out.println("ncam="+i+": dpXci_dazimuth="+dpXci_dazimuth+", dpYci_dazimuth="+dpYci_dazimuth);
System.out.println("ncam="+i+": dpXci_dtilt="+ dpXci_dtilt+ ", dpYci_dtilt="+ dpYci_dtilt);
System.out.println("ncam="+i+": dpXci_droll="+ dpXci_droll+ ", dpYci_droll="+ dpYci_droll);
}
}
double delta_t = 0.0;
// double [] imu = null;
double [][] dpXci_pYci_imu_lin = new double[2][3]; // null
if (disp_dist != null) {
disp_dist[i] = new double [4]; // dx/d_disp, dx_d_ccw_disp
// Not clear - what should be in Z direction before rotation here?
double [][] add0 = {
{-rXY[i][0], rXY[i][1], 0.0},
{-rXY[i][1], -rXY[i][0], 0.0},
{ 0.0, 0.0, 0.0}}; // what is last element???
Matrix dd0 = new Matrix(add0);
Matrix dd1 = rots[i].times(dd0).getMatrix(0, 1,0,1).times(norm_z); // get top left 2x2 sub-matrix
//// Matrix dd1 = dd0.getMatrix(0, 1,0,1); // get top left 2x2 sub-matrix
// now first column of 2x2 dd1 - x, y components of derivatives by disparity, second column - derivatives by ortho to disparity (~Y in 2d correlation)
// unity vector in the direction of radius
System.out.println("ncam="+i+": dd1="); dd1.print(10, 5);
double c_dist = pXci/rNDi;
double s_dist = pYci/rNDi;
double [][] arot2= {
{c_dist, s_dist},
{-s_dist, c_dist}};
Matrix rot2 = new Matrix(arot2); // convert from non-distorted X,Y to parallel and perpendicular (CCW) to the radius
System.out.println("ncam="+i+": rot2="); rot2.print(10, 5);
double [][] ascale_distort = {
{rD2rND + ri* drD2rND_dri, 0 },
{0, rD2rND}};
Matrix scale_distort = new Matrix(ascale_distort); // scale component parallel to radius as distortion derivative, perpendicular - as distortion
Matrix dd2 = rot2.transpose().times(scale_distort).times(rot2).times(dd1);
System.out.println("ncam="+i+": scale_distortXrot2Xdd1="); scale_distort.times(rot2).times(dd1).print(10, 5);
System.out.println("ncam="+i+": dd2="); dd2.print(10, 5);
disp_dist[i][0] = dd2.get(0, 0);
disp_dist[i][1] = dd2.get(0, 1);
disp_dist[i][2] = dd2.get(1, 0); // d_py/d_disp
disp_dist[i][3] = dd2.get(1, 1);
System.out.println("disp_dist["+i+"][0]="+ disp_dist[i][0]);
System.out.println("disp_dist["+i+"][1]="+ disp_dist[i][1]);
System.out.println("disp_dist["+i+"][2]="+ disp_dist[i][2]);
System.out.println("disp_dist["+i+"][3]="+ disp_dist[i][3]);
// imu = extrinsic_corr.getIMU(i); // currently it is common for all channels
// ERS linear does not yet use per-port rotations, probably not needed
// double [][] dpXci_pYci_imu_lin = new double[2][3]; // null
if ((imu != null) &&((imu[0] != 0.0) || (imu[1] != 0.0) ||(imu[2] != 0.0) ||(imu[3] != 0.0) ||(imu[4] != 0.0) ||(imu[5] != 0.0))) {
delta_t = dd2.get(1, 0) * disparity * line_time; // positive for top cameras, negative - for bottom
double ers_Xci = delta_t* (dpXci_dtilt * imu[0] + dpXci_dazimuth * imu[1] + dpXci_droll * imu[2]);
double ers_Yci = delta_t* (dpYci_dtilt * imu[0] + dpYci_dazimuth * imu[1] + dpYci_droll * imu[2]);
if (xyz != null) {
double k = SCENE_UNITS_SCALE * this.disparityRadius;
double wdisparity = disparity;
double dwdisp_dz = (k * this.focalLength / (0.001*this.pixelSize)) / (xyz[2] * xyz[2]);
System.out.println("ncam="+i+": k="+k+", wdisparity="+wdisparity+", dwdisp_dz="+dwdisp_dz);
dpXci_pYci_imu_lin[0][0] = -wdisparity / k; // dpx/ dworld_X
dpXci_pYci_imu_lin[1][1] = wdisparity / k; // dpy/ dworld_Y
dpXci_pYci_imu_lin[0][2] = (xyz[0] / k) * dwdisp_dz; // dpx/ dworld_Z
dpXci_pYci_imu_lin[1][2] = (xyz[1] / k) * dwdisp_dz; // dpy/ dworld_Z
System.out.println("ncam="+i+
": dpXci_pYci_imu_lin[0][0]="+dpXci_pYci_imu_lin[0][0]+
", dpXci_pYci_imu_lin[0][2]="+dpXci_pYci_imu_lin[0][2]);
System.out.println("ncam="+i+
": dpXci_pYci_imu_lin[1][1]="+dpXci_pYci_imu_lin[1][1]+
", dpXci_pYci_imu_lin[1][2]="+dpXci_pYci_imu_lin[1][2]);
ers_Xci += delta_t* (dpXci_pYci_imu_lin[0][0] * imu[3] + dpXci_pYci_imu_lin[0][2] * imu[5]);
ers_Yci += delta_t* (dpXci_pYci_imu_lin[1][1] * imu[4] + dpXci_pYci_imu_lin[1][2] * imu[5]);
System.out.println("ncam="+i+": ers_Xci="+ers_Xci+", ers_Yci="+ers_Yci);
}
pXY[i][0] += ers_Xci * rD2rND; // added correction to pixel X
pXY[i][1] += ers_Yci * rD2rND; // added correction to pixel Y
System.out.println("pXY["+i+"][0]="+pXY[i][0]+", pXY["+i+"][1]="+pXY[i][1]);
} else {
imu = null;
}
// TODO: calculate derivatives of pX, pY by 3 imu omegas
}
}
return pXY;
}
// private Matrix m_balance_xy = null; // [2*numSensors][2*numSensors] 8x8 matrix to make XY ports correction to have average == 0
// private Matrix m_balance_dd = null; // [2*numSensors+1)][2*numSensors] 9x8 matrix to extract disparity from dd
...
@@ -9705,6 +9705,23 @@ public class ImageDtt {
centerX,
centerY,
disparity_aux); // + disparity_corr);
if ((tileX == debug_tileX ) && (tileY == debug_tileY )) {
// will just print debug data
geometryCorrection_main.getPortsCoordinatesAndDerivativesDbg(
geometryCorrection_main, // GeometryCorrection gc_main,
false, // boolean use_rig_offsets,
corr_rots_main, // Matrix [] rots,
null, // Matrix [][] deriv_rots,
null, // double [][] pXYderiv, // if not null, should be double[8][]
disp_dist_main, // used to correct 3D correlations
centerX,
centerY,
disparity_main); // + disparity_corr);
}
// acquisition time of the tiles centers in scanline times
if (ers_delay != null) {
for (int i = 0; i < quad_main; i++) ers_delay[0][i][nTile] = centersXY_main[i][1]-geometryCorrection_main.woi_tops[i];
...
@@ -2018,7 +2018,8 @@ public class TwoQuadCLT {
clt_parameters.gpu_woi_twidth,
clt_parameters.gpu_woi_theight);
GPUTileProcessor.TpTask [] tp_tasks = gPUTileProcessor.setFullFrameImages(
false, // boolean calc_offsets, // old way, now not needed with GPU calculation
twoi, // Rectangle woi,
clt_parameters.gpu_woi_round, // boolean round_woi,
(float) clt_parameters.disparity, // float target_disparity, // apply same disparity to all tiles
0xf, // int out_image, // from which tiles to generate image (currently 0/1)
@@ -2031,7 +2032,6 @@ public class TwoQuadCLT {
threadsMax, // final int threadsMax, // maximal number of threads to launch
debugLevel); // final int debugLevel)
// Optionally save offsets here?
// EyesisCorrectionParameters.CorrectionParameters ecp,
boolean save_ports_xy = false; // true; Same files as saved with the kernels
@@ -2087,6 +2087,8 @@ public class TwoQuadCLT {
tp_tasks);
gPUTileProcessor.setTextureIndices(
texture_indices);
gPUTileProcessor.setGeometryCorrection(quadCLT_main.getGeometryCorrection()); // once
gPUTileProcessor.setExtrinsicsVector(quadCLT_main.getGeometryCorrection().getCorrVector()); // for each new image
// TODO: calculate from the camera geometry?
double[][] port_offsets = { // used only in textures to scale differences
@@ -2099,7 +2101,20 @@ public class TwoQuadCLT {
int NREPEAT = 1; // 00;
System.out.println("\n------------ Running GPU "+NREPEAT+" times ----------------");
long startGPU=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execRotDerivs();
}
long startTasksSetup=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execSetTilesOffsets();
}
long startDirectConvert=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execConverCorrectTiles();
}
// run imclt;
long startIMCLT=System.nanoTime();
@@ -2145,18 +2160,26 @@ public class TwoQuadCLT {
long endTexturesRBGA = System.nanoTime();
long endGPUTime = System.nanoTime();
long rotDerivsTime= (startTasksSetup- startGPU) /NREPEAT;
long tasksSetupTime= (startDirectConvert- startTasksSetup) /NREPEAT;
long firstGPUTime= (startIMCLT- startDirectConvert) /NREPEAT;
long runImcltTime = (endImcltTime - startIMCLT) /NREPEAT;
long runCorr2DTime = (endCorr2d - startCorr2d) /NREPEAT;
long runTexturesTime = (endTextures - startTextures) /NREPEAT;
long runTexturesRBGATime = (endTexturesRBGA - startTexturesRBGA) /NREPEAT;
long runGPUTime = (endGPUTime - startGPU) /NREPEAT;
// run corr2d
System.out.println("\n------------ End of running GPU "+NREPEAT+" times ----------------");
System.out.println("GPU run time ="+ (runGPUTime * 1.0e-6)+"ms");
System.out.println(" - rot/derivs: "+(rotDerivsTime*1.0e-6)+"ms");
System.out.println(" - tasks setup: "+(tasksSetupTime*1.0e-6)+"ms");
System.out.println(" - direct conversion: "+(firstGPUTime*1.0e-6)+"ms");
System.out.println(" - imclt: "+(runImcltTime*1.0e-6)+"ms");
System.out.println(" - corr2D: "+(runCorr2DTime*1.0e-6)+"ms");
System.out.println(" - textures: "+(runTexturesTime*1.0e-6)+"ms");
System.out.println(" - RGBA: "+(runTexturesRBGATime*1.0e-6)+"ms");
// get data back from GPU
float [][][] iclt_fimg = new float [GPUTileProcessor.NUM_CAMS][][];
for (int ncam = 0; ncam < iclt_fimg.length; ncam++) {
...