Commit 58d01fd2 authored by Andrey Filippov's avatar Andrey Filippov

converting to multi-cam

parent 6a300e5a
......@@ -92,13 +92,37 @@
<artifactId>loci_tools</artifactId>
<version>6.1.0</version>
</dependency>
<!--
<dependency>
<groupId>com.drewnoakes</groupId>
<artifactId>metadata-extractor</artifactId>
<version>2.11.0</version>
<type>java-source</type>
</dependency> -->
<!-- https://mvnrepository.com/artifact/com.drewnoakes/metadata-extractor -->
<dependency>
<groupId>com.drewnoakes</groupId>
<artifactId>metadata-extractor</artifactId>
<version>2.11.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/net.sf.ehcache/ehcache-core -->
<!--
<dependency>
<groupId>net.sf.ehcache</groupId>
<artifactId>ehcache-core</artifactId>
<version>2.6.2</version>
</dependency>
-->
<!-- https://mvnrepository.com/artifact/org.slf4j/jcl-over-slf4j -->
<!--
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>1.7.5</version>
</dependency>
-->
</dependencies>
<build>
......
......@@ -184,6 +184,9 @@ private Panel panel1,
public static QuadCLT QUAD_CLT_AUX = null;
public static TwoQuadCLT TWO_QUAD_CLT = null;
public static GPUTileProcessor GPU_TILE_PROCESSOR = null;
// Add macro for GPU_QUAD?
public static GPUTileProcessor.GpuQuad GPU_QUAD = null;
public static GPUTileProcessor.GpuQuad GPU_QUAD_AUX = null;
public static LwirReader LWIR_READER = null;
public static EyesisCorrectionParameters.DebayerParameters DEBAYER_PARAMETERS = new EyesisCorrectionParameters.DebayerParameters(
......@@ -5803,12 +5806,31 @@ private Panel panel1,
} //final int debugLevel);
}
if (GPU_QUAD == null) {
try {
GPU_QUAD = GPU_TILE_PROCESSOR. new GpuQuad(
2592,
1936,
4,
3);
} catch (Exception e) {
System.out.println("Failed to initialize GpuQuad class");
// TODO Auto-generated catch block
e.printStackTrace();
return false;
} //final int debugLevel);
}
// For now keep GPU_QUAD_AUX==null
if (COLOR_PROC_PARAMETERS_AUX == null) {
COLOR_PROC_PARAMETERS_AUX = COLOR_PROC_PARAMETERS.clone();
}
try {
TWO_QUAD_CLT.processCLTQuadCorrPairsGpu(
GPU_TILE_PROCESSOR,
// GPU_TILE_PROCESSOR,
GPU_QUAD, // GPUTileProcessor.GpuQuad gpuQuad_main,
GPU_QUAD_AUX, // GPUTileProcessor.GpuQuad gpuQuad_aux,
QUAD_CLT, // QuadCLT quadCLT_main,
QUAD_CLT_AUX, // QuadCLT quadCLT_aux,
CLT_PARAMETERS, // EyesisCorrectionParameters.DCTParameters dct_parameters,
......
......@@ -92,24 +92,16 @@ public class GPUTileProcessor {
static String GPU_RESOURCE_DIR = "kernels";
static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"};
// "*" - generated defines, first index - separately compiled unit
/* static String [][] GPU_SRC_FILES = {
{"*","dtt8x8.h","dtt8x8.cu"},
{"*","dtt8x8.h","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
*/
static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","TileProcessor.h","TileProcessor.cuh"}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.cuh","TileProcessor.cuh"}};
static String GPU_CONVERT_DIRECT_NAME = "convert_direct"; // name in C code
static String GPU_IMCLT_ALL_NAME = "imclt_rbg_all";
static String GPU_CORRELATE2D_NAME = "correlate2D"; // name in C code
// static String GPU_TEXTURES_NAME = "textures_accumulate"; // name in C code
static String GPU_TEXTURES_NAME = "textures_nonoverlap"; // name in C code
static String GPU_RBGA_NAME = "generate_RBGA"; // name in C code
static String GPU_ROT_DERIV = "calc_rot_deriv"; // calculate rotation matrices and derivatives
static String GPU_SET_TILES_OFFSETS = "get_tiles_offsets"; // calculate pixel offsets and disparity distortions
static String GPU_CALC_REVERSE_DISTORTION = "calcReverseDistortionTable"; // calculate reverse radial distortion table from gpu_geometry_correction
// pass some defines to gpu source code with #ifdef JCUDA
public static int DTT_SIZE_LOG2 = 3;
public static int DTT_SIZE = (1 << DTT_SIZE_LOG2);
......@@ -171,8 +163,11 @@ public class GPUTileProcessor {
private CUfunction GPU_SET_TILES_OFFSETS_kernel = null;
private CUfunction GPU_CALC_REVERSE_DISTORTION_kernel = null;
CUmodule module; // to access constants memory
// CPU arrays of pointers to GPU memory
// Moved to GpuQuad class
/*
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
private CUdeviceptr [] gpu_kernels_h = new CUdeviceptr[NUM_CAMS];
private CUdeviceptr [] gpu_kernel_offsets_h = new CUdeviceptr[NUM_CAMS];
......@@ -221,6 +216,10 @@ public class GPUTileProcessor {
public int num_task_tiles;
public int num_corr_tiles;
public int num_texture_tiles;
*/
// public GpuQuad [][] gpuQuad; // array of GpuQuad instances 2x2? ({{rgb, rgb_macro}, {lwir, lwir_macro})
// initialize with 4 dimensions each
public class TpTask {
public int task; // [0](+1) - generate 4 images, [4..9]+16..+512 - correlation pairs, 2 - generate texture tiles
public float target_disparity;
......@@ -273,7 +272,8 @@ public class GPUTileProcessor {
}
}
public class CltExtra{
/*
public class CltExtra{ // never used?
public float data_x; // kernel data is relative to this displacement X (0.5 pixel increments)
public float data_y; // kernel data is relative to this displacement Y (0.5 pixel increments)
public float center_x; // actual center X (use to find derivatives)
......@@ -328,7 +328,8 @@ public class GPUTileProcessor {
flt[indx++] = this.dyc_dy;
return flt;
}
};
}
*/
private static long getPointerAddress(CUdeviceptr p)
......@@ -451,7 +452,6 @@ public class GPUTileProcessor {
// Create the kernel functions (first - just test)
String [] func_names = {
// GPU_CONVERT_CORRECT_TILES_NAME,
GPU_CONVERT_DIRECT_NAME,
GPU_IMCLT_ALL_NAME,
GPU_CORRELATE2D_NAME,
......@@ -465,7 +465,6 @@ public class GPUTileProcessor {
func_names,
capability); // on my - 75
// GPU_CONVERT_CORRECT_TILES_kernel = functions[0];
GPU_CONVERT_DIRECT_kernel = functions[0];
GPU_IMCLT_ALL_kernel = functions[1];
GPU_CORRELATE2D_kernel = functions[2];
......@@ -476,7 +475,6 @@ public class GPUTileProcessor {
GPU_CALC_REVERSE_DISTORTION_kernel = functions[7];
System.out.println("GPU kernel functions initialized");
// System.out.println(GPU_CONVERT_CORRECT_TILES_kernel.toString());
System.out.println(GPU_CONVERT_DIRECT_kernel.toString());
System.out.println(GPU_IMCLT_ALL_kernel.toString());
System.out.println(GPU_CORRELATE2D_kernel.toString());
......@@ -486,11 +484,249 @@ public class GPUTileProcessor {
System.out.println(GPU_SET_TILES_OFFSETS_kernel.toString());
System.out.println(GPU_CALC_REVERSE_DISTORTION_kernel.toString());
// GPU data structures are now initialized through GpuQuad instances
}
/**
 * Returns the display titles of the correlation layers.
 * A fresh array is created on every call, so callers may modify it freely.
 * NOTE(review): the order presumably matches the correlation pair index used
 * by getCorr2DView() - confirm against the GPU kernel.
 *
 * @return six titles, one per correlation pair
 */
public static String [] getCorrTitles() {
    final String [] titles = {
            "hor-top",
            "hor-bottom",
            "vert-left",
            "vert-right",
            "diag-main",
            "diag-other"};
    return titles;
}
/**
 * Arranges per-tile 2D correlation results into one flat image per correlation pair.
 * Each tile occupies a corr_size x corr_size cell; cells are separated (and the
 * image is framed) by one-pixel lines that stay 0.0. Cells for which no
 * correlation is provided are filled with NaN.
 *
 * @param tilesX  number of tiles in a row
 * @param tilesY  number of tile rows
 * @param indices packed correlation indices: tile number in the bits above
 *                CORR_NTILE_SHIFT, pair number in the CORR_PAIRS_MASK bits
 * @param corr2d  correlation data, corr2d[n] belongs to indices[n]; each element is
 *                treated as a square tile whose side is sqrt(corr2d[0].length)
 * @param wh      if not null, receives {width, height} of the generated images
 * @return NUM_PAIRS images in line-scan order, or NUM_PAIRS empty arrays when
 *         corr2d is null or empty (wh is left untouched in that case)
 */
public static double [][] getCorr2DView(
        int tilesX,
        int tilesY,
        int [] indices,
        float [][] corr2d,
        int [] wh){ // if is [2] - return width, height
    if ((corr2d == null) || (corr2d.length == 0)) {
        return new double [NUM_PAIRS][0];
    }
    final int corr_size = (int)(Math.round(Math.sqrt(corr2d[0].length)));// make smaller later?
    final int cell = corr_size + 1; // tile pitch: tile side plus one separator pixel
    final int width = tilesX * cell + 1;
    final int height = tilesY * cell + 1;

    final double [][] data = new double [NUM_PAIRS][];
    // Template layer: NaN inside every tile cell, separators remain 0.0
    data[0] = new double[height*width];
    for (int tileRow = 0; tileRow < tilesY; tileRow++) {
        for (int tileCol = 0; tileCol < tilesX; tileCol++) {
            for (int row = 0; row < corr_size; row++) {
                final int rowStart = (tileRow * cell + row + 1) * width + (tileCol * cell + 1);
                for (int col = 0; col < corr_size; col++) {
                    data[0][rowStart + col] = Double.NaN;
                }
            }
        }
    }
    // All other layers start as copies of the template
    for (int pair = 1; pair < NUM_PAIRS; pair++) {
        data[pair] = data[0].clone();
    }

    // Drop each available correlation tile into its cell of the matching layer
    for (int n = 0; n < indices.length; n++) {
        final int packed = indices[n];
        final int nt = packed >> CORR_NTILE_SHIFT;
        final int np = packed & CORR_PAIRS_MASK;
        assert np < NUM_PAIRS : "invalid correllation pair";
        final int tx = nt % tilesX;
        final int ty = nt / tilesX;
        for (int row = 0; row < corr_size; row++) {
            final int dstRow = (ty * cell + row + 1) * width + (tx * cell + 1);
            final int srcRow = row * corr_size;
            for (int col = 0; col < corr_size; col++) {
                data[np][dstRow + col] = corr2d[n][srcRow + col];
            }
        }
    }

    if (wh != null) {
        wh[0] = width;
        wh[1] = height;
    }
    return data;
}
// private static CUfunction [] createFunctions(
/**
 * Compiles the CUDA source units with NVRTC, links the resulting PTX together with
 * the library at LIBRARY_PATH, loads the linked image into {@link #module} and
 * looks up the requested kernels.
 *
 * @param sourceCodeUnits source text of each separately compiled unit
 * @param kernelNames     names of the kernel functions to look up in the linked module
 * @param capability      compute capability, passed to NVRTC as
 *                        --gpu-architecture=compute_&lt;capability&gt;
 * @return kernel function handles, in the same order as kernelNames
 * @throws IOException if any of the source units fails to compile
 */
private CUfunction [] createFunctions(
        String [] sourceCodeUnits,
        String [] kernelNames,
        int capability
        ) throws IOException
{
    CUfunction [] functions = new CUfunction [kernelNames.length];
    byte[][] ptxDataUnits = new byte [sourceCodeUnits.length][];
    for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
        String sourceCode = sourceCodeUnits[cunit];
        // Use the NVRTC to create a program by compiling the source code
        nvrtcProgram program = new nvrtcProgram();
        nvrtcCreateProgram( program, sourceCode, null, 0, null, null);
        try {
            String options[] = {"--gpu-architecture=compute_"+capability};
            // Failure is tracked per unit: a shared flag would stay set after the first
            // successful unit and hide compilation errors in the following ones
            Exception compileError = null;
            try {
                nvrtcCompileProgram(program, options.length, options);
            } catch (Exception e) {
                compileError = e;
                System.out.println("nvrtcCompileProgram() FAILED");
            }
            // Compilation log with errors/warnings (printed even when compilation failed)
            String programLog[] = new String[1];
            nvrtcGetProgramLog(program, programLog);
            String log = (programLog[0] == null) ? "" : programLog[0].trim();
            if (!log.isEmpty())
            {
                System.err.println("Program compilation log:\n" + log);
            }
            if (compileError != null) {
                throw new IOException("Could not compile program", compileError);
            }
            // Get the PTX code of the compiled program (not the binary)
            String[] ptx = new String[1];
            nvrtcGetPTX(program, ptx);
            ptxDataUnits[cunit] = ptx[0].getBytes();
            System.out.println("ptxDataUnits["+cunit+"].length="+ptxDataUnits[cunit].length);
        } finally {
            // Release the NVRTC program on both the success and the failure path
            nvrtcDestroyProgram(program);
        }
    }
    JITOptions jitOptions = new JITOptions();
    jitOptions.putInt(CU_JIT_LOG_VERBOSE, 1);
    CUlinkState state = new CUlinkState();
    cuLinkCreate(jitOptions, state);
    cuLinkAddFile(state, CU_JIT_INPUT_LIBRARY, LIBRARY_PATH, jitOptions);
    for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
        cuLinkAddData(state, CU_JIT_INPUT_PTX, Pointer.to(ptxDataUnits[cunit]), ptxDataUnits[cunit].length, "input"+cunit+".ptx", jitOptions); // CUDA_ERROR_INVALID_PTX
    }
    long size[] = { 0 };
    Pointer image = new Pointer();
    // Exceptions are disabled around the link step so that its result code can be printed;
    // NOTE(review): the result code is only reported, not acted upon - a failed link
    // will surface in cuModuleLoadDataEx() below
    JCudaDriver.setExceptionsEnabled(false);
    int cuda_result = cuLinkComplete(state, image, size);
    System.out.println("cuLinkComplete() -> "+cuda_result);
    JCudaDriver.setExceptionsEnabled(true);
    module = new CUmodule();
    cuModuleLoadDataEx(module, image, 0, new int[0], Pointer.to(new int[0]));
    cuLinkDestroy(state);
    for (int i = 0; i < kernelNames.length; i++) {
        // Find the function in the source by name, get its pointer
        functions[i] = new CUfunction();
        cuModuleGetFunction(functions[i] , module, kernelNames[i]);
    }
    return functions;
}
/**
 * Reads the whole file and decodes it as UTF-8.
 *
 * @param path path of the file to read
 * @return file contents, or null if the file could not be read; the reason is
 *         reported on System.err so that a missing/unreadable file is not silent
 */
static String readFileAsString(String path)
{
    try {
        return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
    } catch (IOException e) {
        // Keep the null-on-failure contract for callers, but do not hide the cause
        System.err.println("readFileAsString(): failed to read '" + path + "': " + e);
        return null;
    }
}
public class GpuQuad{ // quad camera description
public final int IMG_WIDTH;
public final int IMG_HEIGHT;
public final int NUM_CAMS;
public final int NUM_COLORS; // maybe should always be 3?
// public final GPUTileProcessor gPUTileProcessor;
// CPU arrays of pointers to GPU memory
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
private CUdeviceptr [] gpu_kernels_h;
private CUdeviceptr [] gpu_kernel_offsets_h;
private CUdeviceptr [] gpu_bayer_h;
private CUdeviceptr [] gpu_clt_h;
private CUdeviceptr [] gpu_corr_images_h;
// GPU pointers to array of GPU pointers
private CUdeviceptr gpu_kernels;
private CUdeviceptr gpu_kernel_offsets;
private CUdeviceptr gpu_bayer;
private CUdeviceptr gpu_tasks;
private CUdeviceptr gpu_corrs;
private CUdeviceptr gpu_textures;
private CUdeviceptr gpu_clt;
private CUdeviceptr gpu_4_images;
private CUdeviceptr gpu_corr_indices;
private CUdeviceptr gpu_num_corr_tiles;
private CUdeviceptr gpu_texture_indices_ovlp;
private CUdeviceptr gpu_num_texture_ovlp;
private CUdeviceptr gpu_texture_indices;
private CUdeviceptr gpu_texture_indices_len;
private CUdeviceptr gpu_diff_rgb_combo;
private CUdeviceptr gpu_color_weights;
private CUdeviceptr gpu_generate_RBGA_params;
private CUdeviceptr gpu_woi;
private CUdeviceptr gpu_textures_rgba;
private CUdeviceptr gpu_correction_vector;
private CUdeviceptr gpu_rot_deriv;
private CUdeviceptr gpu_geometry_correction;
private CUdeviceptr gpu_rByRDist;
private CUdeviceptr gpu_active_tiles;
private CUdeviceptr gpu_num_active_tiles;
private int mclt_stride;
private int corr_stride;
private int imclt_stride;
private int texture_stride;
private int texture_stride_rgba;
private int num_task_tiles;
private int num_corr_tiles;
private int num_texture_tiles;
public GpuQuad(
// final GPUTileProcessor gPUTileProcessor,
final int img_width,
final int img_height,
final int num_cams,
final int num_colors
) {
// this.gPUTileProcessor = gPUTileProcessor;
IMG_WIDTH = img_width;
IMG_HEIGHT = img_height;
NUM_CAMS = num_cams;
NUM_COLORS = num_colors; // maybe should always be 3?
// CPU arrays of pointers to GPU memory
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
gpu_kernels_h = new CUdeviceptr[NUM_CAMS];
gpu_kernel_offsets_h = new CUdeviceptr[NUM_CAMS];
gpu_bayer_h = new CUdeviceptr[NUM_CAMS];
gpu_clt_h = new CUdeviceptr[NUM_CAMS];
gpu_corr_images_h= new CUdeviceptr[NUM_CAMS];
// GPU pointers to array of GPU pointers
gpu_kernels = new CUdeviceptr();
gpu_kernel_offsets = new CUdeviceptr();
gpu_bayer = new CUdeviceptr();
gpu_tasks = new CUdeviceptr(); // allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT
gpu_corrs = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.FLOAT
gpu_textures = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
gpu_clt = new CUdeviceptr();
gpu_4_images = new CUdeviceptr();
gpu_corr_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_num_corr_tiles = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_texture_indices_ovlp =new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_num_texture_ovlp = new CUdeviceptr(); // 8 ints
gpu_texture_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_texture_indices_len = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_diff_rgb_combo = new CUdeviceptr(); // 1 int
gpu_color_weights = new CUdeviceptr(); // allocate 3 * Sizeof.FLOAT
gpu_generate_RBGA_params =new CUdeviceptr(); // allocate 5 * Sizeof.FLOAT
gpu_woi = new CUdeviceptr(); // 4 integers (x, y, width, height) Rectangle - in tiles
gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
gpu_correction_vector= new CUdeviceptr();
gpu_rot_deriv= new CUdeviceptr(); // used internally by device, may be read to CPU for testing
gpu_geometry_correction= new CUdeviceptr();
gpu_rByRDist= new CUdeviceptr(); // calculated once for the camera distortion model in CPU (move to GPU?)
gpu_active_tiles = new CUdeviceptr(); // TILESX*TILESY*sizeof(int)
gpu_num_active_tiles = new CUdeviceptr(); // 1 int
// Init data arrays for all kernels
int tilesX = IMG_WIDTH / DTT_SIZE;
int tilesY = IMG_HEIGHT / DTT_SIZE;
long [] device_stride = new long [1];
for (int ncam = 0; ncam < NUM_CAMS; ncam++) {
gpu_kernels_h[ncam] = new CUdeviceptr();
cuMemAlloc(gpu_kernels_h[ncam],KERN_SIZE * Sizeof.FLOAT ); // public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
......@@ -504,7 +740,6 @@ public class GPUTileProcessor {
IMG_HEIGHT, // long Height,
Sizeof.FLOAT); // int ElementSizeBytes)
mclt_stride = (int)(device_stride[0] / Sizeof.FLOAT);
gpu_corr_images_h[ncam] = new CUdeviceptr();
cuMemAllocPitch (
gpu_corr_images_h[ncam], // CUdeviceptr dptr,
......@@ -515,7 +750,6 @@ public class GPUTileProcessor {
imclt_stride = (int)(device_stride[0] / Sizeof.FLOAT);
gpu_clt_h[ncam] = new CUdeviceptr();
cuMemAlloc(gpu_clt_h[ncam],tilesY * tilesX * NUM_COLORS * 4 * DTT_SIZE * DTT_SIZE * Sizeof.FLOAT ); // public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
}
// now create device arrays pointers
if (Sizeof.POINTER != Sizeof.LONG) {
......@@ -558,7 +792,7 @@ public class GPUTileProcessor {
// Set task array
cuMemAlloc(gpu_tasks, tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT);
//=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============
//=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============
// Set corrs array
cuMemAlloc(gpu_corr_indices, tilesX * tilesY * NUM_PAIRS * Sizeof.FLOAT);
cuMemAlloc(gpu_num_corr_tiles, 1 * Sizeof.FLOAT);
......@@ -589,7 +823,6 @@ public class GPUTileProcessor {
NUM_PAIRS * tilesX * tilesY, // long Height,
Sizeof.FLOAT); // int ElementSizeBytes)
corr_stride = (int)(device_stride[0] / Sizeof.FLOAT);
int max_texture_size = (NUM_COLORS + 1 + (NUM_CAMS + NUM_COLORS + 1)) * (2 * DTT_SIZE)* (2 * DTT_SIZE);
cuMemAllocPitch (
gpu_textures, // CUdeviceptr dptr,
......@@ -601,7 +834,6 @@ public class GPUTileProcessor {
int max_rgba_width = (tilesX + 1) * DTT_SIZE;
int max_rgba_height = (tilesY + 1) * DTT_SIZE;
int max_rbga_slices = NUM_COLORS + 1;
cuMemAllocPitch (
gpu_textures_rgba, // CUdeviceptr dptr,
device_stride, // long[] pPitch,
......@@ -609,7 +841,6 @@ public class GPUTileProcessor {
max_rgba_height * max_rbga_slices, // long Height,
Sizeof.FLOAT); // int ElementSizeBytes)
texture_stride_rgba = (int)(device_stride[0] / Sizeof.FLOAT);
}
public void setGeometryCorrection(GeometryCorrection gc,
......@@ -681,11 +912,7 @@ public class GPUTileProcessor {
return texture_indices;
}
//texture_indices
//texture_indices
public void setConvolutionKernel(
float [] kernel, // [tileY][tileX][color][..]
float [] kernel_offsets,
......@@ -900,30 +1127,25 @@ public class GPUTileProcessor {
num_tiles--;
}
}
//nextInt(int bound)
//nextInt(int bound)
}
if (dbg1) {
// mask[(woi.y-1) * tilesX + (woi.x-1)] = true;
mask[(woi.y+woi.height) * tilesX + (woi.x+woi.width)] = true;
num_tiles += 1; // 2;
}
// TpTask [] tp_tasks = new TpTask[tilesX*tilesY];
TpTask [] tp_tasks = new TpTask[num_tiles];
int indx = 0;
for (int ty = 0; ty < tilesY; ty++) {
for (int tx = 0; tx < tilesX; tx++) if (mask[ty * tilesX + tx]) {
// tp_tasks[indx] = new TpTask(tx,ty, target_disparities[indx], 1); // task == 1 for now
// Only generate for non-empty tasks, use 1 empty empty as a terminator?
// Only generate for non-empty tasks, use 1 empty empty as a terminator?
tp_tasks[indx] = new TpTask(tx,ty, target_disparities[indx],
((out_images[indx] & 0x0f) << 0) |
((corr_mask [indx] & 0x3f) << 4)
); // task == 1 for now
indx++;
}
}
......@@ -985,7 +1207,6 @@ public class GPUTileProcessor {
num_textures++;
}
}
int [] iarr = new int[num_textures];
num_textures = 0;
int b = (1 << LIST_TEXTURE_BIT);
......@@ -998,67 +1219,7 @@ public class GPUTileProcessor {
return iarr;
}
/**
 * Returns the display titles of the correlation layers.
 * A fresh array is created on every call, so callers may modify it freely.
 * NOTE(review): the order presumably matches the correlation pair index used
 * by getCorr2DView() - confirm against the GPU kernel.
 *
 * @return six titles, one per correlation pair
 */
public static String [] getCorrTitles() {
    final String [] titles = {
            "hor-top",
            "hor-bottom",
            "vert-left",
            "vert-right",
            "diag-main",
            "diag-other"};
    return titles;
}
/**
 * Arranges per-tile 2D correlation results into one flat image per correlation pair.
 * Each tile occupies a corr_size x corr_size cell; cells are separated (and the
 * image is framed) by one-pixel lines that stay 0.0. Cells for which no
 * correlation is provided are filled with NaN.
 *
 * @param tilesX  number of tiles in a row
 * @param tilesY  number of tile rows
 * @param indices packed correlation indices: tile number in the bits above
 *                CORR_NTILE_SHIFT, pair number in the CORR_PAIRS_MASK bits
 * @param corr2d  correlation data, corr2d[n] belongs to indices[n]; each element is
 *                treated as a square tile whose side is sqrt(corr2d[0].length)
 * @param wh      if not null, receives {width, height} of the generated images
 * @return NUM_PAIRS images in line-scan order, or NUM_PAIRS empty arrays when
 *         corr2d is null or empty (wh is left untouched in that case)
 */
public static double [][] getCorr2DView(
        int tilesX,
        int tilesY,
        int [] indices,
        float [][] corr2d,
        int [] wh){ // if is [2] - return width, height
    if ((corr2d == null) || (corr2d.length == 0)) {
        return new double [NUM_PAIRS][0];
    }
    final int corr_size = (int)(Math.round(Math.sqrt(corr2d[0].length)));// make smaller later?
    final int cell = corr_size + 1; // tile pitch: tile side plus one separator pixel
    final int width = tilesX * cell + 1;
    final int height = tilesY * cell + 1;

    final double [][] data = new double [NUM_PAIRS][];
    // Template layer: NaN inside every tile cell, separators remain 0.0
    data[0] = new double[height*width];
    for (int tileRow = 0; tileRow < tilesY; tileRow++) {
        for (int tileCol = 0; tileCol < tilesX; tileCol++) {
            for (int row = 0; row < corr_size; row++) {
                final int rowStart = (tileRow * cell + row + 1) * width + (tileCol * cell + 1);
                for (int col = 0; col < corr_size; col++) {
                    data[0][rowStart + col] = Double.NaN;
                }
            }
        }
    }
    // All other layers start as copies of the template
    for (int pair = 1; pair < NUM_PAIRS; pair++) {
        data[pair] = data[0].clone();
    }

    // Drop each available correlation tile into its cell of the matching layer
    for (int n = 0; n < indices.length; n++) {
        final int packed = indices[n];
        final int nt = packed >> CORR_NTILE_SHIFT;
        final int np = packed & CORR_PAIRS_MASK;
        assert np < NUM_PAIRS : "invalid correllation pair";
        final int tx = nt % tilesX;
        final int ty = nt / tilesX;
        for (int row = 0; row < corr_size; row++) {
            final int dstRow = (ty * cell + row + 1) * width + (tx * cell + 1);
            final int srcRow = row * corr_size;
            for (int col = 0; col < corr_size; col++) {
                data[np][dstRow + col] = corr2d[n][srcRow + col];
            }
        }
    }

    if (wh != null) {
        wh[0] = width;
        wh[1] = height;
    }
    return data;
}
// All data is already copied to GPU memory
// All data is already copied to GPU memory
public void execRotDerivs() {
if (GPU_ROT_DERIV_kernel == null)
......@@ -1083,6 +1244,7 @@ public class GPUTileProcessor {
kernelParameters, null); // Kernel- and extra parameters
cuCtxSynchronize(); // remove later
}
public void execCalcReverseDistortions() {
if (GPU_CALC_REVERSE_DISTORTION_kernel == null)
{
......@@ -1201,7 +1363,6 @@ public class GPUTileProcessor {
cuCtxSynchronize();
}
public void execCorr2D(
double [] scales,
double fat_zero,
......@@ -1217,8 +1378,6 @@ public class GPUTileProcessor {
float fscale0 = (float) scales[0];
float fscale1 = (num_colors >1)?((float) scales[1]):0.0f;
float fscale2 = (num_colors >2)?((float) scales[2]):0.0f;
// int [] GridFullWarps = {(num_corr_tiles + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1};
// int [] ThreadsFullWarps = {CORR_THREADS_PER_TILE, CORR_TILES_PER_BLOCK, 1};
int [] GridFullWarps = {1, 1, 1};
int [] ThreadsFullWarps = {1, 1, 1};
Pointer kernelParameters = Pointer.to(
......@@ -1300,11 +1459,6 @@ public class GPUTileProcessor {
Pointer.to(new int[] {num_colors}), // int colors, // number of colors (3/1)
Pointer.to(new int[] {iis_lwir}), // int is_lwir, // do not perform shot correction
Pointer.to(gpu_generate_RBGA_params), // float generate_RBGA_params[5],
// Pointer.to(new float[] {(float) min_shot}), // float min_shot, // 10.0
// Pointer.to(new float[] {(float) scale_shot}), // float scale_shot, // 3.0
// Pointer.to(new float[] {(float) diff_sigma}), // float diff_sigma, // pixel value/pixel change
// Pointer.to(new float[] {(float) diff_threshold}),// float diff_threshold, // pixel value/pixel change
// Pointer.to(new float[] {(float) min_agree}), // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G
Pointer.to(new int[] { idust_remove }), // int dust_remove, // Do not reduce average weight when only one image differes much from the average
Pointer.to(new int[] {0}), // int keep_weights, // return channel weights after A in RGBA
......@@ -1371,15 +1525,8 @@ public class GPUTileProcessor {
Pointer.to(new int[] { num_colors }),
Pointer.to(new int[] { iis_lwir }),
Pointer.to(gpu_generate_RBGA_params), // float generate_RBGA_params[5],
// Pointer.to(new float[] {(float) min_shot}), // float min_shot, // 10.0
// Pointer.to(new float[] {(float) scale_shot}), // float scale_shot, // 3.0
// Pointer.to(new float[] {(float) diff_sigma}), // float diff_sigma, // pixel value/pixel change
// Pointer.to(new float[] {(float) diff_threshold}),// float diff_threshold, // pixel value/pixel change
// Pointer.to(new float[] {(float) min_agree}), // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G
Pointer.to(new int[] { idust_remove }),
// Pointer.to(new int[] { 0}), // texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
// Pointer.to(new int[] {0}), // gpu_textures),
Pointer.to(new int[] {texture_stride}), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_textures),
Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
......@@ -1393,9 +1540,6 @@ public class GPUTileProcessor {
cuCtxSynchronize();
}
public float [][] getCorr2D(int corr_rad){
int corr_size = (2 * corr_rad + 1) * (2 * corr_rad + 1);
float [] cpu_corrs = new float [ num_corr_tiles * corr_size];
......@@ -1419,6 +1563,7 @@ public class GPUTileProcessor {
}
return corrs;
}
public int [] getCorrIndices() {
float [] fnum_corrs = new float[1];
cuMemcpyDtoH(Pointer.to(fnum_corrs), gpu_num_corr_tiles, 1 * Sizeof.FLOAT);
......@@ -1437,17 +1582,6 @@ public class GPUTileProcessor {
// read extra data for macro generation: 4 DIFFs, 4 of R, 4 of B, 4 of G
public float [][] getExtra(){
int [] texture_indices = getTextureIndices();
/*
float [] fnum_tiles = new float[1];
cuMemcpyDtoH(Pointer.to(fnum_tiles), gpu_num_texture_ovlp, 1 * Sizeof.FLOAT);
int num_tiles = Float.floatToIntBits(fnum_tiles[0]);
float [] ftiles = new float[num_tiles];
cuMemcpyDtoH(Pointer.to(fnum_tiles), gpu_texture_indices_ovlp, num_tiles * Sizeof.FLOAT);
int [] tiles = new int[num_tiles];
for (int i = 0; i < num_tiles; i++) {
tiles[i] = Float.floatToIntBits(ftiles[i]);
}
*/
int num_tile_extra = NUM_CAMS*(NUM_COLORS+1);
float [] diff_rgb_combo = new float[texture_indices.length * num_tile_extra];
cuMemcpyDtoH(Pointer.to(diff_rgb_combo), gpu_diff_rgb_combo, diff_rgb_combo.length * Sizeof.FLOAT);
......@@ -1492,7 +1626,6 @@ public class GPUTileProcessor {
copy_rbga.srcDevice = gpu_textures_rgba;
copy_rbga.srcPitch = texture_stride_rgba * Sizeof.FLOAT;
copy_rbga.dstMemoryType = CUmemorytype.CU_MEMORYTYPE_HOST;
// copy_woi.dstHost = Pointer.to(rslt);
copy_rbga.dstPitch = woi.width * Sizeof.FLOAT;
copy_rbga.WidthInBytes = woi.width * Sizeof.FLOAT;
......@@ -1516,7 +1649,6 @@ public class GPUTileProcessor {
int texture_slice_size = (2 * DTT_SIZE)* (2 * DTT_SIZE); // number of (float) elements in a single slice of a tile
int texture_tile_size = texture_slices * texture_slice_size; // number of (float) elements in a multi-slice tile
int texture_size = texture_tile_size * num_texture_tiles; // number of (float) elements in the whole texture
// float [] cpu_textures = new float [ num_texture_tiles * texture_size];
float [] cpu_textures = new float [texture_size];
CUDA_MEMCPY2D copyD2H = new CUDA_MEMCPY2D();
copyD2H.srcMemoryType = CUmemorytype.CU_MEMORYTYPE_DEVICE;
......@@ -1588,7 +1720,6 @@ public class GPUTileProcessor {
return textures;
}
public double [][][][] doubleTextures(
Rectangle woi,
int [] indices,
......@@ -1617,9 +1748,6 @@ public class GPUTileProcessor {
return textures;
}
public float [][] getRBG (int ncam){
int height = (IMG_HEIGHT + DTT_SIZE);
int width = (IMG_WIDTH + DTT_SIZE);
......@@ -1649,98 +1777,6 @@ public class GPUTileProcessor {
return fimg;
}
// private static CUfunction [] createFunctions(
/**
 * Compiles the CUDA source units with NVRTC, links the resulting PTX together with
 * the library at LIBRARY_PATH, loads the linked image into {@link #module} and
 * looks up the requested kernels.
 *
 * @param sourceCodeUnits source text of each separately compiled unit
 * @param kernelNames     names of the kernel functions to look up in the linked module
 * @param capability      compute capability, passed to NVRTC as
 *                        --gpu-architecture=compute_&lt;capability&gt;
 * @return kernel function handles, in the same order as kernelNames
 * @throws IOException if any of the source units fails to compile
 */
private CUfunction [] createFunctions(
        String [] sourceCodeUnits,
        String [] kernelNames,
        int capability
        ) throws IOException
{
    CUfunction [] functions = new CUfunction [kernelNames.length];
    byte[][] ptxDataUnits = new byte [sourceCodeUnits.length][];
    for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
        String sourceCode = sourceCodeUnits[cunit];
        // Use the NVRTC to create a program by compiling the source code
        nvrtcProgram program = new nvrtcProgram();
        nvrtcCreateProgram( program, sourceCode, null, 0, null, null);
        try {
            String options[] = {"--gpu-architecture=compute_"+capability};
            // Failure is tracked per unit: a shared flag would stay set after the first
            // successful unit and hide compilation errors in the following ones
            Exception compileError = null;
            try {
                nvrtcCompileProgram(program, options.length, options);
            } catch (Exception e) {
                compileError = e;
                System.out.println("nvrtcCompileProgram() FAILED");
            }
            // Compilation log with errors/warnings (printed even when compilation failed)
            String programLog[] = new String[1];
            nvrtcGetProgramLog(program, programLog);
            String log = (programLog[0] == null) ? "" : programLog[0].trim();
            if (!log.isEmpty())
            {
                System.err.println("Program compilation log:\n" + log);
            }
            if (compileError != null) {
                throw new IOException("Could not compile program", compileError);
            }
            // Get the PTX code of the compiled program (not the binary)
            String[] ptx = new String[1];
            nvrtcGetPTX(program, ptx);
            ptxDataUnits[cunit] = ptx[0].getBytes();
            System.out.println("ptxDataUnits["+cunit+"].length="+ptxDataUnits[cunit].length);
        } finally {
            // Release the NVRTC program on both the success and the failure path
            nvrtcDestroyProgram(program);
        }
    }
    JITOptions jitOptions = new JITOptions();
    jitOptions.putInt(CU_JIT_LOG_VERBOSE, 1);
    CUlinkState state = new CUlinkState();
    cuLinkCreate(jitOptions, state);
    cuLinkAddFile(state, CU_JIT_INPUT_LIBRARY, LIBRARY_PATH, jitOptions);
    for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
        cuLinkAddData(state, CU_JIT_INPUT_PTX, Pointer.to(ptxDataUnits[cunit]), ptxDataUnits[cunit].length, "input"+cunit+".ptx", jitOptions); // CUDA_ERROR_INVALID_PTX
    }
    long size[] = { 0 };
    Pointer image = new Pointer();
    // Exceptions are disabled around the link step so that its result code can be printed;
    // NOTE(review): the result code is only reported, not acted upon - a failed link
    // will surface in cuModuleLoadDataEx() below
    JCudaDriver.setExceptionsEnabled(false);
    int cuda_result = cuLinkComplete(state, image, size);
    System.out.println("cuLinkComplete() -> "+cuda_result);
    JCudaDriver.setExceptionsEnabled(true);
    module = new CUmodule();
    cuModuleLoadDataEx(module, image, 0, new int[0], Pointer.to(new int[0]));
    cuLinkDestroy(state);
    for (int i = 0; i < kernelNames.length; i++) {
        // Find the function in the source by name, get its pointer
        functions[i] = new CUfunction();
        cuModuleGetFunction(functions[i] , module, kernelNames[i]);
    }
    return functions;
}
/**
 * Reads the whole file and decodes it as UTF-8.
 *
 * @param path path of the file to read
 * @return file contents, or null if the file could not be read; the reason is
 *         reported on System.err so that a missing/unreadable file is not silent
 */
static String readFileAsString(String path)
{
    try {
        return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
    } catch (IOException e) {
        // Keep the null-on-failure contract for callers, but do not hide the cause
        System.err.println("readFileAsString(): failed to read '" + path + "': " + e);
        return null;
    }
}
public void getTileSubcamOffsets(
final TpTask[] tp_tasks, // will use // modify to have offsets for 8 cameras
final GeometryCorrection geometryCorrection_main,
......@@ -1937,9 +1973,6 @@ public class GPUTileProcessor {
return lpf;
}
} // end of public class GpuQuad
}
} // end of public class GPUTileProcessor
......@@ -452,7 +452,9 @@ public class TwoQuadCLT {
}
public void processCLTQuadCorrPairsGpu(
GPUTileProcessor gPUTileProcessor,
// GPUTileProcessor gPUTileProcessor,
GPUTileProcessor.GpuQuad gpuQuad_main,
GPUTileProcessor.GpuQuad gpuQuad_aux,
QuadCLT quadCLT_main,
QuadCLT quadCLT_aux,
CLTParameters clt_parameters,
......@@ -516,7 +518,9 @@ public class TwoQuadCLT {
// Temporarily processing individually with the old code
processCLTQuadCorrPairGpu(
gPUTileProcessor, // GPUTileProcessor gPUTileProcessor,
// gPUTileProcessor, // GPUTileProcessor gPUTileProcessor,
gpuQuad_main, // GPUTileProcessor.GpuQuad gpuQuad_main,
gpuQuad_aux, // GPUTileProcessor.GpuQuad gpuQuad_aux,
quadCLT_main, // QuadCLT quadCLT_main,
quadCLT_aux, // QuadCLT quadCLT_aux,
imp_srcs_main, // ImagePlus [] imp_quad_main,
......@@ -1934,7 +1938,9 @@ public class TwoQuadCLT {
}
public ImagePlus [] processCLTQuadCorrPairGpu(
GPUTileProcessor gPUTileProcessor,
// GPUTileProcessor gPUTileProcessor,
GPUTileProcessor.GpuQuad gpuQuad_main,
GPUTileProcessor.GpuQuad gpuQuad_aux,
QuadCLT quadCLT_main,
QuadCLT quadCLT_aux,
ImagePlus [] imp_quad_main,
......@@ -1980,17 +1986,17 @@ public class TwoQuadCLT {
image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_g),
image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_m)
};
gPUTileProcessor.setLpfRbg(
gpuQuad_main.setLpfRbg(
lpf_rgb);
float [] lpf_flat = image_dtt.floatGetCltLpfFd(clt_parameters.getGpuCorrSigma(is_mono));
gPUTileProcessor.setLpfCorr(
gpuQuad_main.setLpfCorr(
"lpf_corr", // String const_name, // "lpf_corr"
lpf_flat);
float [] lpf_rb_flat = image_dtt.floatGetCltLpfFd(clt_parameters.getGpuCorrRBSigma(is_mono));
gPUTileProcessor.setLpfCorr(
gpuQuad_main.setLpfCorr(
"lpf_rb_corr", // String const_name, // "lpf_corr"
lpf_rb_flat);
......@@ -2026,11 +2032,11 @@ public class TwoQuadCLT {
debugLevel); // final int debugLevel);
gPUTileProcessor.setConvolutionKernels(
gpuQuad_main.setConvolutionKernels(
(use_aux?quadCLT_aux.getCLTKernels() : quadCLT_main.getCLTKernels()), // double [][][][][][] clt_kernels,
false); // boolean force)
gPUTileProcessor.setBayerImages(
gpuQuad_main.setBayerImages(
(use_aux? quadCLT_aux.image_data: quadCLT_main.image_data), // double [][][] bayer_data,
true); // boolean force);
......@@ -2040,7 +2046,7 @@ public class TwoQuadCLT {
clt_parameters.gpu_woi_ty,
clt_parameters.gpu_woi_twidth,
clt_parameters.gpu_woi_theight);
GPUTileProcessor.TpTask [] tp_tasks = gPUTileProcessor.setFullFrameImages(
GPUTileProcessor.TpTask [] tp_tasks = gpuQuad_main.setFullFrameImages(
false, // boolean calc_offsets, // old way, now not needed with GPU calculation
twoi, // Rectangle woi,
clt_parameters.gpu_woi_round, // boolean round_woi,
......@@ -2096,26 +2102,14 @@ public class TwoQuadCLT {
}
}
gPUTileProcessor.setTasks(
gpuQuad_main.setTasks(
tp_tasks, // TpTask [] tile_tasks,
use_aux); // boolean use_aux)
// int [] corr_indices = gPUTileProcessor.getCorrTasks(
// tp_tasks);
// corr_indices array of integers to be passed to GPU
// gPUTileProcessor.setCorrIndices(corr_indices);
/*
int [] texture_indices = gPUTileProcessor.getTextureTasks(
tp_tasks);
gPUTileProcessor.setTextureIndices(
texture_indices);
*/
gPUTileProcessor.setGeometryCorrection(
gpuQuad_main.setGeometryCorrection(
quadCLT_main.getGeometryCorrection(),
false); // boolean use_java_rByRDist) { // false - use newer GPU execCalcReverseDistortions); // once
gPUTileProcessor.setExtrinsicsVector(quadCLT_main.getGeometryCorrection().getCorrVector()); // for each new image
gpuQuad_main.setExtrinsicsVector(quadCLT_main.getGeometryCorrection().getCorrVector()); // for each new image
/* // TODO: calculate from the camera geometry?
double[][] port_offsets = { // used only in textures to scale differences
......@@ -2129,33 +2123,33 @@ public class TwoQuadCLT {
System.out.println("\n------------ Running GPU "+NREPEAT+" times ----------------");
long startGPU=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execCalcReverseDistortions();
gpuQuad_main.execCalcReverseDistortions();
}
long startRotDerivs=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execRotDerivs();
gpuQuad_main.execRotDerivs();
}
long startTasksSetup=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execSetTilesOffsets();
gpuQuad_main.execSetTilesOffsets();
}
long startDirectConvert=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execConvertDirect();
gpuQuad_main.execConvertDirect();
}
// run imclt;
long startIMCLT=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execImcltRbgAll(quadCLT_main.isMonochrome());
gpuQuad_main.execImcltRbgAll(quadCLT_main.isMonochrome());
}
long endImcltTime = System.nanoTime();
// run correlation
long startCorr2d=System.nanoTime(); // System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execCorr2D(
for (int i = 0; i < NREPEAT; i++ ) gpuQuad_main.execCorr2D(
scales,// double [] scales,
fat_zero, // double fat_zero);
clt_parameters.gpu_corr_rad); // int corr_radius
......@@ -2163,7 +2157,7 @@ public class TwoQuadCLT {
long endCorr2d = System.nanoTime();
// run textures
long startTextures = System.nanoTime(); // System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execTextures(
for (int i = 0; i < NREPEAT; i++ ) gpuQuad_main.execTextures(
col_weights, // double [] color_weights,
quadCLT_main.isLwir(), // boolean is_lwir,
clt_parameters.min_shot, // double min_shot, // 10.0
......@@ -2176,7 +2170,7 @@ public class TwoQuadCLT {
// run texturesRBGA
long startTexturesRBGA = System.nanoTime(); // System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execRBGA(
for (int i = 0; i < NREPEAT; i++ ) gpuQuad_main.execRBGA(
col_weights, // double [] color_weights,
quadCLT_main.isLwir(), // boolean is_lwir,
clt_parameters.min_shot, // double min_shot, // 10.0
......@@ -2212,7 +2206,7 @@ public class TwoQuadCLT {
// get data back from GPU
float [][][] iclt_fimg = new float [GPUTileProcessor.NUM_CAMS][][];
for (int ncam = 0; ncam < iclt_fimg.length; ncam++) {
iclt_fimg[ncam] = gPUTileProcessor.getRBG(ncam);
iclt_fimg[ncam] = gpuQuad_main.getRBG(ncam);
}
int out_width = GPUTileProcessor.IMG_WIDTH + GPUTileProcessor.DTT_SIZE;
......@@ -2228,7 +2222,7 @@ public class TwoQuadCLT {
extra_titles[g * GPUTileProcessor.NUM_CAMS+ncam]= extra_group_titles[g]+"-"+ncam;
}
}
float [][] extra = gPUTileProcessor.getExtra();
float [][] extra = gpuQuad_main.getExtra();
(new ShowDoubleFloatArrays()).showArrays(
extra,
tilesX,
......@@ -2260,8 +2254,8 @@ public class TwoQuadCLT {
//show_corr
int [] wh = new int[2];
if (clt_parameters.show_corr) {
int [] corr_indices = gPUTileProcessor.getCorrIndices();
float [][] corr2D = gPUTileProcessor.getCorr2D(
int [] corr_indices = gpuQuad_main.getCorrIndices();
float [][] corr2D = gpuQuad_main.getCorr2D(
clt_parameters.gpu_corr_rad); // int corr_rad);
// convert to 6-layer image using tasks
double [][] dbg_corr = GPUTileProcessor.getCorr2DView(
......@@ -2344,7 +2338,7 @@ public class TwoQuadCLT {
// Use GPU prepared RBGA
if (clt_parameters.show_rgba_color) {
Rectangle woi = new Rectangle();
float [][] rbga = gPUTileProcessor.getRBGA(
float [][] rbga = gpuQuad_main.getRBGA(
(is_mono?1:3), // int num_colors,
woi);
(new ShowDoubleFloatArrays()).showArrays(
......@@ -2413,12 +2407,12 @@ public class TwoQuadCLT {
if (clt_parameters.show_rgba_color && (debugLevel > 100)) { // disabling
int numcol = quadCLT_main.isMonochrome()?1:3;
int ports = imp_quad_main.length;
int [] texture_indices = gPUTileProcessor.getTextureIndices();
int [] texture_indices = gpuQuad_main.getTextureIndices();
int num_src_slices = numcol + 1 + (clt_parameters.keep_weights?(ports + numcol + 1):0); // 12 ; // calculate
// float [][][] ftextures = gPUTileProcessor.getTextures(
// (is_mono?1:3), // int num_colors,
// clt_parameters.keep_weights); // boolean keep_weights);
float [] flat_textures = gPUTileProcessor.getFlatTextures(
float [] flat_textures = gpuQuad_main.getFlatTextures(
texture_indices.length,
(is_mono?1:3), // int num_colors,
clt_parameters.keep_weights); // boolean keep_weights);
......@@ -2447,7 +2441,7 @@ public class TwoQuadCLT {
}
}
}
double [][][][] texture_tiles = gPUTileProcessor.doubleTextures(
double [][][][] texture_tiles = gpuQuad_main.doubleTextures(
new Rectangle(0, 0, tilesX, tilesY), // Rectangle woi,
texture_indices, // int [] indices,
flat_textures, // float [][][] ftextures,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment