Commit 58d01fd2 authored by Andrey Filippov's avatar Andrey Filippov

converting to multi-cam

parent 6a300e5a
...@@ -92,13 +92,37 @@ ...@@ -92,13 +92,37 @@
<artifactId>loci_tools</artifactId> <artifactId>loci_tools</artifactId>
<version>6.1.0</version> <version>6.1.0</version>
</dependency> </dependency>
<!--
<dependency> <dependency>
<groupId>com.drewnoakes</groupId> <groupId>com.drewnoakes</groupId>
<artifactId>metadata-extractor</artifactId> <artifactId>metadata-extractor</artifactId>
<version>2.11.0</version> <version>2.11.0</version>
<type>java-source</type> <type>java-source</type>
</dependency> -->
<!-- https://mvnrepository.com/artifact/com.drewnoakes/metadata-extractor -->
<dependency>
<groupId>com.drewnoakes</groupId>
<artifactId>metadata-extractor</artifactId>
<version>2.11.0</version>
</dependency> </dependency>
<!-- https://mvnrepository.com/artifact/net.sf.ehcache/ehcache-core -->
<!--
<dependency>
<groupId>net.sf.ehcache</groupId>
<artifactId>ehcache-core</artifactId>
<version>2.6.2</version>
</dependency>
-->
<!-- https://mvnrepository.com/artifact/org.slf4j/jcl-over-slf4j -->
<!--
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>jcl-over-slf4j</artifactId>
<version>1.7.5</version>
</dependency>
-->
</dependencies> </dependencies>
<build> <build>
......
...@@ -184,6 +184,9 @@ private Panel panel1, ...@@ -184,6 +184,9 @@ private Panel panel1,
public static QuadCLT QUAD_CLT_AUX = null; public static QuadCLT QUAD_CLT_AUX = null;
public static TwoQuadCLT TWO_QUAD_CLT = null; public static TwoQuadCLT TWO_QUAD_CLT = null;
public static GPUTileProcessor GPU_TILE_PROCESSOR = null; public static GPUTileProcessor GPU_TILE_PROCESSOR = null;
// Add macro for GPU_QUAD?
public static GPUTileProcessor.GpuQuad GPU_QUAD = null;
public static GPUTileProcessor.GpuQuad GPU_QUAD_AUX = null;
public static LwirReader LWIR_READER = null; public static LwirReader LWIR_READER = null;
public static EyesisCorrectionParameters.DebayerParameters DEBAYER_PARAMETERS = new EyesisCorrectionParameters.DebayerParameters( public static EyesisCorrectionParameters.DebayerParameters DEBAYER_PARAMETERS = new EyesisCorrectionParameters.DebayerParameters(
...@@ -5803,12 +5806,31 @@ private Panel panel1, ...@@ -5803,12 +5806,31 @@ private Panel panel1,
} //final int debugLevel); } //final int debugLevel);
} }
if (GPU_QUAD == null) {
try {
GPU_QUAD = GPU_TILE_PROCESSOR. new GpuQuad(
2592,
1936,
4,
3);
} catch (Exception e) {
System.out.println("Failed to initialize GpuQuad class");
// TODO Auto-generated catch block
e.printStackTrace();
return false;
} //final int debugLevel);
}
// For now keep GPU_QUAD_AUX==null
if (COLOR_PROC_PARAMETERS_AUX == null) { if (COLOR_PROC_PARAMETERS_AUX == null) {
COLOR_PROC_PARAMETERS_AUX = COLOR_PROC_PARAMETERS.clone(); COLOR_PROC_PARAMETERS_AUX = COLOR_PROC_PARAMETERS.clone();
} }
try { try {
TWO_QUAD_CLT.processCLTQuadCorrPairsGpu( TWO_QUAD_CLT.processCLTQuadCorrPairsGpu(
GPU_TILE_PROCESSOR, // GPU_TILE_PROCESSOR,
GPU_QUAD, // GPUTileProcessor.GpuQuad gpuQuad_main,
GPU_QUAD_AUX, // GPUTileProcessor.GpuQuad gpuQuad_aux,
QUAD_CLT, // QuadCLT quadCLT_main, QUAD_CLT, // QuadCLT quadCLT_main,
QUAD_CLT_AUX, // QuadCLT quadCLT_aux, QUAD_CLT_AUX, // QuadCLT quadCLT_aux,
CLT_PARAMETERS, // EyesisCorrectionParameters.DCTParameters dct_parameters, CLT_PARAMETERS, // EyesisCorrectionParameters.DCTParameters dct_parameters,
......
...@@ -92,24 +92,16 @@ public class GPUTileProcessor { ...@@ -92,24 +92,16 @@ public class GPUTileProcessor {
static String GPU_RESOURCE_DIR = "kernels"; static String GPU_RESOURCE_DIR = "kernels";
static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"}; static String [] GPU_KERNEL_FILES = {"dtt8x8.cuh","TileProcessor.cuh"};
// "*" - generated defines, first index - separately compiled unit // "*" - generated defines, first index - separately compiled unit
/* static String [][] GPU_SRC_FILES = {
{"*","dtt8x8.h","dtt8x8.cu"},
{"*","dtt8x8.h","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
*/
static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}}; static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","geometry_correction.cu","TileProcessor.h","TileProcessor.cuh"}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.h","dtt8x8.cu","geometry_correction.h","TileProcessor.h","TileProcessor.cuh"}};
// static String [][] GPU_SRC_FILES = {{"*","dtt8x8.cuh","TileProcessor.cuh"}};
static String GPU_CONVERT_DIRECT_NAME = "convert_direct"; // name in C code static String GPU_CONVERT_DIRECT_NAME = "convert_direct"; // name in C code
static String GPU_IMCLT_ALL_NAME = "imclt_rbg_all"; static String GPU_IMCLT_ALL_NAME = "imclt_rbg_all";
static String GPU_CORRELATE2D_NAME = "correlate2D"; // name in C code static String GPU_CORRELATE2D_NAME = "correlate2D"; // name in C code
// static String GPU_TEXTURES_NAME = "textures_accumulate"; // name in C code
static String GPU_TEXTURES_NAME = "textures_nonoverlap"; // name in C code static String GPU_TEXTURES_NAME = "textures_nonoverlap"; // name in C code
static String GPU_RBGA_NAME = "generate_RBGA"; // name in C code static String GPU_RBGA_NAME = "generate_RBGA"; // name in C code
static String GPU_ROT_DERIV = "calc_rot_deriv"; // calculate rotation matrices and derivatives static String GPU_ROT_DERIV = "calc_rot_deriv"; // calculate rotation matrices and derivatives
static String GPU_SET_TILES_OFFSETS = "get_tiles_offsets"; // calculate pixel offsets and disparity distortions static String GPU_SET_TILES_OFFSETS = "get_tiles_offsets"; // calculate pixel offsets and disparity distortions
static String GPU_CALC_REVERSE_DISTORTION = "calcReverseDistortionTable"; // calculate reverse radial distortion table from gpu_geometry_correction static String GPU_CALC_REVERSE_DISTORTION = "calcReverseDistortionTable"; // calculate reverse radial distortion table from gpu_geometry_correction
// pass some defines to gpu source code with #ifdef JCUDA // pass some defines to gpu source code with #ifdef JCUDA
public static int DTT_SIZE_LOG2 = 3; public static int DTT_SIZE_LOG2 = 3;
public static int DTT_SIZE = (1 << DTT_SIZE_LOG2); public static int DTT_SIZE = (1 << DTT_SIZE_LOG2);
...@@ -171,8 +163,11 @@ public class GPUTileProcessor { ...@@ -171,8 +163,11 @@ public class GPUTileProcessor {
private CUfunction GPU_SET_TILES_OFFSETS_kernel = null; private CUfunction GPU_SET_TILES_OFFSETS_kernel = null;
private CUfunction GPU_CALC_REVERSE_DISTORTION_kernel = null; private CUfunction GPU_CALC_REVERSE_DISTORTION_kernel = null;
CUmodule module; // to access constants memory
// CPU arrays of pointers to GPU memory // CPU arrays of pointers to GPU memory
// Moved to GpuQuad class
/*
// These arrays may go to methods, they are here just to be able to free GPU memory if needed // These arrays may go to methods, they are here just to be able to free GPU memory if needed
private CUdeviceptr [] gpu_kernels_h = new CUdeviceptr[NUM_CAMS]; private CUdeviceptr [] gpu_kernels_h = new CUdeviceptr[NUM_CAMS];
private CUdeviceptr [] gpu_kernel_offsets_h = new CUdeviceptr[NUM_CAMS]; private CUdeviceptr [] gpu_kernel_offsets_h = new CUdeviceptr[NUM_CAMS];
...@@ -221,6 +216,10 @@ public class GPUTileProcessor { ...@@ -221,6 +216,10 @@ public class GPUTileProcessor {
public int num_task_tiles; public int num_task_tiles;
public int num_corr_tiles; public int num_corr_tiles;
public int num_texture_tiles; public int num_texture_tiles;
*/
// public GpuQuad [][] gpuQuad; // array of GpuQuad instances 2x2? ({{rgb, rgb_macro}, {lwir, lwir_macro})
// initialize with 4 dimensions each
public class TpTask { public class TpTask {
public int task; // [0](+1) - generate 4 images, [4..9]+16..+512 - correlation pairs, 2 - generate texture tiles public int task; // [0](+1) - generate 4 images, [4..9]+16..+512 - correlation pairs, 2 - generate texture tiles
public float target_disparity; public float target_disparity;
...@@ -273,7 +272,8 @@ public class GPUTileProcessor { ...@@ -273,7 +272,8 @@ public class GPUTileProcessor {
} }
} }
public class CltExtra{ /*
public class CltExtra{ // never used?
public float data_x; // kernel data is relative to this displacement X (0.5 pixel increments) public float data_x; // kernel data is relative to this displacement X (0.5 pixel increments)
public float data_y; // kernel data is relative to this displacement Y (0.5 pixel increments) public float data_y; // kernel data is relative to this displacement Y (0.5 pixel increments)
public float center_x; // actual center X (use to find derivatives) public float center_x; // actual center X (use to find derivatives)
...@@ -328,7 +328,8 @@ public class GPUTileProcessor { ...@@ -328,7 +328,8 @@ public class GPUTileProcessor {
flt[indx++] = this.dyc_dy; flt[indx++] = this.dyc_dy;
return flt; return flt;
} }
}; }
*/
private static long getPointerAddress(CUdeviceptr p) private static long getPointerAddress(CUdeviceptr p)
...@@ -451,7 +452,6 @@ public class GPUTileProcessor { ...@@ -451,7 +452,6 @@ public class GPUTileProcessor {
// Create the kernel functions (first - just test) // Create the kernel functions (first - just test)
String [] func_names = { String [] func_names = {
// GPU_CONVERT_CORRECT_TILES_NAME,
GPU_CONVERT_DIRECT_NAME, GPU_CONVERT_DIRECT_NAME,
GPU_IMCLT_ALL_NAME, GPU_IMCLT_ALL_NAME,
GPU_CORRELATE2D_NAME, GPU_CORRELATE2D_NAME,
...@@ -465,7 +465,6 @@ public class GPUTileProcessor { ...@@ -465,7 +465,6 @@ public class GPUTileProcessor {
func_names, func_names,
capability); // on my - 75 capability); // on my - 75
// GPU_CONVERT_CORRECT_TILES_kernel = functions[0];
GPU_CONVERT_DIRECT_kernel = functions[0]; GPU_CONVERT_DIRECT_kernel = functions[0];
GPU_IMCLT_ALL_kernel = functions[1]; GPU_IMCLT_ALL_kernel = functions[1];
GPU_CORRELATE2D_kernel = functions[2]; GPU_CORRELATE2D_kernel = functions[2];
...@@ -476,7 +475,6 @@ public class GPUTileProcessor { ...@@ -476,7 +475,6 @@ public class GPUTileProcessor {
GPU_CALC_REVERSE_DISTORTION_kernel = functions[7]; GPU_CALC_REVERSE_DISTORTION_kernel = functions[7];
System.out.println("GPU kernel functions initialized"); System.out.println("GPU kernel functions initialized");
// System.out.println(GPU_CONVERT_CORRECT_TILES_kernel.toString());
System.out.println(GPU_CONVERT_DIRECT_kernel.toString()); System.out.println(GPU_CONVERT_DIRECT_kernel.toString());
System.out.println(GPU_IMCLT_ALL_kernel.toString()); System.out.println(GPU_IMCLT_ALL_kernel.toString());
System.out.println(GPU_CORRELATE2D_kernel.toString()); System.out.println(GPU_CORRELATE2D_kernel.toString());
...@@ -486,11 +484,249 @@ public class GPUTileProcessor { ...@@ -486,11 +484,249 @@ public class GPUTileProcessor {
System.out.println(GPU_SET_TILES_OFFSETS_kernel.toString()); System.out.println(GPU_SET_TILES_OFFSETS_kernel.toString());
System.out.println(GPU_CALC_REVERSE_DISTORTION_kernel.toString()); System.out.println(GPU_CALC_REVERSE_DISTORTION_kernel.toString());
// GPU data structures are now initialized through GpuQuad instances
}
/**
 * Returns the display titles of the six correlation pairs.
 *
 * @return a newly allocated array of six titles, in pair-index order
 */
public static String [] getCorrTitles() {
	final String [] titles = {
			"hor-top",
			"hor-bottom",
			"vert-left",
			"vert-right",
			"diag-main",
			"diag-other"};
	return titles;
}
/**
 * Lays out per-tile 2D correlation results as one flat image per correlation pair.
 *
 * Each tile gets a square cell of corr_size x corr_size pixels, where corr_size is the
 * rounded square root of corr2d[0].length. Cells are separated (and surrounded) by
 * one-pixel borders that keep the value 0.0. Cells that receive no data stay NaN.
 *
 * @param tilesX  number of tiles in a row
 * @param tilesY  number of tile rows
 * @param indices one entry per row of corr2d: tile number is (entry >> CORR_NTILE_SHIFT),
 *                pair number is (entry & CORR_PAIRS_MASK)
 * @param corr2d  correlation data, corr2d[n] is a row-major corr_size x corr_size tile
 * @param wh      optional (may be null) array of at least 2 elements that receives
 *                {width, height} of the generated images
 * @return array [NUM_PAIRS][height*width]; [NUM_PAIRS][0] when corr2d is null or empty
 *         (wh is not modified in that case)
 */
public static double [][] getCorr2DView(
		int tilesX,
		int tilesY,
		int [] indices,
		float [][] corr2d,
		int [] wh){
	if ((corr2d == null) || (corr2d.length == 0)) {
		return new double [NUM_PAIRS][0];
	}
	final int corr_size = (int) Math.round(Math.sqrt(corr2d[0].length));
	final int step   = corr_size + 1; // tile pitch: cell plus one border pixel
	final int width  = tilesX * step + 1;
	final int height = tilesY * step + 1;
	final double [][] data = new double [NUM_PAIRS][];
	final double [] blank = new double [height * width];
	data[0] = blank;
	// Mark the interior of every tile cell as NaN, borders stay 0.0
	for (int ty = 0; ty < tilesY; ty++) {
		for (int i = 0; i < corr_size; i++) {
			final int row_offs = (ty * step + i + 1) * width;
			for (int tx = 0; tx < tilesX; tx++) {
				final int col0 = tx * step + 1;
				for (int j = 0; j < corr_size; j++) {
					blank[row_offs + col0 + j] = Double.NaN;
				}
			}
		}
	}
	// All other pairs start from the same blank template
	for (int np = 1; np < NUM_PAIRS; np++) {
		data[np] = blank.clone();
	}
	// Copy each available correlation tile into its cell of the matching pair image
	for (int n = 0; n < indices.length; n++) {
		final int packed = indices[n];
		final int nt = packed >> CORR_NTILE_SHIFT;
		final int np = packed & CORR_PAIRS_MASK;
		assert np < NUM_PAIRS : "invalid correllation pair";
		final int row0 = (nt / tilesX) * step + 1;
		final int col0 = (nt % tilesX) * step + 1;
		for (int i = 0; i < corr_size; i++) {
			for (int j = 0; j < corr_size; j++) {
				data[np][(row0 + i) * width + col0 + j] = corr2d[n][i * corr_size + j];
			}
		}
	}
	if (wh != null) {
		wh[0] = width;
		wh[1] = height;
	}
	return data;
}
// private static CUfunction [] createFunctions(
/**
 * Compiles the CUDA source units with NVRTC, links the resulting PTX together with the
 * library at LIBRARY_PATH, loads the linked image into the {@code module} field and
 * looks up the requested kernels in it.
 *
 * A compile failure is detected by nvrtcCompileProgram() throwing; the compilation log
 * (if any) is printed to stderr in either case.
 *
 * @param sourceCodeUnits source text of each separately compiled unit
 * @param kernelNames     names of the kernel functions to look up in the linked module
 * @param capability      compute capability number used for --gpu-architecture=compute_NN
 * @return kernel function handles, in the same order as kernelNames
 * @throws IOException if any of the source units fails to compile (the original
 *                     exception is attached as the cause)
 */
private CUfunction [] createFunctions(
		String [] sourceCodeUnits,
		String [] kernelNames,
		int       capability
		) throws IOException
{
	CUfunction [] functions = new CUfunction [kernelNames.length];
	byte[][] ptxDataUnits = new byte [sourceCodeUnits.length][];
	String options[] = {"--gpu-architecture=compute_"+capability};
	for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
		String sourceCode = sourceCodeUnits[cunit];
		// Use the NVRTC to create a program by compiling the source code
		nvrtcProgram program = new nvrtcProgram();
		nvrtcCreateProgram( program, sourceCode, null, 0, null, null);
		try {
			// Tracked per unit: a failure of a later unit must not be hidden by the
			// success of an earlier one
			Exception compileError = null;
			try {
				nvrtcCompileProgram(program, options.length, options);
			} catch (Exception e) {
				System.out.println("nvrtcCompileProgram() FAILED");
				compileError = e;
			}
			// Compilation log with errors/warnings
			String programLog[] = new String[1];
			nvrtcGetProgramLog(program, programLog);
			String log = (programLog[0] == null) ? "" : programLog[0].trim();
			if (!log.isEmpty())
			{
				System.err.println("Program compilation log:\n" + log);
			}
			if (compileError != null) {
				throw new IOException("Could not compile program", compileError);
			}
			// Get the PTX code of the compiled program (not the binary)
			String[] ptx = new String[1];
			nvrtcGetPTX(program, ptx);
			ptxDataUnits[cunit] = ptx[0].getBytes();
		} finally {
			// Release the NVRTC program on both the success and the failure path
			nvrtcDestroyProgram(program);
		}
		System.out.println("ptxDataUnits["+cunit+"].length="+ptxDataUnits[cunit].length);
	}
	JITOptions jitOptions = new JITOptions();
	jitOptions.putInt(CU_JIT_LOG_VERBOSE, 1);
	CUlinkState state = new CUlinkState();
	cuLinkCreate(jitOptions, state);
	try {
		cuLinkAddFile(state, CU_JIT_INPUT_LIBRARY, LIBRARY_PATH, jitOptions);
		for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
			cuLinkAddData(state, CU_JIT_INPUT_PTX, Pointer.to(ptxDataUnits[cunit]), ptxDataUnits[cunit].length, "input"+cunit+".ptx", jitOptions); // CUDA_ERROR_INVALID_PTX
		}
		long size[] = { 0 };
		Pointer image = new Pointer();
		// The link result is only reported, not treated as fatal (as before); make sure
		// driver exceptions are re-enabled whatever happens during the call
		JCudaDriver.setExceptionsEnabled(false);
		try {
			int cuda_result = cuLinkComplete(state, image, size);
			System.out.println("cuLinkComplete() -> "+cuda_result);
		} finally {
			JCudaDriver.setExceptionsEnabled(true);
		}
		module = new CUmodule();
		cuModuleLoadDataEx(module, image, 0, new int[0], Pointer.to(new int[0]));
	} finally {
		// Same position as before (after the module load), but now also reached on errors
		cuLinkDestroy(state);
	}
	for (int i = 0; i < kernelNames.length; i++) {
		// Find the function in the source by name, get its pointer
		functions[i] = new CUfunction();
		cuModuleGetFunction(functions[i] , module, kernelNames[i]);
	}
	return functions;
}
/**
 * Reads a whole file and decodes it as UTF-8 text.
 *
 * @param path file system path of the file to read
 * @return the file contents, or null if the file could not be read (IOException)
 */
static String readFileAsString(String path)
{
	try {
		return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
	} catch (IOException e) {
		// Callers get null instead of an exception when the file is unreadable
		return null;
	}
}
public class GpuQuad{ // quad camera description
public final int IMG_WIDTH;
public final int IMG_HEIGHT;
public final int NUM_CAMS;
public final int NUM_COLORS; // maybe should always be 3?
// public final GPUTileProcessor gPUTileProcessor;
// CPU arrays of pointers to GPU memory
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
private CUdeviceptr [] gpu_kernels_h;
private CUdeviceptr [] gpu_kernel_offsets_h;
private CUdeviceptr [] gpu_bayer_h;
private CUdeviceptr [] gpu_clt_h;
private CUdeviceptr [] gpu_corr_images_h;
// GPU pointers to array of GPU pointers
private CUdeviceptr gpu_kernels;
private CUdeviceptr gpu_kernel_offsets;
private CUdeviceptr gpu_bayer;
private CUdeviceptr gpu_tasks;
private CUdeviceptr gpu_corrs;
private CUdeviceptr gpu_textures;
private CUdeviceptr gpu_clt;
private CUdeviceptr gpu_4_images;
private CUdeviceptr gpu_corr_indices;
private CUdeviceptr gpu_num_corr_tiles;
private CUdeviceptr gpu_texture_indices_ovlp;
private CUdeviceptr gpu_num_texture_ovlp;
private CUdeviceptr gpu_texture_indices;
private CUdeviceptr gpu_texture_indices_len;
private CUdeviceptr gpu_diff_rgb_combo;
private CUdeviceptr gpu_color_weights;
private CUdeviceptr gpu_generate_RBGA_params;
private CUdeviceptr gpu_woi;
private CUdeviceptr gpu_textures_rgba;
private CUdeviceptr gpu_correction_vector;
private CUdeviceptr gpu_rot_deriv;
private CUdeviceptr gpu_geometry_correction;
private CUdeviceptr gpu_rByRDist;
private CUdeviceptr gpu_active_tiles;
private CUdeviceptr gpu_num_active_tiles;
private int mclt_stride;
private int corr_stride;
private int imclt_stride;
private int texture_stride;
private int texture_stride_rgba;
private int num_task_tiles;
private int num_corr_tiles;
private int num_texture_tiles;
public GpuQuad(
// final GPUTileProcessor gPUTileProcessor,
final int img_width,
final int img_height,
final int num_cams,
final int num_colors
) {
// this.gPUTileProcessor = gPUTileProcessor;
IMG_WIDTH = img_width;
IMG_HEIGHT = img_height;
NUM_CAMS = num_cams;
NUM_COLORS = num_colors; // maybe should always be 3?
// CPU arrays of pointers to GPU memory
// These arrays may go to methods, they are here just to be able to free GPU memory if needed
gpu_kernels_h = new CUdeviceptr[NUM_CAMS];
gpu_kernel_offsets_h = new CUdeviceptr[NUM_CAMS];
gpu_bayer_h = new CUdeviceptr[NUM_CAMS];
gpu_clt_h = new CUdeviceptr[NUM_CAMS];
gpu_corr_images_h= new CUdeviceptr[NUM_CAMS];
// GPU pointers to array of GPU pointers
gpu_kernels = new CUdeviceptr();
gpu_kernel_offsets = new CUdeviceptr();
gpu_bayer = new CUdeviceptr();
gpu_tasks = new CUdeviceptr(); // allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT
gpu_corrs = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.FLOAT
gpu_textures = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
gpu_clt = new CUdeviceptr();
gpu_4_images = new CUdeviceptr();
gpu_corr_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_num_corr_tiles = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_texture_indices_ovlp =new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_num_texture_ovlp = new CUdeviceptr(); // 8 ints
gpu_texture_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_texture_indices_len = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
gpu_diff_rgb_combo = new CUdeviceptr(); // 1 int
gpu_color_weights = new CUdeviceptr(); // allocate 3 * Sizeof.FLOAT
gpu_generate_RBGA_params =new CUdeviceptr(); // allocate 5 * Sizeof.FLOAT
gpu_woi = new CUdeviceptr(); // 4 integers (x, y, width, height) Rectangle - in tiles
gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
gpu_correction_vector= new CUdeviceptr();
gpu_rot_deriv= new CUdeviceptr(); // used internally by device, may be read to CPU for testing
gpu_geometry_correction= new CUdeviceptr();
gpu_rByRDist= new CUdeviceptr(); // calculated once for the camera distortion model in CPU (move to GPU?)
gpu_active_tiles = new CUdeviceptr(); // TILESX*TILESY*sizeof(int)
gpu_num_active_tiles = new CUdeviceptr(); // 1 int
// Init data arrays for all kernels // Init data arrays for all kernels
int tilesX = IMG_WIDTH / DTT_SIZE; int tilesX = IMG_WIDTH / DTT_SIZE;
int tilesY = IMG_HEIGHT / DTT_SIZE; int tilesY = IMG_HEIGHT / DTT_SIZE;
long [] device_stride = new long [1]; long [] device_stride = new long [1];
for (int ncam = 0; ncam < NUM_CAMS; ncam++) { for (int ncam = 0; ncam < NUM_CAMS; ncam++) {
gpu_kernels_h[ncam] = new CUdeviceptr(); gpu_kernels_h[ncam] = new CUdeviceptr();
cuMemAlloc(gpu_kernels_h[ncam],KERN_SIZE * Sizeof.FLOAT ); // public static int cuMemAlloc(CUdeviceptr dptr, long bytesize) cuMemAlloc(gpu_kernels_h[ncam],KERN_SIZE * Sizeof.FLOAT ); // public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
...@@ -504,7 +740,6 @@ public class GPUTileProcessor { ...@@ -504,7 +740,6 @@ public class GPUTileProcessor {
IMG_HEIGHT, // long Height, IMG_HEIGHT, // long Height,
Sizeof.FLOAT); // int ElementSizeBytes) Sizeof.FLOAT); // int ElementSizeBytes)
mclt_stride = (int)(device_stride[0] / Sizeof.FLOAT); mclt_stride = (int)(device_stride[0] / Sizeof.FLOAT);
gpu_corr_images_h[ncam] = new CUdeviceptr(); gpu_corr_images_h[ncam] = new CUdeviceptr();
cuMemAllocPitch ( cuMemAllocPitch (
gpu_corr_images_h[ncam], // CUdeviceptr dptr, gpu_corr_images_h[ncam], // CUdeviceptr dptr,
...@@ -515,7 +750,6 @@ public class GPUTileProcessor { ...@@ -515,7 +750,6 @@ public class GPUTileProcessor {
imclt_stride = (int)(device_stride[0] / Sizeof.FLOAT); imclt_stride = (int)(device_stride[0] / Sizeof.FLOAT);
gpu_clt_h[ncam] = new CUdeviceptr(); gpu_clt_h[ncam] = new CUdeviceptr();
cuMemAlloc(gpu_clt_h[ncam],tilesY * tilesX * NUM_COLORS * 4 * DTT_SIZE * DTT_SIZE * Sizeof.FLOAT ); // public static int cuMemAlloc(CUdeviceptr dptr, long bytesize) cuMemAlloc(gpu_clt_h[ncam],tilesY * tilesX * NUM_COLORS * 4 * DTT_SIZE * DTT_SIZE * Sizeof.FLOAT ); // public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
} }
// now create device arrays pointers // now create device arrays pointers
if (Sizeof.POINTER != Sizeof.LONG) { if (Sizeof.POINTER != Sizeof.LONG) {
...@@ -558,7 +792,7 @@ public class GPUTileProcessor { ...@@ -558,7 +792,7 @@ public class GPUTileProcessor {
// Set task array // Set task array
cuMemAlloc(gpu_tasks, tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT); cuMemAlloc(gpu_tasks, tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT);
//=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============ //=========== Seems that in many places Sizeof.POINTER (==8) is used instead of Sizeof.FLOAT !!! ============
// Set corrs array // Set corrs array
cuMemAlloc(gpu_corr_indices, tilesX * tilesY * NUM_PAIRS * Sizeof.FLOAT); cuMemAlloc(gpu_corr_indices, tilesX * tilesY * NUM_PAIRS * Sizeof.FLOAT);
cuMemAlloc(gpu_num_corr_tiles, 1 * Sizeof.FLOAT); cuMemAlloc(gpu_num_corr_tiles, 1 * Sizeof.FLOAT);
...@@ -589,7 +823,6 @@ public class GPUTileProcessor { ...@@ -589,7 +823,6 @@ public class GPUTileProcessor {
NUM_PAIRS * tilesX * tilesY, // long Height, NUM_PAIRS * tilesX * tilesY, // long Height,
Sizeof.FLOAT); // int ElementSizeBytes) Sizeof.FLOAT); // int ElementSizeBytes)
corr_stride = (int)(device_stride[0] / Sizeof.FLOAT); corr_stride = (int)(device_stride[0] / Sizeof.FLOAT);
int max_texture_size = (NUM_COLORS + 1 + (NUM_CAMS + NUM_COLORS + 1)) * (2 * DTT_SIZE)* (2 * DTT_SIZE); int max_texture_size = (NUM_COLORS + 1 + (NUM_CAMS + NUM_COLORS + 1)) * (2 * DTT_SIZE)* (2 * DTT_SIZE);
cuMemAllocPitch ( cuMemAllocPitch (
gpu_textures, // CUdeviceptr dptr, gpu_textures, // CUdeviceptr dptr,
...@@ -601,7 +834,6 @@ public class GPUTileProcessor { ...@@ -601,7 +834,6 @@ public class GPUTileProcessor {
int max_rgba_width = (tilesX + 1) * DTT_SIZE; int max_rgba_width = (tilesX + 1) * DTT_SIZE;
int max_rgba_height = (tilesY + 1) * DTT_SIZE; int max_rgba_height = (tilesY + 1) * DTT_SIZE;
int max_rbga_slices = NUM_COLORS + 1; int max_rbga_slices = NUM_COLORS + 1;
cuMemAllocPitch ( cuMemAllocPitch (
gpu_textures_rgba, // CUdeviceptr dptr, gpu_textures_rgba, // CUdeviceptr dptr,
device_stride, // long[] pPitch, device_stride, // long[] pPitch,
...@@ -609,7 +841,6 @@ public class GPUTileProcessor { ...@@ -609,7 +841,6 @@ public class GPUTileProcessor {
max_rgba_height * max_rbga_slices, // long Height, max_rgba_height * max_rbga_slices, // long Height,
Sizeof.FLOAT); // int ElementSizeBytes) Sizeof.FLOAT); // int ElementSizeBytes)
texture_stride_rgba = (int)(device_stride[0] / Sizeof.FLOAT); texture_stride_rgba = (int)(device_stride[0] / Sizeof.FLOAT);
} }
public void setGeometryCorrection(GeometryCorrection gc, public void setGeometryCorrection(GeometryCorrection gc,
...@@ -681,11 +912,7 @@ public class GPUTileProcessor { ...@@ -681,11 +912,7 @@ public class GPUTileProcessor {
return texture_indices; return texture_indices;
} }
//texture_indices
//texture_indices
public void setConvolutionKernel( public void setConvolutionKernel(
float [] kernel, // [tileY][tileX][color][..] float [] kernel, // [tileY][tileX][color][..]
float [] kernel_offsets, float [] kernel_offsets,
...@@ -900,30 +1127,25 @@ public class GPUTileProcessor { ...@@ -900,30 +1127,25 @@ public class GPUTileProcessor {
num_tiles--; num_tiles--;
} }
} }
//nextInt(int bound) //nextInt(int bound)
} }
if (dbg1) { if (dbg1) {
// mask[(woi.y-1) * tilesX + (woi.x-1)] = true;
mask[(woi.y+woi.height) * tilesX + (woi.x+woi.width)] = true; mask[(woi.y+woi.height) * tilesX + (woi.x+woi.width)] = true;
num_tiles += 1; // 2; num_tiles += 1; // 2;
} }
// TpTask [] tp_tasks = new TpTask[tilesX*tilesY];
TpTask [] tp_tasks = new TpTask[num_tiles]; TpTask [] tp_tasks = new TpTask[num_tiles];
int indx = 0; int indx = 0;
for (int ty = 0; ty < tilesY; ty++) { for (int ty = 0; ty < tilesY; ty++) {
for (int tx = 0; tx < tilesX; tx++) if (mask[ty * tilesX + tx]) { for (int tx = 0; tx < tilesX; tx++) if (mask[ty * tilesX + tx]) {
// Only generate for non-empty tasks, use 1 empty empty as a terminator?
// tp_tasks[indx] = new TpTask(tx,ty, target_disparities[indx], 1); // task == 1 for now
// Only generate for non-empty tasks, use 1 empty empty as a terminator?
tp_tasks[indx] = new TpTask(tx,ty, target_disparities[indx], tp_tasks[indx] = new TpTask(tx,ty, target_disparities[indx],
((out_images[indx] & 0x0f) << 0) | ((out_images[indx] & 0x0f) << 0) |
((corr_mask [indx] & 0x3f) << 4) ((corr_mask [indx] & 0x3f) << 4)
); // task == 1 for now ); // task == 1 for now
indx++; indx++;
} }
} }
...@@ -985,7 +1207,6 @@ public class GPUTileProcessor { ...@@ -985,7 +1207,6 @@ public class GPUTileProcessor {
num_textures++; num_textures++;
} }
} }
int [] iarr = new int[num_textures]; int [] iarr = new int[num_textures];
num_textures = 0; num_textures = 0;
int b = (1 << LIST_TEXTURE_BIT); int b = (1 << LIST_TEXTURE_BIT);
...@@ -998,67 +1219,7 @@ public class GPUTileProcessor { ...@@ -998,67 +1219,7 @@ public class GPUTileProcessor {
return iarr; return iarr;
} }
// All data is already copied to GPU memory
/**
 * Returns the display titles of the six correlation pairs.
 *
 * @return a newly allocated array of six titles, in pair-index order
 */
public static String [] getCorrTitles() {
	final String [] titles = {
			"hor-top",
			"hor-bottom",
			"vert-left",
			"vert-right",
			"diag-main",
			"diag-other"};
	return titles;
}
/**
 * Lays out per-tile 2D correlation results as one flat image per correlation pair.
 *
 * Each tile gets a square cell of corr_size x corr_size pixels, where corr_size is the
 * rounded square root of corr2d[0].length. Cells are separated (and surrounded) by
 * one-pixel borders that keep the value 0.0. Cells that receive no data stay NaN.
 *
 * @param tilesX  number of tiles in a row
 * @param tilesY  number of tile rows
 * @param indices one entry per row of corr2d: tile number is (entry >> CORR_NTILE_SHIFT),
 *                pair number is (entry & CORR_PAIRS_MASK)
 * @param corr2d  correlation data, corr2d[n] is a row-major corr_size x corr_size tile
 * @param wh      optional (may be null) array of at least 2 elements that receives
 *                {width, height} of the generated images
 * @return array [NUM_PAIRS][height*width]; [NUM_PAIRS][0] when corr2d is null or empty
 *         (wh is not modified in that case)
 */
public static double [][] getCorr2DView(
		int tilesX,
		int tilesY,
		int [] indices,
		float [][] corr2d,
		int [] wh){
	if ((corr2d == null) || (corr2d.length == 0)) {
		return new double [NUM_PAIRS][0];
	}
	final int corr_size = (int) Math.round(Math.sqrt(corr2d[0].length));
	final int step   = corr_size + 1; // tile pitch: cell plus one border pixel
	final int width  = tilesX * step + 1;
	final int height = tilesY * step + 1;
	final double [][] data = new double [NUM_PAIRS][];
	final double [] blank = new double [height * width];
	data[0] = blank;
	// Mark the interior of every tile cell as NaN, borders stay 0.0
	for (int ty = 0; ty < tilesY; ty++) {
		for (int i = 0; i < corr_size; i++) {
			final int row_offs = (ty * step + i + 1) * width;
			for (int tx = 0; tx < tilesX; tx++) {
				final int col0 = tx * step + 1;
				for (int j = 0; j < corr_size; j++) {
					blank[row_offs + col0 + j] = Double.NaN;
				}
			}
		}
	}
	// All other pairs start from the same blank template
	for (int np = 1; np < NUM_PAIRS; np++) {
		data[np] = blank.clone();
	}
	// Copy each available correlation tile into its cell of the matching pair image
	for (int n = 0; n < indices.length; n++) {
		final int packed = indices[n];
		final int nt = packed >> CORR_NTILE_SHIFT;
		final int np = packed & CORR_PAIRS_MASK;
		assert np < NUM_PAIRS : "invalid correllation pair";
		final int row0 = (nt / tilesX) * step + 1;
		final int col0 = (nt % tilesX) * step + 1;
		for (int i = 0; i < corr_size; i++) {
			for (int j = 0; j < corr_size; j++) {
				data[np][(row0 + i) * width + col0 + j] = corr2d[n][i * corr_size + j];
			}
		}
	}
	if (wh != null) {
		wh[0] = width;
		wh[1] = height;
	}
	return data;
}
// All data is already copied to GPU memory
public void execRotDerivs() { public void execRotDerivs() {
if (GPU_ROT_DERIV_kernel == null) if (GPU_ROT_DERIV_kernel == null)
...@@ -1083,6 +1244,7 @@ public class GPUTileProcessor { ...@@ -1083,6 +1244,7 @@ public class GPUTileProcessor {
kernelParameters, null); // Kernel- and extra parameters kernelParameters, null); // Kernel- and extra parameters
cuCtxSynchronize(); // remove later cuCtxSynchronize(); // remove later
} }
public void execCalcReverseDistortions() { public void execCalcReverseDistortions() {
if (GPU_CALC_REVERSE_DISTORTION_kernel == null) if (GPU_CALC_REVERSE_DISTORTION_kernel == null)
{ {
...@@ -1201,7 +1363,6 @@ public class GPUTileProcessor { ...@@ -1201,7 +1363,6 @@ public class GPUTileProcessor {
cuCtxSynchronize(); cuCtxSynchronize();
} }
public void execCorr2D( public void execCorr2D(
double [] scales, double [] scales,
double fat_zero, double fat_zero,
...@@ -1217,8 +1378,6 @@ public class GPUTileProcessor { ...@@ -1217,8 +1378,6 @@ public class GPUTileProcessor {
float fscale0 = (float) scales[0]; float fscale0 = (float) scales[0];
float fscale1 = (num_colors >1)?((float) scales[1]):0.0f; float fscale1 = (num_colors >1)?((float) scales[1]):0.0f;
float fscale2 = (num_colors >2)?((float) scales[2]):0.0f; float fscale2 = (num_colors >2)?((float) scales[2]):0.0f;
// int [] GridFullWarps = {(num_corr_tiles + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1};
// int [] ThreadsFullWarps = {CORR_THREADS_PER_TILE, CORR_TILES_PER_BLOCK, 1};
int [] GridFullWarps = {1, 1, 1}; int [] GridFullWarps = {1, 1, 1};
int [] ThreadsFullWarps = {1, 1, 1}; int [] ThreadsFullWarps = {1, 1, 1};
Pointer kernelParameters = Pointer.to( Pointer kernelParameters = Pointer.to(
...@@ -1300,11 +1459,6 @@ public class GPUTileProcessor { ...@@ -1300,11 +1459,6 @@ public class GPUTileProcessor {
Pointer.to(new int[] {num_colors}), // int colors, // number of colors (3/1) Pointer.to(new int[] {num_colors}), // int colors, // number of colors (3/1)
Pointer.to(new int[] {iis_lwir}), // int is_lwir, // do not perform shot correction Pointer.to(new int[] {iis_lwir}), // int is_lwir, // do not perform shot correction
Pointer.to(gpu_generate_RBGA_params), // float generate_RBGA_params[5], Pointer.to(gpu_generate_RBGA_params), // float generate_RBGA_params[5],
// Pointer.to(new float[] {(float) min_shot}), // float min_shot, // 10.0
// Pointer.to(new float[] {(float) scale_shot}), // float scale_shot, // 3.0
// Pointer.to(new float[] {(float) diff_sigma}), // float diff_sigma, // pixel value/pixel change
// Pointer.to(new float[] {(float) diff_threshold}),// float diff_threshold, // pixel value/pixel change
// Pointer.to(new float[] {(float) min_agree}), // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G
Pointer.to(new int[] { idust_remove }), // int dust_remove, // Do not reduce average weight when only one image differes much from the average Pointer.to(new int[] { idust_remove }), // int dust_remove, // Do not reduce average weight when only one image differes much from the average
Pointer.to(new int[] {0}), // int keep_weights, // return channel weights after A in RGBA Pointer.to(new int[] {0}), // int keep_weights, // return channel weights after A in RGBA
...@@ -1371,15 +1525,8 @@ public class GPUTileProcessor { ...@@ -1371,15 +1525,8 @@ public class GPUTileProcessor {
Pointer.to(new int[] { num_colors }), Pointer.to(new int[] { num_colors }),
Pointer.to(new int[] { iis_lwir }), Pointer.to(new int[] { iis_lwir }),
Pointer.to(gpu_generate_RBGA_params), // float generate_RBGA_params[5], Pointer.to(gpu_generate_RBGA_params), // float generate_RBGA_params[5],
// Pointer.to(new float[] {(float) min_shot}), // float min_shot, // 10.0
// Pointer.to(new float[] {(float) scale_shot}), // float scale_shot, // 3.0
// Pointer.to(new float[] {(float) diff_sigma}), // float diff_sigma, // pixel value/pixel change
// Pointer.to(new float[] {(float) diff_threshold}),// float diff_threshold, // pixel value/pixel change
// Pointer.to(new float[] {(float) min_agree}), // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G
Pointer.to(new int[] { idust_remove }), Pointer.to(new int[] { idust_remove }),
// Pointer.to(new int[] { 0}), // texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
// Pointer.to(new int[] {0}), // gpu_textures),
Pointer.to(new int[] {texture_stride}), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles Pointer.to(new int[] {texture_stride}), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_textures), Pointer.to(gpu_textures),
Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
...@@ -1393,9 +1540,6 @@ public class GPUTileProcessor { ...@@ -1393,9 +1540,6 @@ public class GPUTileProcessor {
cuCtxSynchronize(); cuCtxSynchronize();
} }
public float [][] getCorr2D(int corr_rad){ public float [][] getCorr2D(int corr_rad){
int corr_size = (2 * corr_rad + 1) * (2 * corr_rad + 1); int corr_size = (2 * corr_rad + 1) * (2 * corr_rad + 1);
float [] cpu_corrs = new float [ num_corr_tiles * corr_size]; float [] cpu_corrs = new float [ num_corr_tiles * corr_size];
...@@ -1419,6 +1563,7 @@ public class GPUTileProcessor { ...@@ -1419,6 +1563,7 @@ public class GPUTileProcessor {
} }
return corrs; return corrs;
} }
public int [] getCorrIndices() { public int [] getCorrIndices() {
float [] fnum_corrs = new float[1]; float [] fnum_corrs = new float[1];
cuMemcpyDtoH(Pointer.to(fnum_corrs), gpu_num_corr_tiles, 1 * Sizeof.FLOAT); cuMemcpyDtoH(Pointer.to(fnum_corrs), gpu_num_corr_tiles, 1 * Sizeof.FLOAT);
...@@ -1437,17 +1582,6 @@ public class GPUTileProcessor { ...@@ -1437,17 +1582,6 @@ public class GPUTileProcessor {
// read extra data for macro generation: 4 DIFFs, 4 of R, 4 of B, 4 of G // read extra data for macro generation: 4 DIFFs, 4 of R, 4 of B, 4 of G
public float [][] getExtra(){ public float [][] getExtra(){
int [] texture_indices = getTextureIndices(); int [] texture_indices = getTextureIndices();
/*
float [] fnum_tiles = new float[1];
cuMemcpyDtoH(Pointer.to(fnum_tiles), gpu_num_texture_ovlp, 1 * Sizeof.FLOAT);
int num_tiles = Float.floatToIntBits(fnum_tiles[0]);
float [] ftiles = new float[num_tiles];
cuMemcpyDtoH(Pointer.to(fnum_tiles), gpu_texture_indices_ovlp, num_tiles * Sizeof.FLOAT);
int [] tiles = new int[num_tiles];
for (int i = 0; i < num_tiles; i++) {
tiles[i] = Float.floatToIntBits(ftiles[i]);
}
*/
int num_tile_extra = NUM_CAMS*(NUM_COLORS+1); int num_tile_extra = NUM_CAMS*(NUM_COLORS+1);
float [] diff_rgb_combo = new float[texture_indices.length * num_tile_extra]; float [] diff_rgb_combo = new float[texture_indices.length * num_tile_extra];
cuMemcpyDtoH(Pointer.to(diff_rgb_combo), gpu_diff_rgb_combo, diff_rgb_combo.length * Sizeof.FLOAT); cuMemcpyDtoH(Pointer.to(diff_rgb_combo), gpu_diff_rgb_combo, diff_rgb_combo.length * Sizeof.FLOAT);
...@@ -1492,7 +1626,6 @@ public class GPUTileProcessor { ...@@ -1492,7 +1626,6 @@ public class GPUTileProcessor {
copy_rbga.srcDevice = gpu_textures_rgba; copy_rbga.srcDevice = gpu_textures_rgba;
copy_rbga.srcPitch = texture_stride_rgba * Sizeof.FLOAT; copy_rbga.srcPitch = texture_stride_rgba * Sizeof.FLOAT;
copy_rbga.dstMemoryType = CUmemorytype.CU_MEMORYTYPE_HOST; copy_rbga.dstMemoryType = CUmemorytype.CU_MEMORYTYPE_HOST;
// copy_woi.dstHost = Pointer.to(rslt);
copy_rbga.dstPitch = woi.width * Sizeof.FLOAT; copy_rbga.dstPitch = woi.width * Sizeof.FLOAT;
copy_rbga.WidthInBytes = woi.width * Sizeof.FLOAT; copy_rbga.WidthInBytes = woi.width * Sizeof.FLOAT;
...@@ -1516,7 +1649,6 @@ public class GPUTileProcessor { ...@@ -1516,7 +1649,6 @@ public class GPUTileProcessor {
int texture_slice_size = (2 * DTT_SIZE)* (2 * DTT_SIZE); // number of (float) elements in a single slice of a tile int texture_slice_size = (2 * DTT_SIZE)* (2 * DTT_SIZE); // number of (float) elements in a single slice of a tile
int texture_tile_size = texture_slices * texture_slice_size; // number of (float) elements in a multi-slice tile int texture_tile_size = texture_slices * texture_slice_size; // number of (float) elements in a multi-slice tile
int texture_size = texture_tile_size * num_texture_tiles; // number of (float) elements in the whole texture int texture_size = texture_tile_size * num_texture_tiles; // number of (float) elements in the whole texture
// float [] cpu_textures = new float [ num_texture_tiles * texture_size];
float [] cpu_textures = new float [texture_size]; float [] cpu_textures = new float [texture_size];
CUDA_MEMCPY2D copyD2H = new CUDA_MEMCPY2D(); CUDA_MEMCPY2D copyD2H = new CUDA_MEMCPY2D();
copyD2H.srcMemoryType = CUmemorytype.CU_MEMORYTYPE_DEVICE; copyD2H.srcMemoryType = CUmemorytype.CU_MEMORYTYPE_DEVICE;
...@@ -1588,7 +1720,6 @@ public class GPUTileProcessor { ...@@ -1588,7 +1720,6 @@ public class GPUTileProcessor {
return textures; return textures;
} }
public double [][][][] doubleTextures( public double [][][][] doubleTextures(
Rectangle woi, Rectangle woi,
int [] indices, int [] indices,
...@@ -1617,9 +1748,6 @@ public class GPUTileProcessor { ...@@ -1617,9 +1748,6 @@ public class GPUTileProcessor {
return textures; return textures;
} }
public float [][] getRBG (int ncam){ public float [][] getRBG (int ncam){
int height = (IMG_HEIGHT + DTT_SIZE); int height = (IMG_HEIGHT + DTT_SIZE);
int width = (IMG_WIDTH + DTT_SIZE); int width = (IMG_WIDTH + DTT_SIZE);
...@@ -1649,98 +1777,6 @@ public class GPUTileProcessor { ...@@ -1649,98 +1777,6 @@ public class GPUTileProcessor {
return fimg; return fimg;
} }
// private static CUfunction [] createFunctions(
/**
 * Compiles CUDA source units with NVRTC, links the resulting PTX together with the
 * library at {@code LIBRARY_PATH}, loads the linked image into {@code module} and
 * looks up the requested kernels in it.
 *
 * @param sourceCodeUnits CUDA source texts, each one compiled as a separate NVRTC program
 * @param kernelNames     names of the kernel functions to look up in the linked module
 * @param capability      value appended to {@code --gpu-architecture=compute_} for NVRTC
 * @return one {@code CUfunction} per entry of {@code kernelNames}, in the same order
 * @throws IOException if any of the source units fails to compile; the NVRTC failure
 *                     is attached as the cause
 */
private CUfunction [] createFunctions(
        String [] sourceCodeUnits,
        String [] kernelNames,
        int       capability
        ) throws IOException
{
    CUfunction [] functions = new CUfunction [kernelNames.length];
    byte[][] ptxDataUnits = new byte [sourceCodeUnits.length][];
    String [] options = {"--gpu-architecture=compute_"+capability};
    for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
        // Use the NVRTC to create a program by compiling the source code
        nvrtcProgram program = new nvrtcProgram();
        nvrtcCreateProgram(program, sourceCodeUnits[cunit], null, 0, null, null);
        try {
            // Failure is tracked per unit: a flag shared between units would stay set
            // after the first successful compilation and hide later failures.
            Exception compileFailure = null;
            try {
                nvrtcCompileProgram(program, options.length, options);
            } catch (Exception e) {
                System.out.println("nvrtcCompileProgram() FAILED");
                compileFailure = e;
            }
            // Compilation log with errors/warnings - printed before failing so the
            // diagnostics are visible to the user
            String [] programLog = new String[1];
            nvrtcGetProgramLog(program, programLog);
            String log = programLog[0].trim();
            if (!log.isEmpty()) {
                System.err.println("Program compilation log:\n" + log);
            }
            if (compileFailure != null) {
                throw new IOException("Could not compile program", compileFailure);
            }
            // Get the PTX code of the compiled program (not the binary)
            String [] ptx = new String[1];
            nvrtcGetPTX(program, ptx);
            ptxDataUnits[cunit] = ptx[0].getBytes();
        } finally {
            // Release the NVRTC program on both the success and the failure paths
            nvrtcDestroyProgram(program);
        }
        System.out.println("ptxDataUnits["+cunit+"].length="+ptxDataUnits[cunit].length);
    }

    JITOptions jitOptions = new JITOptions();
    jitOptions.putInt(CU_JIT_LOG_VERBOSE, 1);
    CUlinkState state = new CUlinkState();
    cuLinkCreate(jitOptions, state);
    // The library is added to the link before the compiled PTX units
    cuLinkAddFile(state, CU_JIT_INPUT_LIBRARY, LIBRARY_PATH, jitOptions);
    for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
        // Each unit gets its own name ("input<N>.ptx").
        // NOTE(review): the original line carried a "CUDA_ERROR_INVALID_PTX" remark - presumably
        // the error seen with the other naming/ordering variants that were tried; confirm.
        cuLinkAddData(state, CU_JIT_INPUT_PTX, Pointer.to(ptxDataUnits[cunit]), ptxDataUnits[cunit].length, "input"+cunit+".ptx", jitOptions);
    }
    long [] size = { 0 };
    Pointer image = new Pointer();
    // Exceptions are disabled around cuLinkComplete() so that its result code can be printed
    JCudaDriver.setExceptionsEnabled(false);
    try {
        int cuda_result = cuLinkComplete(state, image, size);
        System.out.println("cuLinkComplete() -> "+cuda_result);
    } finally {
        JCudaDriver.setExceptionsEnabled(true);
    }
    // The image is loaded before the link state is destroyed
    module = new CUmodule();
    cuModuleLoadDataEx(module, image, 0, new int[0], Pointer.to(new int[0]));
    cuLinkDestroy(state);

    for (int i = 0; i < kernelNames.length; i++) {
        // Find the function in the module by name, get its pointer
        functions[i] = new CUfunction();
        cuModuleGetFunction(functions[i], module, kernelNames[i]);
    }
    return functions;
}
/**
 * Reads the whole file at {@code path} and decodes its bytes as UTF-8.
 *
 * @param path location of the file to read
 * @return the file contents, or {@code null} if the file could not be read
 */
static String readFileAsString(String path)
{
    try {
        return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
    } catch (IOException ignored) {
        // An unreadable file is reported to the caller as null
        return null;
    }
}
public void getTileSubcamOffsets( public void getTileSubcamOffsets(
final TpTask[] tp_tasks, // will use // modify to have offsets for 8 cameras final TpTask[] tp_tasks, // will use // modify to have offsets for 8 cameras
final GeometryCorrection geometryCorrection_main, final GeometryCorrection geometryCorrection_main,
...@@ -1937,9 +1973,6 @@ public class GPUTileProcessor { ...@@ -1937,9 +1973,6 @@ public class GPUTileProcessor {
return lpf; return lpf;
} }
} // end of public class GpuQuad
} // end of public class GPUTileProcessor
}
...@@ -452,7 +452,9 @@ public class TwoQuadCLT { ...@@ -452,7 +452,9 @@ public class TwoQuadCLT {
} }
public void processCLTQuadCorrPairsGpu( public void processCLTQuadCorrPairsGpu(
GPUTileProcessor gPUTileProcessor, // GPUTileProcessor gPUTileProcessor,
GPUTileProcessor.GpuQuad gpuQuad_main,
GPUTileProcessor.GpuQuad gpuQuad_aux,
QuadCLT quadCLT_main, QuadCLT quadCLT_main,
QuadCLT quadCLT_aux, QuadCLT quadCLT_aux,
CLTParameters clt_parameters, CLTParameters clt_parameters,
...@@ -516,7 +518,9 @@ public class TwoQuadCLT { ...@@ -516,7 +518,9 @@ public class TwoQuadCLT {
// Temporarily processing individually with the old code // Temporarily processing individually with the old code
processCLTQuadCorrPairGpu( processCLTQuadCorrPairGpu(
gPUTileProcessor, // GPUTileProcessor gPUTileProcessor, // gPUTileProcessor, // GPUTileProcessor gPUTileProcessor,
gpuQuad_main, // GPUTileProcessor.GpuQuad gpuQuad_main,
gpuQuad_aux, // GPUTileProcessor.GpuQuad gpuQuad_aux,
quadCLT_main, // QuadCLT quadCLT_main, quadCLT_main, // QuadCLT quadCLT_main,
quadCLT_aux, // QuadCLT quadCLT_aux, quadCLT_aux, // QuadCLT quadCLT_aux,
imp_srcs_main, // ImagePlus [] imp_quad_main, imp_srcs_main, // ImagePlus [] imp_quad_main,
...@@ -1934,7 +1938,9 @@ public class TwoQuadCLT { ...@@ -1934,7 +1938,9 @@ public class TwoQuadCLT {
} }
public ImagePlus [] processCLTQuadCorrPairGpu( public ImagePlus [] processCLTQuadCorrPairGpu(
GPUTileProcessor gPUTileProcessor, // GPUTileProcessor gPUTileProcessor,
GPUTileProcessor.GpuQuad gpuQuad_main,
GPUTileProcessor.GpuQuad gpuQuad_aux,
QuadCLT quadCLT_main, QuadCLT quadCLT_main,
QuadCLT quadCLT_aux, QuadCLT quadCLT_aux,
ImagePlus [] imp_quad_main, ImagePlus [] imp_quad_main,
...@@ -1980,17 +1986,17 @@ public class TwoQuadCLT { ...@@ -1980,17 +1986,17 @@ public class TwoQuadCLT {
image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_g), image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_g),
image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_m) image_dtt.floatGetCltLpfFd(clt_parameters.gpu_sigma_m)
}; };
gPUTileProcessor.setLpfRbg( gpuQuad_main.setLpfRbg(
lpf_rgb); lpf_rgb);
float [] lpf_flat = image_dtt.floatGetCltLpfFd(clt_parameters.getGpuCorrSigma(is_mono)); float [] lpf_flat = image_dtt.floatGetCltLpfFd(clt_parameters.getGpuCorrSigma(is_mono));
gPUTileProcessor.setLpfCorr( gpuQuad_main.setLpfCorr(
"lpf_corr", // String const_name, // "lpf_corr" "lpf_corr", // String const_name, // "lpf_corr"
lpf_flat); lpf_flat);
float [] lpf_rb_flat = image_dtt.floatGetCltLpfFd(clt_parameters.getGpuCorrRBSigma(is_mono)); float [] lpf_rb_flat = image_dtt.floatGetCltLpfFd(clt_parameters.getGpuCorrRBSigma(is_mono));
gPUTileProcessor.setLpfCorr( gpuQuad_main.setLpfCorr(
"lpf_rb_corr", // String const_name, // "lpf_corr" "lpf_rb_corr", // String const_name, // "lpf_corr"
lpf_rb_flat); lpf_rb_flat);
...@@ -2026,11 +2032,11 @@ public class TwoQuadCLT { ...@@ -2026,11 +2032,11 @@ public class TwoQuadCLT {
debugLevel); // final int debugLevel); debugLevel); // final int debugLevel);
gPUTileProcessor.setConvolutionKernels( gpuQuad_main.setConvolutionKernels(
(use_aux?quadCLT_aux.getCLTKernels() : quadCLT_main.getCLTKernels()), // double [][][][][][] clt_kernels, (use_aux?quadCLT_aux.getCLTKernels() : quadCLT_main.getCLTKernels()), // double [][][][][][] clt_kernels,
false); // boolean force) false); // boolean force)
gPUTileProcessor.setBayerImages( gpuQuad_main.setBayerImages(
(use_aux? quadCLT_aux.image_data: quadCLT_main.image_data), // double [][][] bayer_data, (use_aux? quadCLT_aux.image_data: quadCLT_main.image_data), // double [][][] bayer_data,
true); // boolean force); true); // boolean force);
...@@ -2040,7 +2046,7 @@ public class TwoQuadCLT { ...@@ -2040,7 +2046,7 @@ public class TwoQuadCLT {
clt_parameters.gpu_woi_ty, clt_parameters.gpu_woi_ty,
clt_parameters.gpu_woi_twidth, clt_parameters.gpu_woi_twidth,
clt_parameters.gpu_woi_theight); clt_parameters.gpu_woi_theight);
GPUTileProcessor.TpTask [] tp_tasks = gPUTileProcessor.setFullFrameImages( GPUTileProcessor.TpTask [] tp_tasks = gpuQuad_main.setFullFrameImages(
false, // boolean calc_offsets, // old way, now not needed with GPU calculation false, // boolean calc_offsets, // old way, now not needed with GPU calculation
twoi, // Rectangle woi, twoi, // Rectangle woi,
clt_parameters.gpu_woi_round, // boolean round_woi, clt_parameters.gpu_woi_round, // boolean round_woi,
...@@ -2096,26 +2102,14 @@ public class TwoQuadCLT { ...@@ -2096,26 +2102,14 @@ public class TwoQuadCLT {
} }
} }
gPUTileProcessor.setTasks( gpuQuad_main.setTasks(
tp_tasks, // TpTask [] tile_tasks, tp_tasks, // TpTask [] tile_tasks,
use_aux); // boolean use_aux) use_aux); // boolean use_aux)
gpuQuad_main.setGeometryCorrection(
// int [] corr_indices = gPUTileProcessor.getCorrTasks(
// tp_tasks);
// corr_indices array of integers to be passed to GPU
// gPUTileProcessor.setCorrIndices(corr_indices);
/*
int [] texture_indices = gPUTileProcessor.getTextureTasks(
tp_tasks);
gPUTileProcessor.setTextureIndices(
texture_indices);
*/
gPUTileProcessor.setGeometryCorrection(
quadCLT_main.getGeometryCorrection(), quadCLT_main.getGeometryCorrection(),
false); // boolean use_java_rByRDist) { // false - use newer GPU execCalcReverseDistortions); // once false); // boolean use_java_rByRDist) { // false - use newer GPU execCalcReverseDistortions); // once
gPUTileProcessor.setExtrinsicsVector(quadCLT_main.getGeometryCorrection().getCorrVector()); // for each new image gpuQuad_main.setExtrinsicsVector(quadCLT_main.getGeometryCorrection().getCorrVector()); // for each new image
/* // TODO: calculate from the camera geometry? /* // TODO: calculate from the camera geometry?
double[][] port_offsets = { // used only in textures to scale differences double[][] port_offsets = { // used only in textures to scale differences
...@@ -2129,33 +2123,33 @@ public class TwoQuadCLT { ...@@ -2129,33 +2123,33 @@ public class TwoQuadCLT {
System.out.println("\n------------ Running GPU "+NREPEAT+" times ----------------"); System.out.println("\n------------ Running GPU "+NREPEAT+" times ----------------");
long startGPU=System.nanoTime(); long startGPU=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) { for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execCalcReverseDistortions(); gpuQuad_main.execCalcReverseDistortions();
} }
long startRotDerivs=System.nanoTime(); long startRotDerivs=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) { for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execRotDerivs(); gpuQuad_main.execRotDerivs();
} }
long startTasksSetup=System.nanoTime(); long startTasksSetup=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) { for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execSetTilesOffsets(); gpuQuad_main.execSetTilesOffsets();
} }
long startDirectConvert=System.nanoTime(); long startDirectConvert=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) { for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execConvertDirect(); gpuQuad_main.execConvertDirect();
} }
// run imclt; // run imclt;
long startIMCLT=System.nanoTime(); long startIMCLT=System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) { for (int i = 0; i < NREPEAT; i++ ) {
gPUTileProcessor.execImcltRbgAll(quadCLT_main.isMonochrome()); gpuQuad_main.execImcltRbgAll(quadCLT_main.isMonochrome());
} }
long endImcltTime = System.nanoTime(); long endImcltTime = System.nanoTime();
// run correlation // run correlation
long startCorr2d=System.nanoTime(); // System.nanoTime(); long startCorr2d=System.nanoTime(); // System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execCorr2D( for (int i = 0; i < NREPEAT; i++ ) gpuQuad_main.execCorr2D(
scales,// double [] scales, scales,// double [] scales,
fat_zero, // double fat_zero); fat_zero, // double fat_zero);
clt_parameters.gpu_corr_rad); // int corr_radius clt_parameters.gpu_corr_rad); // int corr_radius
...@@ -2163,7 +2157,7 @@ public class TwoQuadCLT { ...@@ -2163,7 +2157,7 @@ public class TwoQuadCLT {
long endCorr2d = System.nanoTime(); long endCorr2d = System.nanoTime();
// run textures // run textures
long startTextures = System.nanoTime(); // System.nanoTime(); long startTextures = System.nanoTime(); // System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execTextures( for (int i = 0; i < NREPEAT; i++ ) gpuQuad_main.execTextures(
col_weights, // double [] color_weights, col_weights, // double [] color_weights,
quadCLT_main.isLwir(), // boolean is_lwir, quadCLT_main.isLwir(), // boolean is_lwir,
clt_parameters.min_shot, // double min_shot, // 10.0 clt_parameters.min_shot, // double min_shot, // 10.0
...@@ -2176,7 +2170,7 @@ public class TwoQuadCLT { ...@@ -2176,7 +2170,7 @@ public class TwoQuadCLT {
// run texturesRBGA // run texturesRBGA
long startTexturesRBGA = System.nanoTime(); // System.nanoTime(); long startTexturesRBGA = System.nanoTime(); // System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execRBGA( for (int i = 0; i < NREPEAT; i++ ) gpuQuad_main.execRBGA(
col_weights, // double [] color_weights, col_weights, // double [] color_weights,
quadCLT_main.isLwir(), // boolean is_lwir, quadCLT_main.isLwir(), // boolean is_lwir,
clt_parameters.min_shot, // double min_shot, // 10.0 clt_parameters.min_shot, // double min_shot, // 10.0
...@@ -2212,7 +2206,7 @@ public class TwoQuadCLT { ...@@ -2212,7 +2206,7 @@ public class TwoQuadCLT {
// get data back from GPU // get data back from GPU
float [][][] iclt_fimg = new float [GPUTileProcessor.NUM_CAMS][][]; float [][][] iclt_fimg = new float [GPUTileProcessor.NUM_CAMS][][];
for (int ncam = 0; ncam < iclt_fimg.length; ncam++) { for (int ncam = 0; ncam < iclt_fimg.length; ncam++) {
iclt_fimg[ncam] = gPUTileProcessor.getRBG(ncam); iclt_fimg[ncam] = gpuQuad_main.getRBG(ncam);
} }
int out_width = GPUTileProcessor.IMG_WIDTH + GPUTileProcessor.DTT_SIZE; int out_width = GPUTileProcessor.IMG_WIDTH + GPUTileProcessor.DTT_SIZE;
...@@ -2228,7 +2222,7 @@ public class TwoQuadCLT { ...@@ -2228,7 +2222,7 @@ public class TwoQuadCLT {
extra_titles[g * GPUTileProcessor.NUM_CAMS+ncam]= extra_group_titles[g]+"-"+ncam; extra_titles[g * GPUTileProcessor.NUM_CAMS+ncam]= extra_group_titles[g]+"-"+ncam;
} }
} }
float [][] extra = gPUTileProcessor.getExtra(); float [][] extra = gpuQuad_main.getExtra();
(new ShowDoubleFloatArrays()).showArrays( (new ShowDoubleFloatArrays()).showArrays(
extra, extra,
tilesX, tilesX,
...@@ -2260,8 +2254,8 @@ public class TwoQuadCLT { ...@@ -2260,8 +2254,8 @@ public class TwoQuadCLT {
//show_corr //show_corr
int [] wh = new int[2]; int [] wh = new int[2];
if (clt_parameters.show_corr) { if (clt_parameters.show_corr) {
int [] corr_indices = gPUTileProcessor.getCorrIndices(); int [] corr_indices = gpuQuad_main.getCorrIndices();
float [][] corr2D = gPUTileProcessor.getCorr2D( float [][] corr2D = gpuQuad_main.getCorr2D(
clt_parameters.gpu_corr_rad); // int corr_rad); clt_parameters.gpu_corr_rad); // int corr_rad);
// convert to 6-layer image using tasks // convert to 6-layer image using tasks
double [][] dbg_corr = GPUTileProcessor.getCorr2DView( double [][] dbg_corr = GPUTileProcessor.getCorr2DView(
...@@ -2344,7 +2338,7 @@ public class TwoQuadCLT { ...@@ -2344,7 +2338,7 @@ public class TwoQuadCLT {
// Use GPU prepared RBGA // Use GPU prepared RBGA
if (clt_parameters.show_rgba_color) { if (clt_parameters.show_rgba_color) {
Rectangle woi = new Rectangle(); Rectangle woi = new Rectangle();
float [][] rbga = gPUTileProcessor.getRBGA( float [][] rbga = gpuQuad_main.getRBGA(
(is_mono?1:3), // int num_colors, (is_mono?1:3), // int num_colors,
woi); woi);
(new ShowDoubleFloatArrays()).showArrays( (new ShowDoubleFloatArrays()).showArrays(
...@@ -2413,12 +2407,12 @@ public class TwoQuadCLT { ...@@ -2413,12 +2407,12 @@ public class TwoQuadCLT {
if (clt_parameters.show_rgba_color && (debugLevel > 100)) { // disabling if (clt_parameters.show_rgba_color && (debugLevel > 100)) { // disabling
int numcol = quadCLT_main.isMonochrome()?1:3; int numcol = quadCLT_main.isMonochrome()?1:3;
int ports = imp_quad_main.length; int ports = imp_quad_main.length;
int [] texture_indices = gPUTileProcessor.getTextureIndices(); int [] texture_indices = gpuQuad_main.getTextureIndices();
int num_src_slices = numcol + 1 + (clt_parameters.keep_weights?(ports + numcol + 1):0); // 12 ; // calculate int num_src_slices = numcol + 1 + (clt_parameters.keep_weights?(ports + numcol + 1):0); // 12 ; // calculate
// float [][][] ftextures = gPUTileProcessor.getTextures( // float [][][] ftextures = gPUTileProcessor.getTextures(
// (is_mono?1:3), // int num_colors, // (is_mono?1:3), // int num_colors,
// clt_parameters.keep_weights); // boolean keep_weights); // clt_parameters.keep_weights); // boolean keep_weights);
float [] flat_textures = gPUTileProcessor.getFlatTextures( float [] flat_textures = gpuQuad_main.getFlatTextures(
texture_indices.length, texture_indices.length,
(is_mono?1:3), // int num_colors, (is_mono?1:3), // int num_colors,
clt_parameters.keep_weights); // boolean keep_weights); clt_parameters.keep_weights); // boolean keep_weights);
...@@ -2447,7 +2441,7 @@ public class TwoQuadCLT { ...@@ -2447,7 +2441,7 @@ public class TwoQuadCLT {
} }
} }
} }
double [][][][] texture_tiles = gPUTileProcessor.doubleTextures( double [][][][] texture_tiles = gpuQuad_main.doubleTextures(
new Rectangle(0, 0, tilesX, tilesY), // Rectangle woi, new Rectangle(0, 0, tilesX, tilesY), // Rectangle woi,
texture_indices, // int [] indices, texture_indices, // int [] indices,
flat_textures, // float [][][] ftextures, flat_textures, // float [][][] ftextures,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment