Commit 72b6bdce authored by Andrey Filippov's avatar Andrey Filippov

Got 4 images converted, corrected, and converted back with JCUDA!

parent 184a23d0
......@@ -640,6 +640,7 @@ private Panel panel1,
panelClt_GPU.setLayout(new GridLayout(1, 0, 5, 5)); // rows, columns, vgap, hgap
addButton("JCUDA TEST", panelClt_GPU);
addButton("TF TEST", panelClt_GPU);
addButton("GPU files", panelClt_GPU, color_conf_process);
addButton("Rig8 gpu", panelClt_GPU, color_conf_process);
addButton("ShowGPU", panelClt_GPU, color_conf_process);
add(panelClt_GPU);
......@@ -4579,12 +4580,19 @@ private Panel panel1,
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
getPairImages2();
return;
/* ======================================================================== */
} else if (label.equals("GPU files")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
generateGPUDebugFiles();
return;
/* ======================================================================== */
} else if (label.equals("Rig8 gpu")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
getPairImages2Gpu();
return;
/* ======================================================================== */
} else if (label.equals("ShowGPU")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
......@@ -5143,6 +5151,52 @@ private Panel panel1,
return true;
}
/**
 * Prepares the rig images and delegates to {@code TWO_QUAD_CLT.prepareFilesForGPUDebug()}
 * (presumably writes the data files used to debug the GPU kernels - see that method),
 * then re-saves the configuration.
 *
 * The 3D pass lists of both cameras are dropped before the call (to save memory if this
 * runs after a 3D model was built) and again after it, whether or not the call succeeded.
 *
 * @return true if the files were generated; false if the rig images could not be prepared,
 *         the configuration path dialog was aborted, or the file generation threw an exception
 */
public boolean generateGPUDebugFiles() {
    if (!prepareRigImages()) return false;
    String configPath=getSaveCongigPath();
    // Constant-first comparison: configPath may legitimately be null (see the save step below)
    if ("ABORT".equals(configPath)) return false;
    if (DEBUG_LEVEL > -2){
        System.out.println("++++++++++++++ Calculating combined correlations ++++++++++++++");
    }
    // reset if ran after 3d model to save memory
    if (QUAD_CLT.tp != null) {
        QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    if (QUAD_CLT_AUX.tp != null) {
        QUAD_CLT_AUX.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    boolean generated = true;
    try {
        TWO_QUAD_CLT.prepareFilesForGPUDebug(
                QUAD_CLT, // QuadCLT quadCLT_main,
                QUAD_CLT_AUX, // QuadCLT quadCLT_aux,
                CLT_PARAMETERS, // EyesisCorrectionParameters.DCTParameters dct_parameters,
                DEBAYER_PARAMETERS, //EyesisCorrectionParameters.DebayerParameters debayerParameters,
                COLOR_PROC_PARAMETERS, //EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
                RGB_PARAMETERS, //EyesisCorrectionParameters.RGBParameters rgbParameters,
                THREADS_MAX, //final int threadsMax, // maximal number of threads to launch
                UPDATE_STATUS, //final boolean updateStatus,
                DEBUG_LEVEL); //final int debugLevel);
    } catch (Exception e) {
        // Report the failure to the caller instead of claiming success
        e.printStackTrace();
        generated = false;
    }
    // Drop the pass lists again so running "Ground truth" after would be OK.
    // tp is guarded the same way as before the call - it may still be null.
    if (QUAD_CLT.tp != null) {
        QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    if (QUAD_CLT_AUX.tp != null) {
        QUAD_CLT_AUX.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    if (!generated) return false;
    if (configPath!=null) {
        saveTimestampedProperties( // save config again
                configPath, // full path or null
                null, // use as default directory if path==null
                true,
                PROPERTIES);
    }
    return true;
}
public boolean getPairImages2Gpu() {
if (!prepareRigImages()) return false;
String configPath=getSaveCongigPath();
......@@ -5165,6 +5219,7 @@ private Panel panel1,
System.out.println("Failed to initialize GPU class");
// TODO Auto-generated catch block
e.printStackTrace();
return false;
} //final int debugLevel);
}
......@@ -5186,6 +5241,7 @@ private Panel panel1,
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
return false;
} //final int debugLevel);
QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses(); // so running "Ground truth" after would be OK
QUAD_CLT_AUX.tp.clt_3d_passes = null; //.resetCLTPasses();
......@@ -5203,7 +5259,6 @@ private Panel panel1,
public boolean rigPlanes() {
if ((QUAD_CLT == null) || (QUAD_CLT.tp == null) || (QUAD_CLT.tp.clt_3d_passes == null) || (QUAD_CLT.tp.clt_3d_passes.size() == 0)) {
String msg = "DSI data is not available. Please run \"CLT 3D\" first";
......
This diff is collapsed.
......@@ -4489,6 +4489,56 @@ public class QuadCLT {
return rslt;
}
// float
/**
 * Float-array front end of linearStackToColor(): wraps the first three planes of
 * {@code iclt_data} into an ImageStack (slices labelled "red", "blue", "green", NaN
 * replaced with 0.0) and forwards everything to the ImageStack-based overload.
 *
 * @param iclt_data at least three planes of width*height floats; an optional fourth
 *                  plane is forwarded as alpha (null when absent)
 * @param width     plane width, passed both to the stack builder and to the overload
 * @param height    plane height, passed both to the stack builder and to the overload
 * @return whatever the ImageStack-based overload returns
 */
public ImagePlus linearStackToColor(
EyesisCorrectionParameters.CLTParameters clt_parameters,
EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
EyesisCorrectionParameters.RGBParameters rgbParameters,
String name,
String suffix, // such as disparity=...
boolean toRGB,
boolean bpp16, // 16-bit per channel color mode for result
boolean saveShowIntermediate, // save/show if set globally
boolean saveShowFinal, // save/show result (color image?)
float [][] iclt_data,
int width, // int tilesX,
int height, // int tilesY,
double scaleExposure,
int debugLevel
)
{
    final showDoubleFloatArrays stackBuilder = new showDoubleFloatArrays();
    // Only the first three planes become stack slices
    final float [][] colorPlanes = new float [3][];
    for (int chn = 0; chn < colorPlanes.length; chn++) {
        colorPlanes[chn] = iclt_data[chn];
    }
    // Optional fourth plane is alpha (0..1.0)
    final float [] alphaPlane = (iclt_data.length > 3) ? iclt_data[3] : null;
    final ImageStack colorStack = stackBuilder.makeStack(
            colorPlanes,
            width,
            height,
            new String [] {"red", "blue", "green"},
            true); // replace NaN with 0.0
    return linearStackToColor(
            clt_parameters,
            colorProcParameters,
            rgbParameters,
            name,
            suffix,
            toRGB,
            bpp16,
            saveShowIntermediate,
            saveShowFinal,
            colorStack,
            alphaPlane,
            width,
            height,
            scaleExposure,
            debugLevel);
}
// double data
public ImagePlus linearStackToColor(
EyesisCorrectionParameters.CLTParameters clt_parameters,
EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
......@@ -4511,19 +4561,64 @@ public class QuadCLT {
String [] sliceNames = {"red", "blue", "green"};
double [] alpha = null; // (0..1.0)
double [][] rgb_in = {iclt_data[0],iclt_data[1],iclt_data[2]};
if (iclt_data.length > 3) alpha = iclt_data[3];
float [] alpha_pixels = null;
if (iclt_data.length > 3) {
alpha = iclt_data[3];
if (alpha != null){
alpha_pixels = new float [alpha.length];
for (int i = 0; i <alpha.length; i++){
alpha_pixels[i] = (float) alpha[i];
}
}
}
ImageStack stack = sdfa_instance.makeStack(
rgb_in, // iclt_data,
width, // (tilesX + 0) * clt_parameters.transform_size,
height, // (tilesY + 0) * clt_parameters.transform_size,
sliceNames, // or use null to get chn-nn slice names
true); // replace NaN with 0.0
return linearStackToColor(
clt_parameters, // EyesisCorrectionParameters.CLTParameters clt_parameters,
colorProcParameters, // EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
rgbParameters, // EyesisCorrectionParameters.RGBParameters rgbParameters,
name, // String name,
suffix, // String suffix, // such as disparity=...
toRGB, // boolean toRGB,
bpp16, // boolean bpp16, // 16-bit per channel color mode for result
saveShowIntermediate, // boolean saveShowIntermediate, // save/show if set globally
saveShowFinal, // boolean saveShowFinal, // save/show result (color image?)
stack, // ImageStack stack,
alpha_pixels, // float [] alpha_pixels,
width, // int width, // int tilesX,
height, // int height, // int tilesY,
scaleExposure, // double scaleExposure,
debugLevel); //int debugLevel
}
public ImagePlus linearStackToColor(
EyesisCorrectionParameters.CLTParameters clt_parameters,
EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
EyesisCorrectionParameters.RGBParameters rgbParameters,
String name,
String suffix, // such as disparity=...
boolean toRGB,
boolean bpp16, // 16-bit per channel color mode for result
boolean saveShowIntermediate, // save/show if set globally
boolean saveShowFinal, // save/show result (color image?)
ImageStack stack,
float [] alpha_pixels,
int width, // int tilesX,
int height, // int tilesY,
double scaleExposure,
int debugLevel
)
{
// showDoubleFloatArrays sdfa_instance = new showDoubleFloatArrays(); // just for debugging?
if (debugLevel > -1) { // 0){
double [] chn_avg = {0.0,0.0,0.0};
float [] pixels;
// int width = stack.getWidth();
// int height = stack.getHeight();
for (int c = 0; c <3; c++){
pixels = (float[]) stack.getPixels(c+1);
for (int i = 0; i<pixels.length; i++){
......@@ -4610,11 +4705,7 @@ public class QuadCLT {
titleFull=name+"-YPrPb"+suffix;
if (debugLevel > 1) System.out.println("Using full stack, including YPbPr");
}
if (alpha != null){
float [] alpha_pixels = new float [alpha.length];
for (int i = 0; i <alpha.length; i++){
alpha_pixels[i] = (float) alpha[i];
}
if (alpha_pixels != null){
stack.addSlice("alpha",alpha_pixels);
}
......@@ -4674,6 +4765,8 @@ public class QuadCLT {
}
public void apply_fine_corr(
double [][][] corr,
int debugLevel)
......
This diff is collapsed.
......@@ -213,6 +213,30 @@ import ij.process.ImageProcessor;
return array_stack;
}
/**
 * Builds an ImageStack with one slice per non-null plane of {@code pixels}.
 *
 * @param pixels planes to add; null entries are skipped, every other entry must hold
 *               exactly width*height values
 * @param width  stack width
 * @param height stack height
 * @param titles slice labels indexed like {@code pixels}; when null, or when it has no
 *               entry for a plane, the label "chn-&lt;index&gt;" is used instead
 * @param noNaN  when true each plane is copied with NaN replaced by 0.0; when false the
 *               caller's array is added to the stack directly (not copied)
 * @return the stack, or null (after printing a message) if a plane has the wrong length
 */
public ImageStack makeStack(float[][] pixels, int width, int height, String [] titles, boolean noNaN) {
    ImageStack array_stack=new ImageStack(width,height);
    for (int i=0;i<pixels.length;i++) {
        if (pixels[i]==null) continue;
        if (pixels[i].length!=(width*height)){
            System.out.println("showArrays(): pixels["+i+"].length="+pixels[i].length+" != width (+"+width+") * height("+height+")="+(width*height));
            return null;
        }
        float [] fpixels;
        if (noNaN){
            fpixels=new float[pixels[i].length];
            for (int j=0;j<fpixels.length;j++) fpixels[j]= Float.isNaN(pixels[i][j])? 0.0F: pixels[i][j];
        } else {
            fpixels=pixels[i];
        }
        // Fall back to the generated label rather than indexing past the end of a short titles array
        String label = ((titles!=null) && (i<titles.length)) ? titles[i] : ("chn-"+i);
        array_stack.addSlice(label, fpixels);
    }
    return array_stack;
}
public ImagePlus [] makeArrays(double[][] pixels, int width, int height, String title) {
int i,j;
float [] fpixels;
......
......@@ -36,9 +36,24 @@
* \brief Top level of the Tile Processor for frequency domain
*/
// Avoiding includes in jcuda, all source files will be merged
// Configuration used only when JCUDA is not defined: includes the DTT header and defines
// the sizing macros locally.
// NOTE(review): presumably the JCUDA host supplies equivalent definitions when it merges
// the source files - confirm.
#ifndef JCUDA
#pragma once
#include "dtt8x8.cuh"
#define THREADSX (DTT_SIZE)
// Image dimensions; TILESX and TILESY are derived as IMG_WIDTH / DTT_SIZE and IMG_HEIGHT / DTT_SIZE
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define KERNELS_HOR 164
#define KERNELS_VERT 123
// Bounds of the per-camera and per-color loops of the tile conversion; NUM_CAMS also sizes tp_task.xy
#define NUM_CAMS 4
#define NUM_COLORS 3
// log2 of the kernel step: KERNELS_STEP is defined as (1 << KERNELS_LSTEP)
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
// Tiles handled by one thread block: the task index is blockIdx.x * TILES_PER_BLOCK + threadIdx.y
#define TILES_PER_BLOCK 4
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#endif
//#define IMCLT14
//#define NOICLT 1
//#define TEST_IMCLT
......@@ -70,7 +85,7 @@
// Removed rest of NOICLT : Average run time =943.456177 ms
// Added lpf: Average run time =1046.101318 ms (0.1 sec, 10%) - can be combined with the PSF kernel
//#define USE_UMUL24
#define TILES_PER_BLOCK 4
////#define TILES_PER_BLOCK 4
//Average run time =5155.922852 ms
//Average run time =1166.388306 ms
//Average run time =988.750977 ms
......@@ -78,25 +93,16 @@
//Average run time =9656.743164 ms
// Average run time =9422.057617 ms (reducing divergence)
//#define TILES_PER_BLOCK 1
#define THREADS_PER_TILE 8
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define NUM_CAMS 4
#define NUM_COLORS 3
#define KERNELS_LSTEP 4
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define IMAGE_TILE_SIDE 18
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
//#define THREADS_PER_TILE 8
//#define IMCLT_THREADS_PER_TILE 16
//#define IMCLT_TILES_PER_BLOCK 4
#define KERNELS_STEP (1 << KERNELS_LSTEP)
#define TILESX (IMG_WIDTH / DTT_SIZE)
#define TILESY (IMG_HEIGHT / DTT_SIZE)
// increase row length by 1 so vertical passes will use different ports
#define THREADSX (DTT_SIZE)
#define DTT_SIZE1 (DTT_SIZE + 1)
#define DTT_SIZE2 (2 * DTT_SIZE)
#define DTT_SIZE21 (DTT_SIZE2 + 1)
......@@ -124,9 +130,10 @@
// struct tp_task
//#define TASK_SIZE 12
// Per-tile task descriptor; the kernel stages task, txy and xy in a __shared__ array of these.
struct tp_task {
long task;
short ty;
short tx;
// NOTE(review): `task` is declared twice in this listing - the three members above and the
// two below look like alternative layouts of the same header; confirm which one is current.
int task;
// Packed tile index, passed where `tx | (ty << 16)` was passed before - presumably tx in the
// low 16 bits and ty in the high 16 bits; confirm against the host-side code that fills it.
int txy;
// short ty;
// short tx;
// Per camera: element [ncam][0] is passed as centerX and [ncam][1] as centerY to convertCorrectTile()
float xy[NUM_CAMS][2];
};
struct CltExtra{
......@@ -350,8 +357,9 @@ __device__ void imclt_plane(
const size_t dstride); // in floats (pixels)
extern "C"
__global__ void tileProcessor(
struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS],
__global__ void convert_correct_tiles(
// struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
float ** gpu_kernel_offsets, // [NUM_CAMS],
float ** gpu_kernels, // [NUM_CAMS],
float ** gpu_images, // [NUM_CAMS],
struct tp_task * gpu_tasks,
......@@ -361,6 +369,7 @@ __global__ void tileProcessor(
int lpf_mask) // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
{
// struct CltExtra* gpu_kernel_offsets = (struct CltExtra*) vgpu_kernel_offsets;
dim3 t = threadIdx;
int tile_in_block = threadIdx.y;
int task_num = blockIdx.x * TILES_PER_BLOCK + tile_in_block;
......@@ -370,8 +379,9 @@ __global__ void tileProcessor(
__shared__ struct tp_task tt [TILES_PER_BLOCK];
// Copy task data to shared memory
tt[tile_in_block].task = gpu_task -> task;
tt[tile_in_block].tx = gpu_task -> tx;
tt[tile_in_block].ty = gpu_task -> ty;
// tt[tile_in_block].tx = gpu_task -> tx;
// tt[tile_in_block].ty = gpu_task -> ty;
tt[tile_in_block].txy = gpu_task -> txy;
int thread0 = threadIdx.x & 1;
int thread12 = threadIdx.x >>1;
if (thread12 < NUM_CAMS) {
......@@ -408,7 +418,7 @@ __global__ void tileProcessor(
for (int ncam = 0; ncam < NUM_CAMS; ncam++){
for (int color = 0; color < NUM_COLORS; color++){
convertCorrectTile(
gpu_kernel_offsets[ncam], // float * gpu_kernel_offsets,
(struct CltExtra*)(gpu_kernel_offsets[ncam]), // struct CltExtra* gpu_kernel_offsets,
gpu_kernels[ncam], // float * gpu_kernels,
gpu_images[ncam], // float * gpu_images,
gpu_clt[ncam], // float * gpu_clt,
......@@ -416,7 +426,8 @@ __global__ void tileProcessor(
lpf_mask, // const int lpf_mask,
tt[tile_in_block].xy[ncam][0], // const float centerX,
tt[tile_in_block].xy[ncam][1], // const float centerY,
tt[tile_in_block].tx | (tt[tile_in_block].ty <<16), // const int txy,
// tt[tile_in_block].tx | (tt[tile_in_block].ty <<16), // const int txy,
tt[tile_in_block].txy, // const int txy,
dstride, // size_t dstride, // in floats (pixels)
(float * )(clt_tile [tile_in_block]), // float clt_tile [TILES_PER_BLOCK][NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE])
(float * )(clt_kernels[tile_in_block]), // float clt_tile [NUM_COLORS][4][DTT_SIZE][DTT_SIZE],
......
......@@ -44,12 +44,13 @@
* This file is compatible with both runtime and driver API, runtime is used for development
* with Nvidia Nsight, driver API when calling these kernels from Java
*/
#ifndef JCUDA
#define DTT_SIZE 8
#endif
#pragma once
#define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
#define DTT_SIZE 8
//#define CUDART_INF_F __int_as_float(0x7f800000)
/*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment