Commit 72b6bdce authored by Andrey Filippov's avatar Andrey Filippov

Got 4 images converted, corrected, and converted back with JCUDA!

parent 184a23d0
...@@ -640,6 +640,7 @@ private Panel panel1, ...@@ -640,6 +640,7 @@ private Panel panel1,
panelClt_GPU.setLayout(new GridLayout(1, 0, 5, 5)); // rows, columns, vgap, hgap panelClt_GPU.setLayout(new GridLayout(1, 0, 5, 5)); // rows, columns, vgap, hgap
addButton("JCUDA TEST", panelClt_GPU); addButton("JCUDA TEST", panelClt_GPU);
addButton("TF TEST", panelClt_GPU); addButton("TF TEST", panelClt_GPU);
addButton("GPU files", panelClt_GPU, color_conf_process);
addButton("Rig8 gpu", panelClt_GPU, color_conf_process); addButton("Rig8 gpu", panelClt_GPU, color_conf_process);
addButton("ShowGPU", panelClt_GPU, color_conf_process); addButton("ShowGPU", panelClt_GPU, color_conf_process);
add(panelClt_GPU); add(panelClt_GPU);
...@@ -4579,12 +4580,19 @@ private Panel panel1, ...@@ -4579,12 +4580,19 @@ private Panel panel1,
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL); EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
getPairImages2(); getPairImages2();
return; return;
/* ======================================================================== */
} else if (label.equals("GPU files")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
generateGPUDebugFiles();
return;
/* ======================================================================== */ /* ======================================================================== */
} else if (label.equals("Rig8 gpu")) { } else if (label.equals("Rig8 gpu")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL; DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL); EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
getPairImages2Gpu(); getPairImages2Gpu();
return; return;
/* ======================================================================== */ /* ======================================================================== */
} else if (label.equals("ShowGPU")) { } else if (label.equals("ShowGPU")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL; DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
...@@ -5143,6 +5151,52 @@ private Panel panel1, ...@@ -5143,6 +5151,52 @@ private Panel panel1,
return true; return true;
} }
/**
 * Action behind the "GPU files" button: prepares the rig images, then delegates to
 * {@code TWO_QUAD_CLT.prepareFilesForGPUDebug(...)} and finally re-saves the configuration.
 *
 * @return {@code false} if the rig images could not be prepared, if the configuration
 *         path selection returned "ABORT", or if {@code prepareFilesForGPUDebug} threw;
 *         {@code true} otherwise
 */
public boolean generateGPUDebugFiles() {
    if (!prepareRigImages()) return false;
    String configPath=getSaveCongigPath();
    // Constant-first comparison: configPath may be null (it is null-checked before saving below),
    // so configPath.equals(...) could throw a NullPointerException here.
    if ("ABORT".equals(configPath)) return false;
    if (DEBUG_LEVEL > -2){
        System.out.println("++++++++++++++ Calculating combined correlations ++++++++++++++");
    }
    // reset if ran after 3d model to save memory
    if (QUAD_CLT.tp != null) {
        QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    if (QUAD_CLT_AUX.tp != null) {
        QUAD_CLT_AUX.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    boolean success = true;
    try {
        TWO_QUAD_CLT.prepareFilesForGPUDebug(
                QUAD_CLT, // QuadCLT quadCLT_main,
                QUAD_CLT_AUX, // QuadCLT quadCLT_aux,
                CLT_PARAMETERS, // EyesisCorrectionParameters.DCTParameters dct_parameters,
                DEBAYER_PARAMETERS, //EyesisCorrectionParameters.DebayerParameters debayerParameters,
                COLOR_PROC_PARAMETERS, //EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
                // CHANNEL_GAINS_PARAMETERS, //CorrectionColorProc.ColorGainsParameters channelGainParameters,
                // CHANNEL_GAINS_PARAMETERS_AUX, //CorrectionColorProc.ColorGainsParameters channelGainParameters_aux,
                RGB_PARAMETERS, //EyesisCorrectionParameters.RGBParameters rgbParameters,
                THREADS_MAX, //final int threadsMax, // maximal number of threads to launch
                UPDATE_STATUS, //final boolean updateStatus,
                DEBUG_LEVEL); //final int debugLevel);
    } catch (Exception e) {
        // Report the failure to the caller instead of silently returning true
        // (consistent with getPairImages2Gpu(), which returns false on exception).
        System.out.println("Failed to prepare files for GPU debug");
        e.printStackTrace();
        success = false;
    }
    // Release the pass data even after a failure, so running "Ground truth" after would be OK.
    // Guarded the same way as the reset above: tp is not guaranteed to be non-null.
    if (QUAD_CLT.tp != null) {
        QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses();
    }
    if (QUAD_CLT_AUX.tp != null) {
        QUAD_CLT_AUX.tp.clt_3d_passes = null; //.resetCLTPasses();
    }
    if (!success) return false;
    if (configPath!=null) {
        saveTimestampedProperties( // save config again
                configPath, // full path or null
                null, // use as default directory if path==null
                true,
                PROPERTIES);
    }
    return true;
}
public boolean getPairImages2Gpu() { public boolean getPairImages2Gpu() {
if (!prepareRigImages()) return false; if (!prepareRigImages()) return false;
String configPath=getSaveCongigPath(); String configPath=getSaveCongigPath();
...@@ -5165,6 +5219,7 @@ private Panel panel1, ...@@ -5165,6 +5219,7 @@ private Panel panel1,
System.out.println("Failed to initialize GPU class"); System.out.println("Failed to initialize GPU class");
// TODO Auto-generated catch block // TODO Auto-generated catch block
e.printStackTrace(); e.printStackTrace();
return false;
} //final int debugLevel); } //final int debugLevel);
} }
...@@ -5186,6 +5241,7 @@ private Panel panel1, ...@@ -5186,6 +5241,7 @@ private Panel panel1,
} catch (Exception e) { } catch (Exception e) {
// TODO Auto-generated catch block // TODO Auto-generated catch block
e.printStackTrace(); e.printStackTrace();
return false;
} //final int debugLevel); } //final int debugLevel);
QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses(); // so running "Ground truth" after would be OK QUAD_CLT.tp.clt_3d_passes = null; // resetCLTPasses(); // so running "Ground truth" after would be OK
QUAD_CLT_AUX.tp.clt_3d_passes = null; //.resetCLTPasses(); QUAD_CLT_AUX.tp.clt_3d_passes = null; //.resetCLTPasses();
...@@ -5203,7 +5259,6 @@ private Panel panel1, ...@@ -5203,7 +5259,6 @@ private Panel panel1,
public boolean rigPlanes() { public boolean rigPlanes() {
if ((QUAD_CLT == null) || (QUAD_CLT.tp == null) || (QUAD_CLT.tp.clt_3d_passes == null) || (QUAD_CLT.tp.clt_3d_passes.size() == 0)) { if ((QUAD_CLT == null) || (QUAD_CLT.tp == null) || (QUAD_CLT.tp.clt_3d_passes == null) || (QUAD_CLT.tp.clt_3d_passes.size() == 0)) {
String msg = "DSI data is not available. Please run \"CLT 3D\" first"; String msg = "DSI data is not available. Please run \"CLT 3D\" first";
......
This diff is collapsed.
...@@ -4489,6 +4489,56 @@ public class QuadCLT { ...@@ -4489,6 +4489,56 @@ public class QuadCLT {
return rslt; return rslt;
} }
// float
/**
 * Float-data overload: wraps the first three planes of {@code iclt_data} into an
 * {@link ImageStack} (NaN replaced with 0.0) and forwards everything to the
 * {@code ImageStack}-based {@code linearStackToColor} overload.
 *
 * @param iclt_data planes indexed [plane][pixel]; planes 0..2 become the stack slices
 *                  labelled "red", "blue", "green" (in that order); when more than three
 *                  planes are present, plane 3 is passed on as the alpha channel
 * @param width     stack width in pixels, passed to {@code makeStack}
 * @param height    stack height in pixels, passed to {@code makeStack}
 * @return whatever the {@code ImageStack}-based overload returns
 */
public ImagePlus linearStackToColor(
        EyesisCorrectionParameters.CLTParameters clt_parameters,
        EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
        EyesisCorrectionParameters.RGBParameters rgbParameters,
        String name,
        String suffix, // such as disparity=...
        boolean toRGB,
        boolean bpp16, // 16-bit per channel color mode for result
        boolean saveShowIntermediate, // save/show if set globally
        boolean saveShowFinal, // save/show result (color image?)
        float [][] iclt_data,
        int width, // int tilesX,
        int height, // int tilesY,
        double scaleExposure,
        int debugLevel
        )
{
    // Optional fourth plane is the alpha channel (0..1.0); absent -> null
    final float [] alphaPlane = (iclt_data.length > 3) ? iclt_data[3] : null;
    // Only the first three planes go into the color stack
    final float [][] colorPlanes = new float [][] {iclt_data[0], iclt_data[1], iclt_data[2]};
    final String [] labels = new String [] {"red", "blue", "green"};
    final showDoubleFloatArrays stackBuilder = new showDoubleFloatArrays();
    final ImageStack colorStack = stackBuilder.makeStack(
            colorPlanes,
            width,
            height,
            labels, // slice names
            true);  // replace NaN with 0.0
    return linearStackToColor(
            clt_parameters,
            colorProcParameters,
            rgbParameters,
            name,
            suffix,
            toRGB,
            bpp16,
            saveShowIntermediate,
            saveShowFinal,
            colorStack,   // ImageStack stack
            alphaPlane,   // float [] alpha_pixels
            width,
            height,
            scaleExposure,
            debugLevel);
}
// double data
public ImagePlus linearStackToColor( public ImagePlus linearStackToColor(
EyesisCorrectionParameters.CLTParameters clt_parameters, EyesisCorrectionParameters.CLTParameters clt_parameters,
EyesisCorrectionParameters.ColorProcParameters colorProcParameters, EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
...@@ -4511,19 +4561,64 @@ public class QuadCLT { ...@@ -4511,19 +4561,64 @@ public class QuadCLT {
String [] sliceNames = {"red", "blue", "green"}; String [] sliceNames = {"red", "blue", "green"};
double [] alpha = null; // (0..1.0) double [] alpha = null; // (0..1.0)
double [][] rgb_in = {iclt_data[0],iclt_data[1],iclt_data[2]}; double [][] rgb_in = {iclt_data[0],iclt_data[1],iclt_data[2]};
if (iclt_data.length > 3) alpha = iclt_data[3]; float [] alpha_pixels = null;
if (iclt_data.length > 3) {
alpha = iclt_data[3];
if (alpha != null){
alpha_pixels = new float [alpha.length];
for (int i = 0; i <alpha.length; i++){
alpha_pixels[i] = (float) alpha[i];
}
}
}
ImageStack stack = sdfa_instance.makeStack( ImageStack stack = sdfa_instance.makeStack(
rgb_in, // iclt_data, rgb_in, // iclt_data,
width, // (tilesX + 0) * clt_parameters.transform_size, width, // (tilesX + 0) * clt_parameters.transform_size,
height, // (tilesY + 0) * clt_parameters.transform_size, height, // (tilesY + 0) * clt_parameters.transform_size,
sliceNames, // or use null to get chn-nn slice names sliceNames, // or use null to get chn-nn slice names
true); // replace NaN with 0.0 true); // replace NaN with 0.0
return linearStackToColor(
clt_parameters, // EyesisCorrectionParameters.CLTParameters clt_parameters,
colorProcParameters, // EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
rgbParameters, // EyesisCorrectionParameters.RGBParameters rgbParameters,
name, // String name,
suffix, // String suffix, // such as disparity=...
toRGB, // boolean toRGB,
bpp16, // boolean bpp16, // 16-bit per channel color mode for result
saveShowIntermediate, // boolean saveShowIntermediate, // save/show if set globally
saveShowFinal, // boolean saveShowFinal, // save/show result (color image?)
stack, // ImageStack stack,
alpha_pixels, // float [] alpha_pixels,
width, // int width, // int tilesX,
height, // int height, // int tilesY,
scaleExposure, // double scaleExposure,
debugLevel); //int debugLevel
}
public ImagePlus linearStackToColor(
EyesisCorrectionParameters.CLTParameters clt_parameters,
EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
EyesisCorrectionParameters.RGBParameters rgbParameters,
String name,
String suffix, // such as disparity=...
boolean toRGB,
boolean bpp16, // 16-bit per channel color mode for result
boolean saveShowIntermediate, // save/show if set globally
boolean saveShowFinal, // save/show result (color image?)
ImageStack stack,
float [] alpha_pixels,
int width, // int tilesX,
int height, // int tilesY,
double scaleExposure,
int debugLevel
)
{
// showDoubleFloatArrays sdfa_instance = new showDoubleFloatArrays(); // just for debugging?
if (debugLevel > -1) { // 0){ if (debugLevel > -1) { // 0){
double [] chn_avg = {0.0,0.0,0.0}; double [] chn_avg = {0.0,0.0,0.0};
float [] pixels; float [] pixels;
// int width = stack.getWidth();
// int height = stack.getHeight();
for (int c = 0; c <3; c++){ for (int c = 0; c <3; c++){
pixels = (float[]) stack.getPixels(c+1); pixels = (float[]) stack.getPixels(c+1);
for (int i = 0; i<pixels.length; i++){ for (int i = 0; i<pixels.length; i++){
...@@ -4610,11 +4705,7 @@ public class QuadCLT { ...@@ -4610,11 +4705,7 @@ public class QuadCLT {
titleFull=name+"-YPrPb"+suffix; titleFull=name+"-YPrPb"+suffix;
if (debugLevel > 1) System.out.println("Using full stack, including YPbPr"); if (debugLevel > 1) System.out.println("Using full stack, including YPbPr");
} }
if (alpha != null){ if (alpha_pixels != null){
float [] alpha_pixels = new float [alpha.length];
for (int i = 0; i <alpha.length; i++){
alpha_pixels[i] = (float) alpha[i];
}
stack.addSlice("alpha",alpha_pixels); stack.addSlice("alpha",alpha_pixels);
} }
...@@ -4674,6 +4765,8 @@ public class QuadCLT { ...@@ -4674,6 +4765,8 @@ public class QuadCLT {
} }
public void apply_fine_corr( public void apply_fine_corr(
double [][][] corr, double [][][] corr,
int debugLevel) int debugLevel)
......
This diff is collapsed.
...@@ -213,6 +213,30 @@ import ij.process.ImageProcessor; ...@@ -213,6 +213,30 @@ import ij.process.ImageProcessor;
return array_stack; return array_stack;
} }
/**
 * Builds an {@link ImageStack} from float planes, one slice per non-null plane.
 *
 * @param pixels planes indexed [plane][pixel]; null planes are skipped
 * @param width  stack width; every non-null plane must hold width*height values
 * @param height stack height
 * @param titles slice labels indexed like {@code pixels}, or null to use "chn-&lt;index&gt;"
 * @param noNaN  when true each slice is a copy of the plane with NaN replaced by 0.0;
 *               when false the plane array itself is added to the stack
 * @return the assembled stack, or null (after printing a message) if a plane has the wrong length
 */
public ImageStack makeStack(float[][] pixels, int width, int height, String [] titles, boolean noNaN) {
    final int expectedLength = width * height;
    ImageStack array_stack = new ImageStack(width, height);
    for (int i = 0; i < pixels.length; i++) {
        final float [] plane = pixels[i];
        if (plane == null) {
            continue; // nothing to add for this index
        }
        if (plane.length != expectedLength) {
            System.out.println("showArrays(): pixels["+i+"].length="+plane.length+" != width (+"+width+") * height("+height+")="+expectedLength);
            return null;
        }
        float [] slice;
        if (!noNaN) {
            slice = plane; // share the caller's array
        } else {
            // copy so the caller's data is left untouched while NaNs are zeroed
            slice = new float[plane.length];
            for (int j = 0; j < slice.length; j++) {
                if (Float.isNaN(plane[j])) {
                    slice[j] = 0.0F;
                } else {
                    slice[j] = plane[j];
                }
            }
        }
        final String label = (titles != null) ? titles[i] : ("chn-" + i);
        array_stack.addSlice(label, slice);
    }
    return array_stack;
}
public ImagePlus [] makeArrays(double[][] pixels, int width, int height, String title) { public ImagePlus [] makeArrays(double[][] pixels, int width, int height, String title) {
int i,j; int i,j;
float [] fpixels; float [] fpixels;
......
...@@ -36,9 +36,24 @@ ...@@ -36,9 +36,24 @@
* \brief Top level of the Tile Processor for frequency domain * \brief Top level of the Tile Processor for frequency domain
*/ */
// Avoiding includes in jcuda, all source files will be merged
#ifndef JCUDA
#pragma once #pragma once
#include "dtt8x8.cuh" #include "dtt8x8.cuh"
#define THREADSX (DTT_SIZE)
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define NUM_CAMS 4
#define NUM_COLORS 3
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK 4
#define IMCLT_THREADS_PER_TILE 16
#define IMCLT_TILES_PER_BLOCK 4
#endif
//#define IMCLT14 //#define IMCLT14
//#define NOICLT 1 //#define NOICLT 1
//#define TEST_IMCLT //#define TEST_IMCLT
...@@ -70,7 +85,7 @@ ...@@ -70,7 +85,7 @@
// Removed rest of NOICLT : Average run time =943.456177 ms // Removed rest of NOICLT : Average run time =943.456177 ms
// Added lpf: Average run time =1046.101318 ms (0.1 sec, 10%) - can be combined with the PSF kernel // Added lpf: Average run time =1046.101318 ms (0.1 sec, 10%) - can be combined with the PSF kernel
//#define USE_UMUL24 //#define USE_UMUL24
#define TILES_PER_BLOCK 4 ////#define TILES_PER_BLOCK 4
//Average run time =5155.922852 ms //Average run time =5155.922852 ms
//Average run time =1166.388306 ms //Average run time =1166.388306 ms
//Average run time =988.750977 ms //Average run time =988.750977 ms
...@@ -78,25 +93,16 @@ ...@@ -78,25 +93,16 @@
//Average run time =9656.743164 ms //Average run time =9656.743164 ms
// Average run time =9422.057617 ms (reducing divergence) // Average run time =9422.057617 ms (reducing divergence)
//#define TILES_PER_BLOCK 1 //#define TILES_PER_BLOCK 1
#define THREADS_PER_TILE 8
#define IMG_WIDTH 2592
#define IMG_HEIGHT 1936
#define NUM_CAMS 4
#define NUM_COLORS 3
#define KERNELS_LSTEP 4
#define KERNELS_HOR 164
#define KERNELS_VERT 123
#define IMAGE_TILE_SIDE 18
#define IMCLT_THREADS_PER_TILE 16 //#define THREADS_PER_TILE 8
#define IMCLT_TILES_PER_BLOCK 4 //#define IMCLT_THREADS_PER_TILE 16
//#define IMCLT_TILES_PER_BLOCK 4
#define KERNELS_STEP (1 << KERNELS_LSTEP) #define KERNELS_STEP (1 << KERNELS_LSTEP)
#define TILESX (IMG_WIDTH / DTT_SIZE) #define TILESX (IMG_WIDTH / DTT_SIZE)
#define TILESY (IMG_HEIGHT / DTT_SIZE) #define TILESY (IMG_HEIGHT / DTT_SIZE)
// increase row length by 1 so vertical passes will use different ports // increase row length by 1 so vertical passes will use different ports
#define THREADSX (DTT_SIZE)
#define DTT_SIZE1 (DTT_SIZE + 1) #define DTT_SIZE1 (DTT_SIZE + 1)
#define DTT_SIZE2 (2 * DTT_SIZE) #define DTT_SIZE2 (2 * DTT_SIZE)
#define DTT_SIZE21 (DTT_SIZE2 + 1) #define DTT_SIZE21 (DTT_SIZE2 + 1)
...@@ -124,9 +130,10 @@ ...@@ -124,9 +130,10 @@
// struct tp_task // struct tp_task
//#define TASK_SIZE 12 //#define TASK_SIZE 12
struct tp_task { struct tp_task {
long task; int task;
short ty; int txy;
short tx; // short ty;
// short tx;
float xy[NUM_CAMS][2]; float xy[NUM_CAMS][2];
}; };
struct CltExtra{ struct CltExtra{
...@@ -350,8 +357,9 @@ __device__ void imclt_plane( ...@@ -350,8 +357,9 @@ __device__ void imclt_plane(
const size_t dstride); // in floats (pixels) const size_t dstride); // in floats (pixels)
extern "C" extern "C"
__global__ void tileProcessor( __global__ void convert_correct_tiles(
struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct paraeters
float ** gpu_kernel_offsets, // [NUM_CAMS],
float ** gpu_kernels, // [NUM_CAMS], float ** gpu_kernels, // [NUM_CAMS],
float ** gpu_images, // [NUM_CAMS], float ** gpu_images, // [NUM_CAMS],
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
...@@ -361,6 +369,7 @@ __global__ void tileProcessor( ...@@ -361,6 +369,7 @@ __global__ void tileProcessor(
int lpf_mask) // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green int lpf_mask) // apply lpf to colors : bit 0 - red, bit 1 - blue, bit2 - green
{ {
// struct CltExtra* gpu_kernel_offsets = (struct CltExtra*) vgpu_kernel_offsets;
dim3 t = threadIdx; dim3 t = threadIdx;
int tile_in_block = threadIdx.y; int tile_in_block = threadIdx.y;
int task_num = blockIdx.x * TILES_PER_BLOCK + tile_in_block; int task_num = blockIdx.x * TILES_PER_BLOCK + tile_in_block;
...@@ -370,8 +379,9 @@ __global__ void tileProcessor( ...@@ -370,8 +379,9 @@ __global__ void tileProcessor(
__shared__ struct tp_task tt [TILES_PER_BLOCK]; __shared__ struct tp_task tt [TILES_PER_BLOCK];
// Copy task data to shared memory // Copy task data to shared memory
tt[tile_in_block].task = gpu_task -> task; tt[tile_in_block].task = gpu_task -> task;
tt[tile_in_block].tx = gpu_task -> tx; // tt[tile_in_block].tx = gpu_task -> tx;
tt[tile_in_block].ty = gpu_task -> ty; // tt[tile_in_block].ty = gpu_task -> ty;
tt[tile_in_block].txy = gpu_task -> txy;
int thread0 = threadIdx.x & 1; int thread0 = threadIdx.x & 1;
int thread12 = threadIdx.x >>1; int thread12 = threadIdx.x >>1;
if (thread12 < NUM_CAMS) { if (thread12 < NUM_CAMS) {
...@@ -408,7 +418,7 @@ __global__ void tileProcessor( ...@@ -408,7 +418,7 @@ __global__ void tileProcessor(
for (int ncam = 0; ncam < NUM_CAMS; ncam++){ for (int ncam = 0; ncam < NUM_CAMS; ncam++){
for (int color = 0; color < NUM_COLORS; color++){ for (int color = 0; color < NUM_COLORS; color++){
convertCorrectTile( convertCorrectTile(
gpu_kernel_offsets[ncam], // float * gpu_kernel_offsets, (struct CltExtra*)(gpu_kernel_offsets[ncam]), // struct CltExtra* gpu_kernel_offsets,
gpu_kernels[ncam], // float * gpu_kernels, gpu_kernels[ncam], // float * gpu_kernels,
gpu_images[ncam], // float * gpu_images, gpu_images[ncam], // float * gpu_images,
gpu_clt[ncam], // float * gpu_clt, gpu_clt[ncam], // float * gpu_clt,
...@@ -416,7 +426,8 @@ __global__ void tileProcessor( ...@@ -416,7 +426,8 @@ __global__ void tileProcessor(
lpf_mask, // const int lpf_mask, lpf_mask, // const int lpf_mask,
tt[tile_in_block].xy[ncam][0], // const float centerX, tt[tile_in_block].xy[ncam][0], // const float centerX,
tt[tile_in_block].xy[ncam][1], // const float centerY, tt[tile_in_block].xy[ncam][1], // const float centerY,
tt[tile_in_block].tx | (tt[tile_in_block].ty <<16), // const int txy, // tt[tile_in_block].tx | (tt[tile_in_block].ty <<16), // const int txy,
tt[tile_in_block].txy, // const int txy,
dstride, // size_t dstride, // in floats (pixels) dstride, // size_t dstride, // in floats (pixels)
(float * )(clt_tile [tile_in_block]), // float clt_tile [TILES_PER_BLOCK][NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE]) (float * )(clt_tile [tile_in_block]), // float clt_tile [TILES_PER_BLOCK][NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE])
(float * )(clt_kernels[tile_in_block]), // float clt_tile [NUM_COLORS][4][DTT_SIZE][DTT_SIZE], (float * )(clt_kernels[tile_in_block]), // float clt_tile [NUM_COLORS][4][DTT_SIZE][DTT_SIZE],
......
...@@ -44,12 +44,13 @@ ...@@ -44,12 +44,13 @@
* This file is compatible with both runtime and driver API, runtime is used for development * This file is compatible with both runtime and driver API, runtime is used for development
* with Nvidia Nsight, driver API when calling these kernels from Java * with Nvidia Nsight, driver API when calling these kernels from Java
*/ */
#ifndef JCUDA
#define DTT_SIZE 8
#endif
#pragma once #pragma once
#define DTTTEST_BLOCK_WIDTH 32 #define DTTTEST_BLOCK_WIDTH 32
#define DTTTEST_BLOCK_HEIGHT 16 #define DTTTEST_BLOCK_HEIGHT 16
#define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1) #define DTTTEST_BLK_STRIDE (DTTTEST_BLOCK_WIDTH+1)
#define DTT_SIZE 8
//#define CUDART_INF_F __int_as_float(0x7f800000) //#define CUDART_INF_F __int_as_float(0x7f800000)
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment