Commit 10c327ae authored by Andrey Filippov's avatar Andrey Filippov

debugging GPU OOB with large disparities, fixed in the kernel code

parent b5bfb231
......@@ -5812,6 +5812,7 @@ private Panel panel1,
QUAD_CLT, // QuadCLT quadCLT_main,
QUAD_CLT_AUX, // QuadCLT quadCLT_aux,
CLT_PARAMETERS, // EyesisCorrectionParameters.DCTParameters dct_parameters,
CORRECTION_PARAMETERS, // EyesisCorrectionParameters ecp,
DEBAYER_PARAMETERS, //EyesisCorrectionParameters.DebayerParameters debayerParameters,
COLOR_PROC_PARAMETERS, //EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
COLOR_PROC_PARAMETERS_AUX, //EyesisCorrectionParameters.ColorProcParameters colorProcParameters_aux,
......
......@@ -1359,7 +1359,7 @@ public class GPUTileProcessor {
public void getTileSubcamOffsets(
final TpTask[] tp_tasks, // will use // modify to have offsets for 8 cameras
final GeometryCorrection geometryCorrection_main,
final GeometryCorrection geometryCorrection_aux, // if null, will only calculate offsets fro the main camera
final GeometryCorrection geometryCorrection_aux, // if null, will only calculate offsets from the main camera
final double [][][] ers_delay, // if not null - fill with tile center acquisition delay
final int threadsMax, // maximal number of threads to launch
final int debugLevel)
......
......@@ -456,6 +456,7 @@ public class TwoQuadCLT {
QuadCLT quadCLT_main,
QuadCLT quadCLT_aux,
CLTParameters clt_parameters,
EyesisCorrectionParameters.CorrectionParameters ecp,
EyesisCorrectionParameters.DebayerParameters debayerParameters,
ColorProcParameters colorProcParameters,
ColorProcParameters colorProcParameters_aux,
......@@ -523,6 +524,7 @@ public class TwoQuadCLT {
saturation_imp_main, // boolean [][] saturation_main, // (near) saturated pixels or null
saturation_imp_aux, // boolean [][] saturation_aux, // (near) saturated pixels or null
clt_parameters, // EyesisCorrectionParameters.CLTParameters clt_parameters,
ecp, // EyesisCorrectionParameters.CorrectionParameters ecp,
debayerParameters, // EyesisCorrectionParameters.DebayerParameters debayerParameters,
colorProcParameters, // EyesisCorrectionParameters.ColorProcParameters colorProcParameters,
colorProcParameters_aux, // EyesisCorrectionParameters.ColorProcParameters colorProcParameters_aux,
......@@ -1191,8 +1193,6 @@ public class TwoQuadCLT {
bb.order(ByteOrder.LITTLE_ENDIAN);
bb.clear();
for (int i = 0; i < port_xy.length; i++) {
// dos.writeFloat((float) (port_xy[i][chn][0])); // x-offset
// dos.writeFloat((float) (port_xy[i][chn][1])); // y-offset
bb.putFloat((float) (port_xy[i][chn][0])); // x-offset
bb.putFloat((float) (port_xy[i][chn][1])); // y-offset
}
......@@ -1521,7 +1521,7 @@ public class TwoQuadCLT {
String [] rgba_titles = {"red","blue","green","alpha"};
String [] rgba_weights_titles = {"red","blue","green","alpha","port0","port1","port2","port3","r-rms","b-rms","g-rms","w-rms"};
if ((texture_tiles_main != null) && (texture_tiles_aux != null)){
if ((debugLevel > -2) && (clt_parameters.tileX >= 0) && (clt_parameters.tileY >= 0) && (clt_parameters.tileX < tilesX) && (clt_parameters.tileY < tilesY)) {
if ((debugLevel > -1) && (clt_parameters.tileX >= 0) && (clt_parameters.tileY >= 0) && (clt_parameters.tileX < tilesX) && (clt_parameters.tileY < tilesY)) {
double [][] texture_tile = texture_tiles_main[clt_parameters.tileY][clt_parameters.tileX];
int tile = +clt_parameters.tileY * tilesX +clt_parameters.tileX;
System.out.println("=== tileX= "+clt_parameters.tileX+" tileY= "+clt_parameters.tileY+" tile="+tile+" ===");
......@@ -1903,6 +1903,7 @@ public class TwoQuadCLT {
boolean [][] saturation_main, // (near) saturated pixels or null
boolean [][] saturation_aux, // (near) saturated pixels or null
CLTParameters clt_parameters,
EyesisCorrectionParameters.CorrectionParameters ecp,
EyesisCorrectionParameters.DebayerParameters debayerParameters,
ColorProcParameters colorProcParameters,
ColorProcParameters colorProcParameters_aux,
......@@ -2007,6 +2008,47 @@ public class TwoQuadCLT {
threadsMax, // final int threadsMax, // maximal number of threads to launch
debugLevel); // final int debugLevel)
// Optionally save offsets here?
// EyesisCorrectionParameters.CorrectionParameters ecp,
boolean save_ports_xy = false; // true; Same files as saved with the kernels
if ((ecp.tile_processor_gpu != null) && !ecp.tile_processor_gpu.isEmpty() && save_ports_xy) {
int quad = 4;
String file_prefix = ecp.tile_processor_gpu +"clt/main";
for (int chn = 0; chn < quad; chn++) {
String img_path = file_prefix+"_chn"+chn+".portsxy";
FileOutputStream fos;
try {
fos = new FileOutputStream(img_path);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
System.out.println("Could not write to "+img_path+" (file not found) port offsets");
break;
}
DataOutputStream dos = new DataOutputStream(fos);
WritableByteChannel channel = Channels.newChannel(dos);
ByteBuffer bb = ByteBuffer.allocate(tp_tasks.length * 2 * 4);
bb.order(ByteOrder.LITTLE_ENDIAN);
bb.clear();
for (int i = 0; i < tp_tasks.length; i++) {
bb.putFloat((tp_tasks[i].xy[chn][0])); // x-offset
bb.putFloat((tp_tasks[i].xy[chn][1])); // y-offset
}
bb.flip();
try {
channel.write(bb);
} catch (IOException e) {
System.out.println("Could not write to "+img_path+" port offsets");
break;
}
try {
dos.close();
} catch (IOException e) {
System.out.println("Could not close DataOutputStream for "+img_path+" port offsets");
}
System.out.println("Wrote port offsets to "+img_path+".");
}
}
gPUTileProcessor.setTasks(
tp_tasks, // TpTask [] tile_tasks,
use_aux); // boolean use_aux)
......@@ -2236,11 +2278,25 @@ public class TwoQuadCLT {
texture_stack.addSlice("main", imp_rgba_main.getProcessor().getPixels()); // single slice
ImagePlus imp_texture_stack = new ImagePlus(name+"-RGBA-D"+clt_parameters.disparity, texture_stack);
imp_texture_stack.getProcessor().resetMinAndMax();
imp_texture_stack.show();
// imp_texture_stack.show();
String results_path= quadCLT_main.correctionsParameters.selectResultsDirectory( // selectX3dDirectory(
// name, // quad timestamp. Will be ignored if correctionsParameters.use_x3d_subdirs is false
// quadCLT_main.correctionsParameters.x3dModelVersion,
true, // smart,
true); //newAllowed, // save
quadCLT_main.eyesisCorrections.saveAndShow(
imp_texture_stack,
results_path,
true, // quadCLT_main.correctionsParameters.png && !clt_parameters.black_back,
true, // !batch_mode && clt_parameters.show_textures,
0, // quadCLT_main.correctionsParameters.JPEG_quality, // jpegQuality); // jpegQuality){// <0 - keep current, 0 - force Tiff, >0 use for JPEG
(debugLevel > 0) ? debugLevel : 1); // int debugLevel (print what it saves)
}
// convert textures to RGBA in Java
if (clt_parameters.show_rgba_color) {
if (clt_parameters.show_rgba_color && (debugLevel > 0)) { // disabling
int numcol = quadCLT_main.isMonochrome()?1:3;
int ports = imp_quad_main.length;
int num_src_slices = numcol + 1 + (clt_parameters.keep_weights?(ports + numcol + 1):0); // 12 ; // calculate
......@@ -2253,7 +2309,7 @@ public class TwoQuadCLT {
int texture_slice_size = (2 * GPUTileProcessor.DTT_SIZE)* (2 * GPUTileProcessor.DTT_SIZE);
int texture_tile_size = texture_slice_size * num_src_slices ;
if (debugLevel > -2) {
if (debugLevel > -1) {
for (int indx = 0; indx < texture_indices.length; indx++) if ((texture_indices[indx] & (1 << GPUTileProcessor.LIST_TEXTURE_BIT)) != 0){
int tile = texture_indices[indx] >> GPUTileProcessor.CORR_NTILE_SHIFT;
int tileX = tile % tilesX;
......@@ -2284,7 +2340,7 @@ public class TwoQuadCLT {
num_src_slices // int num_src_slices
);
if ((debugLevel > -2) && (clt_parameters.tileX >= 0) && (clt_parameters.tileY >= 0) && (clt_parameters.tileX < tilesX) && (clt_parameters.tileY < tilesY)) {
if ((debugLevel > -1) && (clt_parameters.tileX >= 0) && (clt_parameters.tileY >= 0) && (clt_parameters.tileX < tilesX) && (clt_parameters.tileY < tilesY)) {
String [] rgba_titles = {"red","blue","green","alpha"};
String [] rgba_weights_titles = {"red","blue","green","alpha","port0","port1","port2","port3","r-rms","b-rms","g-rms","w-rms"};
double [][] texture_tile = texture_tiles[clt_parameters.tileY][clt_parameters.tileX];
......
......@@ -90,7 +90,7 @@
#define DEBUG11 1
#define DEBUG12 1
//#define USE_textures_gen
#define DEBUG_OOB1 1
#endif //#ifndef JCUDA
#define TASK_TEXTURE_BITS ((1 << TASK_TEXTURE_N_BIT) | (1 << TASK_TEXTURE_E_BIT) | (1 << TASK_TEXTURE_S_BIT) | (1 << TASK_TEXTURE_W_BIT))
......@@ -169,8 +169,8 @@
//#define BAYER_BLUE_COL (1 - BAYER_RED_COL)
//#define DBG_TILE_X 40
//#define DBG_TILE_Y 80
#define DBG_TILE_X 49
#define DBG_TILE_Y 66
#define DBG_TILE_X 161 // 49
#define DBG_TILE_Y 111 // 66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
//56494
......@@ -1562,8 +1562,8 @@ __global__ void generate_RBGA(
#ifdef DEBUG12
printf("\ngenerate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d\n",
pass, border_tile,ti_offset, ntt);
printf("\ngenerate_RBGA() gpu_texture_indices= 0x%x, gpu_texture_indices + ti_offset=0x%x\n",
(int) gpu_texture_indices, (int) (gpu_texture_indices + ti_offset));
printf("\ngenerate_RBGA() gpu_texture_indices= %p, gpu_texture_indices + ti_offset= %p\n",
(void *) gpu_texture_indices, (void *) (gpu_texture_indices + ti_offset));
printf("\ngenerate_RBGA() grid_texture={%d, %d, %d)\n",
grid_texture.x, grid_texture.y, grid_texture.z);
printf("\ngenerate_RBGA() threads_texture={%d, %d, %d)\n",
......@@ -1833,8 +1833,8 @@ __global__ void gen_texture_list(
if ((x == DBG_TILE_X) && (y == DBG_TILE_Y)){
printf("\ngen_texture_list() buff_head=%d, buf_offset = %d, num_offset= %d, is_border=%d\n",
buff_head, buf_offset, num_offset,is_border);
printf("\ngen_texture_list() gpu_texture_indices = 0x%x, gpu_texture_indices + buf_offset = 0x%x\n",
(int) gpu_texture_indices, (int) (gpu_texture_indices + buf_offset));
printf("\ngen_texture_list() gpu_texture_indices = %p, gpu_texture_indices + buf_offset = %p\n",
(void *) gpu_texture_indices, (void *) (gpu_texture_indices + buf_offset));
}
__syncthreads();// __syncwarp();
#endif // DEBUG12
......@@ -3170,6 +3170,12 @@ __device__ void convertCorrectTile(
px = centerX - DTT_SIZE - (clt_extra->data_x + clt_extra->dxc_dx * kdx + clt_extra->dxc_dy * kdy) ; // fractional left corner
int itlx = (int) floorf(px +0.5f);
if (itlx < 0){
itlx &= 1; // for color - extend by pairs
}
if (itlx >= (IMG_WIDTH - DTT_SIZE)){
itlx = itlx & 1 +(IMG_WIDTH - DTT_SIZE - 2); // for color - extend by pairs
}
int_topleft [0] = itlx;
float shift_hor = itlx - px;
residual_shift[0] = shift_hor;
......@@ -3205,8 +3211,24 @@ __device__ void convertCorrectTile(
py = centerY - DTT_SIZE - (clt_extra->data_y + clt_extra->dyc_dx * kdx + clt_extra->dyc_dy * kdy) ; // fractional top corner
int itly = (int) floorf(py +0.5f);
if (itly < 0){
itly &= 1; // for color - extend by pairs
}
if (itly >= (IMG_HEIGHT - DTT_SIZE)){
itly = (itly & 1) +(IMG_HEIGHT - DTT_SIZE - 2); // for color - extend by pairs
}
int_topleft[1] = itly;
#ifdef DEBUG_OOB1
if ((int_topleft[0] < 0) || (int_topleft[1] < 0) || (int_topleft[0] >= (IMG_WIDTH - DTT_SIZE)) || (int_topleft[1] >= IMG_HEIGHT - DTT_SIZE)){
printf("Source data OOB, left=%d, top=%d\n",int_topleft[0],int_topleft[1]);
printf("\n");
printf("\n");
__syncthreads();// __syncwarp();
}
#endif // DEBUG_OOB1
float shift_vert = itly - py;
residual_shift[1] = shift_vert;
x = shift_vert *(1.0f/16);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment