Commit 4203dbbb authored by Andrey Filippov

adding generation of data for macroblocks

parent fbab1093
...@@ -702,9 +702,9 @@ private Panel panel1, ...@@ -702,9 +702,9 @@ private Panel panel1,
panelClt_GPU.setLayout(new GridLayout(1, 0, 5, 5)); // rows, columns, vgap, hgap panelClt_GPU.setLayout(new GridLayout(1, 0, 5, 5)); // rows, columns, vgap, hgap
addButton("JCUDA TEST", panelClt_GPU); addButton("JCUDA TEST", panelClt_GPU);
addButton("TF TEST", panelClt_GPU); addButton("TF TEST", panelClt_GPU);
addButton("GPU files", panelClt_GPU, color_conf_process); addButton("GPU simulate", panelClt_GPU, color_conf_process);
addButton("Rig8 gpu", panelClt_GPU, color_conf_process); addButton("GPU RUN", panelClt_GPU, color_conf_process);
addButton("ShowGPU", panelClt_GPU, color_conf_process); // addButton("ShowGPU", panelClt_GPU, color_conf_process);
addButton("LWIR_TEST", panelClt_GPU, color_conf_process); addButton("LWIR_TEST", panelClt_GPU, color_conf_process);
addButton("LWIR_ACQUIRE", panelClt_GPU, color_conf_process); addButton("LWIR_ACQUIRE", panelClt_GPU, color_conf_process);
...@@ -4915,13 +4915,13 @@ private Panel panel1, ...@@ -4915,13 +4915,13 @@ private Panel panel1,
getPairImages2(); getPairImages2();
return; return;
/* ======================================================================== */ /* ======================================================================== */
} else if (label.equals("GPU files")) { } else if (label.equals("GPU simulate")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL; DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL); EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
generateGPUDebugFiles(); generateGPUDebugFiles();
return; return;
/* ======================================================================== */ /* ======================================================================== */
} else if (label.equals("Rig8 gpu")) { } else if (label.equals("GPU RUN")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL; DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL); EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
getPairImages2Gpu(); getPairImages2Gpu();
......
...@@ -184,21 +184,22 @@ public class GPUTileProcessor { ...@@ -184,21 +184,22 @@ public class GPUTileProcessor {
private CUdeviceptr gpu_kernels = new CUdeviceptr(); private CUdeviceptr gpu_kernels = new CUdeviceptr();
private CUdeviceptr gpu_kernel_offsets = new CUdeviceptr(); private CUdeviceptr gpu_kernel_offsets = new CUdeviceptr();
private CUdeviceptr gpu_bayer = new CUdeviceptr(); private CUdeviceptr gpu_bayer = new CUdeviceptr();
private CUdeviceptr gpu_tasks = new CUdeviceptr(); // allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.POINTER private CUdeviceptr gpu_tasks = new CUdeviceptr(); // allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT
private CUdeviceptr gpu_corrs = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER private CUdeviceptr gpu_corrs = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.FLOAT
private CUdeviceptr gpu_textures = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER private CUdeviceptr gpu_textures = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
private CUdeviceptr gpu_clt = new CUdeviceptr(); private CUdeviceptr gpu_clt = new CUdeviceptr();
private CUdeviceptr gpu_4_images = new CUdeviceptr(); private CUdeviceptr gpu_4_images = new CUdeviceptr();
private CUdeviceptr gpu_corr_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER private CUdeviceptr gpu_corr_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
private CUdeviceptr gpu_num_corr_tiles = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER private CUdeviceptr gpu_num_corr_tiles = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
private CUdeviceptr gpu_texture_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER private CUdeviceptr gpu_texture_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
private CUdeviceptr gpu_diff_rgb_combo = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_CAMS* (NUM_COLORS + 1) * Sizeof.FLOAT
// private CUdeviceptr gpu_port_offsets = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.POINTER // private CUdeviceptr gpu_port_offsets = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.FLOAT
private CUdeviceptr gpu_color_weights = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.POINTER private CUdeviceptr gpu_color_weights = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.FLOAT
private CUdeviceptr gpu_woi = new CUdeviceptr(); // 4 integers (x, y, width, height) Rectangle - in tiles private CUdeviceptr gpu_woi = new CUdeviceptr(); // 4 integers (x, y, width, height) Rectangle - in tiles
private CUdeviceptr gpu_num_texture_tiles = new CUdeviceptr(); // 8 ints private CUdeviceptr gpu_num_texture_tiles = new CUdeviceptr(); // 8 ints
private CUdeviceptr gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER private CUdeviceptr gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
private CUdeviceptr gpu_correction_vector= new CUdeviceptr(); private CUdeviceptr gpu_correction_vector= new CUdeviceptr();
private CUdeviceptr gpu_rot_deriv= new CUdeviceptr(); // used internally by device, may be read to CPU for testing private CUdeviceptr gpu_rot_deriv= new CUdeviceptr(); // used internally by device, may be read to CPU for testing
...@@ -562,7 +563,9 @@ public class GPUTileProcessor { ...@@ -562,7 +563,9 @@ public class GPUTileProcessor {
//#define TILESYA ((TILESY +3) & (~3)) //#define TILESYA ((TILESY +3) & (~3))
int tilesYa = (tilesY + 3) & ~3; int tilesYa = (tilesY + 3) & ~3;
// cuMemAlloc(gpu_texture_indices,tilesX * tilesY * Sizeof.POINTER); // cuMemAlloc(gpu_texture_indices,tilesX * tilesY * Sizeof.POINTER);
cuMemAlloc(gpu_texture_indices,tilesX * tilesYa * Sizeof.POINTER); cuMemAlloc(gpu_texture_indices,tilesX * tilesYa * Sizeof.FLOAT);
cuMemAlloc(gpu_diff_rgb_combo, tilesX * tilesYa * NUM_CAMS* (NUM_COLORS + 1) * Sizeof.FLOAT);
// cuMemAlloc(gpu_port_offsets, NUM_CAMS * 2 * Sizeof.FLOAT); // cuMemAlloc(gpu_port_offsets, NUM_CAMS * 2 * Sizeof.FLOAT);
cuMemAlloc(gpu_color_weights, 3 * Sizeof.FLOAT); cuMemAlloc(gpu_color_weights, 3 * Sizeof.FLOAT);
...@@ -1272,7 +1275,9 @@ public class GPUTileProcessor { ...@@ -1272,7 +1275,9 @@ public class GPUTileProcessor {
Pointer.to(new int[] { idust_remove }), // int dust_remove, // Do not reduce average weight when only one image differes much from the average Pointer.to(new int[] { idust_remove }), // int dust_remove, // Do not reduce average weight when only one image differes much from the average
Pointer.to(new int[] {0}), // int keep_weights, // return channel weights after A in RGBA Pointer.to(new int[] {0}), // int keep_weights, // return channel weights after A in RGBA
Pointer.to(new int[] { texture_stride_rgba }), // const size_t texture_rbga_stride, // in floats Pointer.to(new int[] { texture_stride_rgba }), // const size_t texture_rbga_stride, // in floats
Pointer.to(gpu_textures_rgba)); // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles Pointer.to(gpu_textures_rgba), // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cuCtxSynchronize(); cuCtxSynchronize();
// Call the kernel function // Call the kernel function
cuLaunchKernel(GPU_RBGA_kernel, cuLaunchKernel(GPU_RBGA_kernel,
...@@ -1333,8 +1338,8 @@ public class GPUTileProcessor { ...@@ -1333,8 +1338,8 @@ public class GPUTileProcessor {
Pointer.to(new int[] {0}),// 0, // const size_t texture_rbg_stride, // in floats - DISABLE GENERATION! Pointer.to(new int[] {0}),// 0, // const size_t texture_rbg_stride, // in floats - DISABLE GENERATION!
Pointer.to(new int[] {0}), // null, // new Pointer(), //Pointer.to(gpu_textures), // new Pointer(), // Pointer.to(gpu_textures), Pointer.to(new int[] {0}), // null, // new Pointer(), //Pointer.to(gpu_textures), // new Pointer(), // Pointer.to(gpu_textures),
Pointer.to(new int[] { texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles Pointer.to(new int[] { texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_textures) Pointer.to(gpu_textures),
); Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cuCtxSynchronize(); cuCtxSynchronize();
// Call the kernel function // Call the kernel function
cuLaunchKernel(GPU_TEXTURES_kernel, cuLaunchKernel(GPU_TEXTURES_kernel,
...@@ -1574,7 +1579,7 @@ public class GPUTileProcessor { ...@@ -1574,7 +1579,7 @@ public class GPUTileProcessor {
// for (String sourceCode: sourceCodeUnits) { // for (String sourceCode: sourceCodeUnits) {
for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) { for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
String sourceCode = sourceCodeUnits[cunit]; String sourceCode = sourceCodeUnits[cunit];
//System.out.print(sourceCode); // System.out.print(sourceCode);
// Use the NVRTC to create a program by compiling the source code // Use the NVRTC to create a program by compiling the source code
nvrtcProgram program = new nvrtcProgram(); nvrtcProgram program = new nvrtcProgram();
nvrtcCreateProgram( program, sourceCode, null, 0, null, null); nvrtcCreateProgram( program, sourceCode, null, 0, null, null);
......
...@@ -45,11 +45,11 @@ public class MacroCorrelation { ...@@ -45,11 +45,11 @@ public class MacroCorrelation {
double trusted_correlation, double trusted_correlation,
double weight_var, // = 1.0; // weight of variance data (old, detects thin wires?) double weight_var, // = 1.0; // weight of variance data (old, detects thin wires?)
double weight_Y, // = 1.0; // weight of average intensity double weight_Y, // = 1.0; // weight of average intensity
double weight_RBmG // = 5.0; // weight of average color difference (0.5*(R+B)-G), shoukld be ~5*weight_Y double weight_RBmG // = 5.0; // weight of average color difference (0.5*(R+B)-G), should be ~5*weight_Y
){ ){
this.weight_var = weight_var; // weight of variance data (old, detects thin wires?) this.weight_var = weight_var; // weight of variance data (old, detects thin wires?)
this.weight_Y = weight_Y; // weight of average intensity this.weight_Y = weight_Y; // weight of average intensity
this.weight_RBmG = weight_RBmG; // = 5.0; // weight of average color difference (0.5*(R+B)-G), shoukld be ~5*weight_Y this.weight_RBmG = weight_RBmG; // = 5.0; // weight of average color difference (0.5*(R+B)-G), should be ~5*weight_Y
this.tp = tp; this.tp = tp;
final int pTilesX = tp.getTilesX(); final int pTilesX = tp.getTilesX();
......
...@@ -1196,7 +1196,8 @@ __global__ void generate_RBGA( ...@@ -1196,7 +1196,8 @@ __global__ void generate_RBGA(
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{ {
// TODO use atomic_add to increment num_texture_tiles // TODO use atomic_add to increment num_texture_tiles
// TODO calculate woi // TODO calculate woi
...@@ -1329,7 +1330,9 @@ __global__ void generate_RBGA( ...@@ -1329,7 +1330,9 @@ __global__ void generate_RBGA(
texture_rbga_stride, // size_t texture_rbg_stride, // in floats texture_rbga_stride, // size_t texture_rbg_stride, // in floats
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024) 0, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles); // (float *) 0 ); // float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cudaDeviceSynchronize(); // not needed yet, just for testing cudaDeviceSynchronize(); // not needed yet, just for testing
/* */ /* */
} }
...@@ -1788,8 +1791,8 @@ __global__ void textures_accumulate( ...@@ -1788,8 +1791,8 @@ __global__ void textures_accumulate(
size_t texture_rbg_stride, // in floats size_t texture_rbg_stride, // in floats
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
size_t texture_stride, // in floats (now 256*4 = 1024) size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{ {
// (float *) gpu_geometry_correction ->pXY0, // (float *) gpu_geometry_correction ->pXY0,
// float weights[3] = {weight0, weight1, weight2}; // float weights[3] = {weight0, weight1, weight2};
...@@ -1997,8 +2000,8 @@ __global__ void textures_accumulate( ...@@ -1997,8 +2000,8 @@ __global__ void textures_accumulate(
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union ! (float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output (float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
(float *) shr1.rgbaw, // float * rgba, // result (float *) shr1.rgbaw, // float * rgba, // result
(float * ) 0, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null (float * ) ports_rgb, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
(float * ) 0, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null (float * ) max_diff, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null
(float *) port_offsets, // float * port_offsets, // [port]{x_off, y_off} - just to scale pixel value differences (float *) port_offsets, // float * port_offsets, // [port]{x_off, y_off} - just to scale pixel value differences
diff_sigma, // float diff_sigma, // pixel value/pixel change diff_sigma, // float diff_sigma, // pixel value/pixel change
diff_threshold, // float diff_threshold, // pixel value/pixel change diff_threshold, // float diff_threshold, // pixel value/pixel change
...@@ -2013,8 +2016,8 @@ __global__ void textures_accumulate( ...@@ -2013,8 +2016,8 @@ __global__ void textures_accumulate(
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union ! (float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output (float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
(float *) shr1.rgbaw, // float * rgba, // result (float *) shr1.rgbaw, // float * rgba, // result
(float * ) 0, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null (float * ) ports_rgb, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
(float * ) 0, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null (float * ) max_diff, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null
(float *) port_offsets, // float * port_offsets, // [port]{x_off, y_off} - just to scale pixel value differences (float *) port_offsets, // float * port_offsets, // [port]{x_off, y_off} - just to scale pixel value differences
diff_sigma, // float diff_sigma, // pixel value/pixel change diff_sigma, // float diff_sigma, // pixel value/pixel change
diff_threshold, // float diff_threshold, // pixel value/pixel change diff_threshold, // float diff_threshold, // pixel value/pixel change
......
...@@ -96,7 +96,9 @@ extern "C" __global__ void textures_accumulate( ...@@ -96,7 +96,9 @@ extern "C" __global__ void textures_accumulate(
size_t texture_rbg_stride, // in floats size_t texture_rbg_stride, // in floats
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
size_t texture_stride, // in floats (now 256*4 = 1024) size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
extern "C" extern "C"
__global__ void imclt_rbg_all( __global__ void imclt_rbg_all(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment