Commit 4203dbbb authored by Andrey Filippov's avatar Andrey Filippov

adding generation of data for macroblocks

parent fbab1093
......@@ -702,9 +702,9 @@ private Panel panel1,
panelClt_GPU.setLayout(new GridLayout(1, 0, 5, 5)); // rows, columns, vgap, hgap
addButton("JCUDA TEST", panelClt_GPU);
addButton("TF TEST", panelClt_GPU);
addButton("GPU files", panelClt_GPU, color_conf_process);
addButton("Rig8 gpu", panelClt_GPU, color_conf_process);
addButton("ShowGPU", panelClt_GPU, color_conf_process);
addButton("GPU simulate", panelClt_GPU, color_conf_process);
addButton("GPU RUN", panelClt_GPU, color_conf_process);
// addButton("ShowGPU", panelClt_GPU, color_conf_process);
addButton("LWIR_TEST", panelClt_GPU, color_conf_process);
addButton("LWIR_ACQUIRE", panelClt_GPU, color_conf_process);
......@@ -4915,13 +4915,13 @@ private Panel panel1,
getPairImages2();
return;
/* ======================================================================== */
} else if (label.equals("GPU files")) {
} else if (label.equals("GPU simulate")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
generateGPUDebugFiles();
return;
/* ======================================================================== */
} else if (label.equals("Rig8 gpu")) {
} else if (label.equals("GPU RUN")) {
DEBUG_LEVEL=MASTER_DEBUG_LEVEL;
EYESIS_CORRECTIONS.setDebug(DEBUG_LEVEL);
getPairImages2Gpu();
......
......@@ -184,21 +184,22 @@ public class GPUTileProcessor {
private CUdeviceptr gpu_kernels = new CUdeviceptr();
private CUdeviceptr gpu_kernel_offsets = new CUdeviceptr();
private CUdeviceptr gpu_bayer = new CUdeviceptr();
private CUdeviceptr gpu_tasks = new CUdeviceptr(); // allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.POINTER
private CUdeviceptr gpu_corrs = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.POINTER
private CUdeviceptr gpu_textures = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER
private CUdeviceptr gpu_tasks = new CUdeviceptr(); // allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT
private CUdeviceptr gpu_corrs = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.FLOAT
private CUdeviceptr gpu_textures = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
private CUdeviceptr gpu_clt = new CUdeviceptr();
private CUdeviceptr gpu_4_images = new CUdeviceptr();
private CUdeviceptr gpu_corr_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER
private CUdeviceptr gpu_num_corr_tiles = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER
private CUdeviceptr gpu_texture_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER
private CUdeviceptr gpu_corr_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
private CUdeviceptr gpu_num_corr_tiles = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
private CUdeviceptr gpu_texture_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
private CUdeviceptr gpu_diff_rgb_combo = new CUdeviceptr(); // allocate tilesX * tilesY * NUM_CAMS* (NUM_COLORS + 1) * Sizeof.FLOAT
// private CUdeviceptr gpu_port_offsets = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.POINTER
private CUdeviceptr gpu_color_weights = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.POINTER
// private CUdeviceptr gpu_port_offsets = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.FLOAT
private CUdeviceptr gpu_color_weights = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.FLOAT
private CUdeviceptr gpu_woi = new CUdeviceptr(); // 4 integers (x, y, width, height) Rectangle - in tiles
private CUdeviceptr gpu_num_texture_tiles = new CUdeviceptr(); // 8 ints
private CUdeviceptr gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.POINTER
private CUdeviceptr gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
private CUdeviceptr gpu_correction_vector= new CUdeviceptr();
private CUdeviceptr gpu_rot_deriv= new CUdeviceptr(); // used internally by device, may be read to CPU for testing
......@@ -562,7 +563,9 @@ public class GPUTileProcessor {
//#define TILESYA ((TILESY +3) & (~3))
int tilesYa = (tilesY + 3) & ~3;
// cuMemAlloc(gpu_texture_indices,tilesX * tilesY * Sizeof.POINTER);
cuMemAlloc(gpu_texture_indices,tilesX * tilesYa * Sizeof.POINTER);
cuMemAlloc(gpu_texture_indices,tilesX * tilesYa * Sizeof.FLOAT);
cuMemAlloc(gpu_diff_rgb_combo, tilesX * tilesYa * NUM_CAMS* (NUM_COLORS + 1) * Sizeof.FLOAT);
// cuMemAlloc(gpu_port_offsets, NUM_CAMS * 2 * Sizeof.FLOAT);
cuMemAlloc(gpu_color_weights, 3 * Sizeof.FLOAT);
......@@ -1272,7 +1275,9 @@ public class GPUTileProcessor {
Pointer.to(new int[] { idust_remove }), // int dust_remove, // Do not reduce average weight when only one image differes much from the average
Pointer.to(new int[] {0}), // int keep_weights, // return channel weights after A in RGBA
Pointer.to(new int[] { texture_stride_rgba }), // const size_t texture_rbga_stride, // in floats
Pointer.to(gpu_textures_rgba)); // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_textures_rgba), // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cuCtxSynchronize();
// Call the kernel function
cuLaunchKernel(GPU_RBGA_kernel,
......@@ -1333,8 +1338,8 @@ public class GPUTileProcessor {
Pointer.to(new int[] {0}),// 0, // const size_t texture_rbg_stride, // in floats - DISABLE GENERATION!
Pointer.to(new int[] {0}), // null, // new Pointer(), //Pointer.to(gpu_textures), // new Pointer(), // Pointer.to(gpu_textures),
Pointer.to(new int[] { texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_textures)
);
Pointer.to(gpu_textures),
Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cuCtxSynchronize();
// Call the kernel function
cuLaunchKernel(GPU_TEXTURES_kernel,
......@@ -1574,7 +1579,7 @@ public class GPUTileProcessor {
// for (String sourceCode: sourceCodeUnits) {
for (int cunit = 0; cunit < ptxDataUnits.length; cunit++) {
String sourceCode = sourceCodeUnits[cunit];
//System.out.print(sourceCode);
// System.out.print(sourceCode);
// Use the NVRTC to create a program by compiling the source code
nvrtcProgram program = new nvrtcProgram();
nvrtcCreateProgram( program, sourceCode, null, 0, null, null);
......
......@@ -45,11 +45,11 @@ public class MacroCorrelation {
double trusted_correlation,
double weight_var, // = 1.0; // weight of variance data (old, detects thin wires?)
double weight_Y, // = 1.0; // weight of average intensity
double weight_RBmG // = 5.0; // weight of average color difference (0.5*(R+B)-G), shoukld be ~5*weight_Y
double weight_RBmG // = 5.0; // weight of average color difference (0.5*(R+B)-G), should be ~5*weight_Y
){
this.weight_var = weight_var; // weight of variance data (old, detects thin wires?)
this.weight_Y = weight_Y; // weight of average intensity
this.weight_RBmG = weight_RBmG; // = 5.0; // weight of average color difference (0.5*(R+B)-G), shoukld be ~5*weight_Y
this.weight_RBmG = weight_RBmG; // = 5.0; // weight of average color difference (0.5*(R+B)-G), should be ~5*weight_Y
this.tp = tp;
final int pTilesX = tp.getTilesX();
......
......@@ -1196,7 +1196,8 @@ __global__ void generate_RBGA(
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{
// TODO use atomic_add to increment num_texture_tiles
// TODO calculate woi
......@@ -1329,7 +1330,9 @@ __global__ void generate_RBGA(
texture_rbga_stride, // size_t texture_rbg_stride, // in floats
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles); // (float *) 0 ); // float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cudaDeviceSynchronize(); // not needed yet, just for testing
/* */
}
......@@ -1788,8 +1791,8 @@ __global__ void textures_accumulate(
size_t texture_rbg_stride, // in floats
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{
// (float *) gpu_geometry_correction ->pXY0,
// float weights[3] = {weight0, weight1, weight2};
......@@ -1997,8 +2000,8 @@ __global__ void textures_accumulate(
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
(float *) shr1.rgbaw, // float * rgba, // result
(float * ) 0, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
(float * ) 0, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null
(float * ) ports_rgb, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
(float * ) max_diff, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null
(float *) port_offsets, // float * port_offsets, // [port]{x_off, y_off} - just to scale pixel value differences
diff_sigma, // float diff_sigma, // pixel value/pixel change
diff_threshold, // float diff_threshold, // pixel value/pixel change
......@@ -2013,8 +2016,8 @@ __global__ void textures_accumulate(
(float*) shr.mclt_debayer, // float * mclt_tile, // debayer // has gaps to align with union !
(float*) mclt_tiles, // float * rbg_tile, // if not null - original (not-debayered) rbg tile to use for the output
(float *) shr1.rgbaw, // float * rgba, // result
(float * ) 0, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
(float * ) 0, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null
(float * ) ports_rgb, // float * ports_rgb, // average values of R,G,B for each camera (R0,R1,...,B2,B3) // null
(float * ) max_diff, // float * max_diff, // maximal (weighted) deviation of each channel from the average /null
(float *) port_offsets, // float * port_offsets, // [port]{x_off, y_off} - just to scale pixel value differences
diff_sigma, // float diff_sigma, // pixel value/pixel change
diff_threshold, // float diff_threshold, // pixel value/pixel change
......
......@@ -96,7 +96,9 @@ extern "C" __global__ void textures_accumulate(
size_t texture_rbg_stride, // in floats
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
extern "C"
__global__ void imclt_rbg_all(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment