Commit fbab1093 authored by Andrey Filippov's avatar Andrey Filippov

switched textures_accumulate to use geometry_correction too

parent 8893c688
......@@ -193,7 +193,7 @@ public class GPUTileProcessor {
private CUdeviceptr gpu_num_corr_tiles = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER
private CUdeviceptr gpu_texture_indices = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.POINTER
private CUdeviceptr gpu_port_offsets = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.POINTER
// private CUdeviceptr gpu_port_offsets = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.POINTER
private CUdeviceptr gpu_color_weights = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.POINTER
private CUdeviceptr gpu_woi = new CUdeviceptr(); // 4 integers (x, y, width, height) Rectangle - in tiles
......@@ -564,7 +564,7 @@ public class GPUTileProcessor {
// cuMemAlloc(gpu_texture_indices,tilesX * tilesY * Sizeof.POINTER);
cuMemAlloc(gpu_texture_indices,tilesX * tilesYa * Sizeof.POINTER);
cuMemAlloc(gpu_port_offsets, NUM_CAMS * 2 * Sizeof.FLOAT);
// cuMemAlloc(gpu_port_offsets, NUM_CAMS * 2 * Sizeof.FLOAT);
cuMemAlloc(gpu_color_weights, 3 * Sizeof.FLOAT);
......@@ -1261,7 +1261,6 @@ public class GPUTileProcessor {
// Parameters for the texture generation
Pointer.to(gpu_clt), // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
// Pointer.to(gpu_port_offsets), // float * port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
Pointer.to(new int[] {num_colors}), // int colors, // number of colors (3/1)
Pointer.to(new int[] {iis_lwir}), // int is_lwir, // do not perform shot correction
Pointer.to(new float[] {(float) min_shot}), // float min_shot, // 10.0
......@@ -1285,7 +1284,6 @@ public class GPUTileProcessor {
}
public void execTextures(
// double [][] port_offsets,
double [] color_weights,
boolean is_lwir,
double min_shot, // 10.0
......@@ -1300,88 +1298,15 @@ public class GPUTileProcessor {
IJ.showMessage("Error", "No GPU kernel: GPU_TEXTURES_kernel");
return;
}
// float [] fport_offsets = new float[port_offsets.length * 2];
// for (int cam = 0; cam < port_offsets.length; cam++) {
// fport_offsets[2*cam + 0] = (float) port_offsets[cam][0];
// fport_offsets[2*cam + 1] = (float) port_offsets[cam][1];
// }
// cuMemcpyHtoD(gpu_port_offsets, Pointer.to(fport_offsets), fport_offsets.length * Sizeof.FLOAT);
int num_colors = color_weights.length;
if (num_colors > 3) num_colors = 3;
float weighht0 = (float) color_weights[0];
float weighht1 = (num_colors >1)?((float) color_weights[1]):0.0f;
float weighht2 = (num_colors >2)?((float) color_weights[2]):0.0f;
int iis_lwir = (is_lwir)? 1:0;
int idust_remove = (dust_remove)? 1 : 0;
int ikeep_weights = (keep_weights)? 1 : 0;
int [] GridFullWarps = {(num_texture_tiles + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1};
int [] ThreadsFullWarps = {TEXTURE_THREADS_PER_TILE, NUM_CAMS, 1};
Pointer kernelParameters = Pointer.to(
// Pointer.to(new int[] {0}), // 0, // int border_tile, // if 1 - watch for border
Pointer.to(new int[] {0}), // int * woi, - not used
Pointer.to(gpu_clt),
Pointer.to(new int[] { num_texture_tiles }),
Pointer.to(gpu_texture_indices),
// Pointer.to(gpu_port_offsets),
Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
Pointer.to(new int[] { num_colors }),
Pointer.to(new int[] { iis_lwir }),
Pointer.to(new float[] {(float) min_shot }),
Pointer.to(new float[] {(float) scale_shot }),
Pointer.to(new float[] {(float) diff_sigma }),
Pointer.to(new float[] {(float) diff_threshold }),
Pointer.to(new float[] {(float) min_agree }),
Pointer.to(new float[] {weighht0 }),
Pointer.to(new float[] {weighht1 }),
Pointer.to(new float[] {weighht2 }),
Pointer.to(new int[] { idust_remove }),
Pointer.to(new int[] { ikeep_weights }),
Pointer.to(new int[] {0}),// 0, // const size_t texture_rbg_stride, // in floats - DISABLE GENERATION!
Pointer.to(new int[] {0}), // null, // new Pointer(), //Pointer.to(gpu_textures), // new Pointer(), // Pointer.to(gpu_textures),
Pointer.to(new int[] { texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_textures)
);
cuCtxSynchronize();
// Call the kernel function
cuLaunchKernel(GPU_TEXTURES_kernel,
GridFullWarps[0], GridFullWarps[1], GridFullWarps[2], // Grid dimension
ThreadsFullWarps[0], ThreadsFullWarps[1],ThreadsFullWarps[2],// Block dimension
0, null, // Shared memory size and stream (shared - only dynamic, static is in code)
kernelParameters, null); // Kernel- and extra parameters
cuCtxSynchronize();
}
public void execTextures(
double [][] port_offsets,
double [] color_weights,
boolean is_lwir,
double min_shot, // 10.0
double scale_shot, // 3.0
double diff_sigma, // pixel value/pixel change
double diff_threshold, // pixel value/pixel change
double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
boolean dust_remove,
boolean keep_weights) {
if (GPU_TEXTURES_kernel == null)
{
IJ.showMessage("Error", "No GPU kernel: GPU_TEXTURES_kernel");
return;
}
float [] fport_offsets = new float[port_offsets.length * 2];
for (int cam = 0; cam < port_offsets.length; cam++) {
fport_offsets[2*cam + 0] = (float) port_offsets[cam][0];
fport_offsets[2*cam + 1] = (float) port_offsets[cam][1];
}
cuMemcpyHtoD(gpu_port_offsets, Pointer.to(fport_offsets), fport_offsets.length * Sizeof.FLOAT);
float [] fcolor_weights = new float[3];
fcolor_weights[0] = (float) color_weights[0];
fcolor_weights[1] = (num_colors >1)?((float) color_weights[1]):0.0f;
fcolor_weights[2] = (num_colors >2)?((float) color_weights[2]):0.0f;
cuMemcpyHtoD(gpu_color_weights, Pointer.to(fcolor_weights), fcolor_weights.length * Sizeof.FLOAT);
int num_colors = color_weights.length;
if (num_colors > 3) num_colors = 3;
float weighht0 = (float) color_weights[0];
float weighht1 = (num_colors >1)?((float) color_weights[1]):0.0f;
float weighht2 = (num_colors >2)?((float) color_weights[2]):0.0f;
int iis_lwir = (is_lwir)? 1:0;
int idust_remove = (dust_remove)? 1 : 0;
int ikeep_weights = (keep_weights)? 1 : 0;
......@@ -1390,13 +1315,11 @@ public class GPUTileProcessor {
int [] ThreadsFullWarps = {TEXTURE_THREADS_PER_TILE, NUM_CAMS, 1};
Pointer kernelParameters = Pointer.to(
// Pointer.to(new int[] {0}), // 0, // int border_tile, // if 1 - watch for border
Pointer.to(new int[] {0}), // int * woi, - not used
Pointer.to(gpu_clt),
Pointer.to(new int[] { num_texture_tiles }),
Pointer.to(gpu_texture_indices),
Pointer.to(gpu_port_offsets),
// Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
Pointer.to(new int[] { num_colors }),
Pointer.to(new int[] { iis_lwir }),
Pointer.to(new float[] {(float) min_shot }),
......@@ -1404,9 +1327,7 @@ public class GPUTileProcessor {
Pointer.to(new float[] {(float) diff_sigma }),
Pointer.to(new float[] {(float) diff_threshold }),
Pointer.to(new float[] {(float) min_agree }),
Pointer.to(new float[] {weighht0 }),
Pointer.to(new float[] {weighht1 }),
Pointer.to(new float[] {weighht2 }),
Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G
Pointer.to(new int[] { idust_remove }),
Pointer.to(new int[] { ikeep_weights }),
Pointer.to(new int[] {0}),// 0, // const size_t texture_rbg_stride, // in floats - DISABLE GENERATION!
......
......@@ -2092,13 +2092,13 @@ public class TwoQuadCLT {
false); // boolean use_java_rByRDist) { // false - use newer GPU execCalcReverseDistortions); // once
gPUTileProcessor.setExtrinsicsVector(quadCLT_main.getGeometryCorrection().getCorrVector()); // for each new image
// TODO: calculate from the camera geometry?
/* // TODO: calculate from the camera geometry?
double[][] port_offsets = { // used only in textures to scale differences
{-0.5, -0.5},
{ 0.5, -0.5},
{-0.5, 0.5},
{ 0.5, 0.5}};
*/
// All set, run kernel (correct and convert)
int NREPEAT = 1; // 00;
System.out.println("\n------------ Running GPU "+NREPEAT+" times ----------------");
......@@ -2139,7 +2139,6 @@ public class TwoQuadCLT {
// run textures
long startTextures = System.nanoTime(); // System.nanoTime();
for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execTextures(
/** remove when done **/ port_offsets, // double [][] port_offsets,
col_weights, // double [] color_weights,
quadCLT_main.isLwir(), // boolean is_lwir,
clt_parameters.min_shot, // double min_shot, // 10.0
......
......@@ -1307,13 +1307,14 @@ __global__ void generate_RBGA(
printf("\n");
#endif
/* */
textures_accumulate<<<grid_texture,threads_texture>>>(
textures_accumulate <<<grid_texture,threads_texture>>>(
woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
ntt, // size_t num_texture_tiles, // number of texture tiles to process
gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// gpu_port_offsets, // float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
(float *) gpu_geometry_correction ->pXY0,
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
// (float *) gpu_geometry_correction ->pXY0,
colors, // int colors, // number of colors (3/1)
is_lwir, // int is_lwir, // do not perform shot correction
min_shot, // float min_shot, // 10.0
......@@ -1321,9 +1322,7 @@ __global__ void generate_RBGA(
diff_sigma, // float diff_sigma, // pixel value/pixel change
diff_threshold, // float diff_threshold, // pixel value/pixel change
min_agree, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
weights[0], // float weight0, // scale for R
weights[1], // float weight1, // scale for B
weights[2], // float weight2, // scale for G
weights, // float weights[3], // scale for R,B,G
dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
0, // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
......@@ -1774,7 +1773,7 @@ __global__ void textures_accumulate(
size_t num_texture_tiles, // number of texture tiles to process
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// TODO: use geometry_correction rXY !
float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction
float min_shot, // 10.0
......@@ -1782,9 +1781,7 @@ __global__ void textures_accumulate(
float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weight0, // scale for R
float weight1, // scale for B
float weight2, // scale for G
float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
......@@ -1794,7 +1791,8 @@ __global__ void textures_accumulate(
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
{
float weights[3] = {weight0, weight1, weight2};
// (float *) gpu_geometry_correction ->pXY0,
// float weights[3] = {weight0, weight1, weight2};
// will process exactly 4 cameras in one block (so this number is not adjustable here NUM_CAMS should be == 4 !
int camera_num = threadIdx.y;
int tile_indx = blockIdx.x; // * TEXTURE_TILES_PER_BLOCK + tile_in_block;
......@@ -1825,7 +1823,8 @@ __global__ void textures_accumulate(
__shared__ float ports_rgb [NUM_CAMS][NUM_COLORS]; // return to system memory (optionally pass null to skip calculation)
__shared__ float max_diff [NUM_CAMS]; // return to system memory (optionally pass null to skip calculation)
if (threadIdx.x < 2){
port_offsets[camera_num][threadIdx.x] = * (gpu_port_offsets + 2 * camera_num + threadIdx.x);
// port_offsets[camera_num][threadIdx.x] = * (gpu_port_offsets + 2 * camera_num + threadIdx.x);
port_offsets[camera_num][threadIdx.x] = gpu_geometry_correction->rXY[camera_num][threadIdx.x];
}
......
......@@ -81,7 +81,7 @@ extern "C" __global__ void textures_accumulate(
size_t num_texture_tiles, // number of texture tiles to process
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// TODO: use geometry_correction rXY !
float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction
float min_shot, // 10.0
......@@ -89,16 +89,14 @@ extern "C" __global__ void textures_accumulate(
float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weight0, // scale for R
float weight1, // scale for B
float weight2, // scale for G
float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
// combining both non-overlap and overlap (each calculated if pointer is not null )
size_t texture_rbg_stride, // in floats
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
extern "C"
__global__ void imclt_rbg_all(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment