Commit 0182bb3c authored by Andrey Filippov's avatar Andrey Filippov

consolidated 5 float inputs to an array of 5 floats to solve CUDA_ERROR_INVALID_PTX

parent 02f6e62d
...@@ -198,8 +198,8 @@ public class GPUTileProcessor { ...@@ -198,8 +198,8 @@ public class GPUTileProcessor {
private CUdeviceptr gpu_texture_indices_len = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT private CUdeviceptr gpu_texture_indices_len = new CUdeviceptr(); // allocate tilesX * tilesY * 6 * Sizeof.FLOAT
private CUdeviceptr gpu_diff_rgb_combo = new CUdeviceptr(); // 1 int private CUdeviceptr gpu_diff_rgb_combo = new CUdeviceptr(); // 1 int
// private CUdeviceptr gpu_port_offsets = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.FLOAT private CUdeviceptr gpu_color_weights = new CUdeviceptr(); // allocate 3 * Sizeof.FLOAT
private CUdeviceptr gpu_color_weights = new CUdeviceptr(); // allocate Quad * 2 * Sizeof.FLOAT private CUdeviceptr gpu_generate_RBGA_params =new CUdeviceptr(); // allocate 5 * Sizeof.FLOAT
private CUdeviceptr gpu_woi = new CUdeviceptr(); // 4 integers (x, y, width, height) Rectangle - in tiles private CUdeviceptr gpu_woi = new CUdeviceptr(); // 4 integers (x, y, width, height) Rectangle - in tiles
private CUdeviceptr gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT private CUdeviceptr gpu_textures_rgba = new CUdeviceptr(); // allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
...@@ -570,8 +570,8 @@ public class GPUTileProcessor { ...@@ -570,8 +570,8 @@ public class GPUTileProcessor {
cuMemAlloc(gpu_diff_rgb_combo, tilesX * tilesYa * NUM_CAMS* (NUM_COLORS + 1) * Sizeof.FLOAT); cuMemAlloc(gpu_diff_rgb_combo, tilesX * tilesYa * NUM_CAMS* (NUM_COLORS + 1) * Sizeof.FLOAT);
// cuMemAlloc(gpu_port_offsets, NUM_CAMS * 2 * Sizeof.FLOAT);
cuMemAlloc(gpu_color_weights, 3 * Sizeof.FLOAT); cuMemAlloc(gpu_color_weights, 3 * Sizeof.FLOAT);
cuMemAlloc(gpu_generate_RBGA_params, 5 * Sizeof.FLOAT);
cuMemAlloc(gpu_woi, 4 * Sizeof.FLOAT); cuMemAlloc(gpu_woi, 4 * Sizeof.FLOAT);
...@@ -1268,6 +1268,15 @@ public class GPUTileProcessor { ...@@ -1268,6 +1268,15 @@ public class GPUTileProcessor {
fcolor_weights[2] = (num_colors >2)?((float) color_weights[2]):0.0f; fcolor_weights[2] = (num_colors >2)?((float) color_weights[2]):0.0f;
cuMemcpyHtoD(gpu_color_weights, Pointer.to(fcolor_weights), fcolor_weights.length * Sizeof.FLOAT); cuMemcpyHtoD(gpu_color_weights, Pointer.to(fcolor_weights), fcolor_weights.length * Sizeof.FLOAT);
float [] generate_RBGA_params = {
(float) min_shot, // 10.0
(float) scale_shot, // 3.0
(float) diff_sigma, // pixel value/pixel change
(float) diff_threshold, // pixel value/pixel change
(float) min_agree // minimal number of channels to agree on a point (real number to work with fuzzy averages)
};
cuMemcpyHtoD(gpu_generate_RBGA_params, Pointer.to(generate_RBGA_params), generate_RBGA_params.length * Sizeof.FLOAT);
int iis_lwir = (is_lwir)? 1:0; int iis_lwir = (is_lwir)? 1:0;
int idust_remove = (dust_remove)? 1 : 0; int idust_remove = (dust_remove)? 1 : 0;
...@@ -1290,18 +1299,17 @@ public class GPUTileProcessor { ...@@ -1290,18 +1299,17 @@ public class GPUTileProcessor {
Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction, Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
Pointer.to(new int[] {num_colors}), // int colors, // number of colors (3/1) Pointer.to(new int[] {num_colors}), // int colors, // number of colors (3/1)
Pointer.to(new int[] {iis_lwir}), // int is_lwir, // do not perform shot correction Pointer.to(new int[] {iis_lwir}), // int is_lwir, // do not perform shot correction
Pointer.to(new float[] {(float) min_shot}), // float min_shot, // 10.0 Pointer.to(gpu_generate_RBGA_params), // float generate_RBGA_params[5],
Pointer.to(new float[] {(float) scale_shot}), // float scale_shot, // 3.0 // Pointer.to(new float[] {(float) min_shot}), // float min_shot, // 10.0
Pointer.to(new float[] {(float) diff_sigma}), // float diff_sigma, // pixel value/pixel change // Pointer.to(new float[] {(float) scale_shot}), // float scale_shot, // 3.0
Pointer.to(new float[] {(float) diff_threshold}),// float diff_threshold, // pixel value/pixel change // Pointer.to(new float[] {(float) diff_sigma}), // float diff_sigma, // pixel value/pixel change
Pointer.to(new float[] {(float) min_agree}), // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) // Pointer.to(new float[] {(float) diff_threshold}),// float diff_threshold, // pixel value/pixel change
// Pointer.to(new float[] {(float) min_agree}), // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G
Pointer.to(new int[] { idust_remove }), // int dust_remove, // Do not reduce average weight when only one image differes much from the average Pointer.to(new int[] { idust_remove }), // int dust_remove, // Do not reduce average weight when only one image differes much from the average
Pointer.to(new int[] {0}), // int keep_weights, // return channel weights after A in RGBA Pointer.to(new int[] {0}), // int keep_weights, // return channel weights after A in RGBA
Pointer.to(new int[] { texture_stride_rgba }), // const size_t texture_rbga_stride, // in floats Pointer.to(new int[] { texture_stride_rgba }), // const size_t texture_rbga_stride, // in floats
Pointer.to(gpu_textures_rgba), // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles Pointer.to(gpu_textures_rgba)); // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
// Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
Pointer.to(new int[] {0})); // gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cuCtxSynchronize(); cuCtxSynchronize();
// Call the kernel function // Call the kernel function
...@@ -1313,69 +1321,7 @@ public class GPUTileProcessor { ...@@ -1313,69 +1321,7 @@ public class GPUTileProcessor {
cuCtxSynchronize(); cuCtxSynchronize();
} }
public void execTextures_old( // old public void execTextures(
double [] color_weights,
boolean is_lwir,
double min_shot, // 10.0
double scale_shot, // 3.0
double diff_sigma, // pixel value/pixel change
double diff_threshold, // pixel value/pixel change
double min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
boolean dust_remove,
boolean keep_weights) {
if (GPU_TEXTURES_kernel == null)
{
IJ.showMessage("Error", "No GPU kernel: GPU_TEXTURES_kernel");
return;
}
int num_colors = color_weights.length;
if (num_colors > 3) num_colors = 3;
float [] fcolor_weights = new float[3];
fcolor_weights[0] = (float) color_weights[0];
fcolor_weights[1] = (num_colors >1)?((float) color_weights[1]):0.0f;
fcolor_weights[2] = (num_colors >2)?((float) color_weights[2]):0.0f;
cuMemcpyHtoD(gpu_color_weights, Pointer.to(fcolor_weights), fcolor_weights.length * Sizeof.FLOAT);
int iis_lwir = (is_lwir)? 1:0;
int idust_remove = (dust_remove)? 1 : 0;
int ikeep_weights = (keep_weights)? 1 : 0;
int [] GridFullWarps = {(num_texture_tiles + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1};
int [] ThreadsFullWarps = {TEXTURE_THREADS_PER_TILE, NUM_CAMS, 1};
Pointer kernelParameters = Pointer.to(
Pointer.to(new int[] {0}), // int * woi, - not used
Pointer.to(gpu_clt),
Pointer.to(new int[] { num_texture_tiles }),
Pointer.to(gpu_texture_indices),
Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
Pointer.to(new int[] { num_colors }),
Pointer.to(new int[] { iis_lwir }),
Pointer.to(new float[] {(float) min_shot }),
Pointer.to(new float[] {(float) scale_shot }),
Pointer.to(new float[] {(float) diff_sigma }),
Pointer.to(new float[] {(float) diff_threshold }),
Pointer.to(new float[] {(float) min_agree }),
Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G
Pointer.to(new int[] { idust_remove }),
Pointer.to(new int[] { ikeep_weights }),
Pointer.to(new int[] {0}),// 0, // const size_t texture_rbg_stride, // in floats - DISABLE GENERATION!
Pointer.to(new int[] {0}), // null, // new Pointer(), //Pointer.to(gpu_textures), // new Pointer(), // Pointer.to(gpu_textures),
Pointer.to(new int[] { texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
Pointer.to(gpu_textures),
Pointer.to(gpu_diff_rgb_combo)); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cuCtxSynchronize();
// Call the kernel function
cuLaunchKernel(GPU_TEXTURES_kernel,
GridFullWarps[0], GridFullWarps[1], GridFullWarps[2], // Grid dimension
ThreadsFullWarps[0], ThreadsFullWarps[1],ThreadsFullWarps[2],// Block dimension
0, null, // Shared memory size and stream (shared - only dynamic, static is in code)
kernelParameters, null); // Kernel- and extra parameters
cuCtxSynchronize();
}
public void execTextures(
double [] color_weights, double [] color_weights,
boolean is_lwir, boolean is_lwir,
double min_shot, // 10.0 double min_shot, // 10.0
...@@ -1398,6 +1344,15 @@ public class GPUTileProcessor { ...@@ -1398,6 +1344,15 @@ public class GPUTileProcessor {
fcolor_weights[2] = (num_colors >2)?((float) color_weights[2]):0.0f; fcolor_weights[2] = (num_colors >2)?((float) color_weights[2]):0.0f;
cuMemcpyHtoD(gpu_color_weights, Pointer.to(fcolor_weights), fcolor_weights.length * Sizeof.FLOAT); cuMemcpyHtoD(gpu_color_weights, Pointer.to(fcolor_weights), fcolor_weights.length * Sizeof.FLOAT);
float [] generate_RBGA_params = {
(float) min_shot, // 10.0
(float) scale_shot, // 3.0
(float) diff_sigma, // pixel value/pixel change
(float) diff_threshold, // pixel value/pixel change
(float) min_agree // minimal number of channels to agree on a point (real number to work with fuzzy averages)
};
cuMemcpyHtoD(gpu_generate_RBGA_params, Pointer.to(generate_RBGA_params), generate_RBGA_params.length * Sizeof.FLOAT);
int iis_lwir = (is_lwir)? 1:0; int iis_lwir = (is_lwir)? 1:0;
int idust_remove = (dust_remove)? 1 : 0; int idust_remove = (dust_remove)? 1 : 0;
...@@ -1415,11 +1370,12 @@ public class GPUTileProcessor { ...@@ -1415,11 +1370,12 @@ public class GPUTileProcessor {
Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction, Pointer.to(gpu_geometry_correction), // struct gc * gpu_geometry_correction,
Pointer.to(new int[] { num_colors }), Pointer.to(new int[] { num_colors }),
Pointer.to(new int[] { iis_lwir }), Pointer.to(new int[] { iis_lwir }),
Pointer.to(new float[] {(float) min_shot }), Pointer.to(gpu_generate_RBGA_params), // float generate_RBGA_params[5],
Pointer.to(new float[] {(float) scale_shot }), // Pointer.to(new float[] {(float) min_shot}), // float min_shot, // 10.0
Pointer.to(new float[] {(float) diff_sigma }), // Pointer.to(new float[] {(float) scale_shot}), // float scale_shot, // 3.0
Pointer.to(new float[] {(float) diff_threshold }), // Pointer.to(new float[] {(float) diff_sigma}), // float diff_sigma, // pixel value/pixel change
Pointer.to(new float[] {(float) min_agree }), // Pointer.to(new float[] {(float) diff_threshold}),// float diff_threshold, // pixel value/pixel change
// Pointer.to(new float[] {(float) min_agree}), // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G Pointer.to(gpu_color_weights), // float weights[3], // scale for R,B,G
Pointer.to(new int[] { idust_remove }), Pointer.to(new int[] { idust_remove }),
// Pointer.to(new int[] { 0}), // texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles // Pointer.to(new int[] { 0}), // texture_stride }), // can be a null pointer - will not be used! float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
......
...@@ -1217,41 +1217,48 @@ extern "C" __global__ void correlate2D_inner( ...@@ -1217,41 +1217,48 @@ extern "C" __global__ void correlate2D_inner(
} }
#define USE_CDP #define USE_CDP
#ifdef USE_CDP #ifdef USE_CDP
extern "C" extern "C" __global__ void generate_RBGA(
__global__ void generate_RBGA( // Parameters to generate texture tasks
// Parameters to generate texture tasks struct tp_task * gpu_tasks,
struct tp_task * gpu_tasks, int num_tiles, // number of tiles in task list
int num_tiles, // number of tiles in task list // declare arrays in device code?
// declare arrays in device code? int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation)
int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation) int * woi, // x,y,width,height of the woi
int * woi, // x,y,width,height of the woi int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1)
int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1) int height, // <= TILESY, use for faster processing of LWIR images
int height, // <= TILESY, use for faster processing of LWIR images // Parameters for the texture generation
// Parameters for the texture generation float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] // TODO: use geometry_correction rXY !
// TODO: use geometry_correction rXY ! struct gc * gpu_geometry_correction,
struct gc * gpu_geometry_correction, int colors, // number of colors (3/1)
// float * gpu_geometry_correction, int is_lwir, // do not perform shot correction
// float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate float params[5], // mitigating CUDA_ERROR_INVALID_PTX
int colors, // number of colors (3/1) /*
int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float min_shot, // 10.0
float scale_shot, // 3.0 float scale_shot, // 3.0
float diff_sigma, // pixel value/pixel change float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G */
int dust_remove, // Do not reduce average weight when only one image differs much from the average float weights[3], // scale for R,B,G
int keep_weights, // return channel weights after A in RGBA (was removed) int dust_remove, // Do not reduce average weight when only one image differs much from the average
const size_t texture_rbga_stride, // in floats int keep_weights, // return channel weights after A in RGBA (was removed)
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles const size_t texture_rbga_stride, // in floats
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
// float aaaa)
// float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{ {
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
// TODO use atomic_add to increment num_texture_tiles // TODO use atomic_add to increment num_texture_tiles
// TODO calculate woi // TODO calculate woi
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS; int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
dim3 blocks0 (blocks_x, height, 1); dim3 blocks0 (blocks_x, height, 1);
...@@ -1380,8 +1387,7 @@ __global__ void generate_RBGA( ...@@ -1380,8 +1387,7 @@ __global__ void generate_RBGA(
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024) 0, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] (float *)0);//gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
// gpu_diff_rgb_combo + ti_offset * NUM_CAMS*(colors+1)); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cudaDeviceSynchronize(); // not needed yet, just for testing cudaDeviceSynchronize(); // not needed yet, just for testing
/* */ /* */
...@@ -1848,11 +1854,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -1848,11 +1854,12 @@ extern "C" __global__ void textures_nonoverlap(
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float params[5],
float scale_shot, // 3.0 // float min_shot, // 10.0
float diff_sigma, // pixel value/pixel change // float scale_shot, // 3.0
float diff_threshold, // pixel value/pixel change // float diff_sigma, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) // float diff_threshold, // pixel value/pixel change
// float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
...@@ -1861,6 +1868,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -1861,6 +1868,12 @@ extern "C" __global__ void textures_nonoverlap(
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{ {
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1); dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1);
dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1); dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
...@@ -1902,8 +1915,7 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -1902,8 +1915,7 @@ extern "C" __global__ void textures_nonoverlap(
//#undef USE_textures_gen //#undef USE_textures_gen
extern "C" extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
__global__ void textures_accumulate( // (8,4,1) (N,1,1)
int * woi, // x, y, width,height int * woi, // x, y, width,height
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process size_t num_texture_tiles, // number of texture tiles to process
...@@ -3952,7 +3964,6 @@ __device__ void tile_combine_rgba( ...@@ -3952,7 +3964,6 @@ __device__ void tile_combine_rgba(
} }
max_diff_shared[cam] = sqrtf(mx); max_diff_shared[cam] = sqrtf(mx);
} }
__syncthreads(); //?
#ifdef DEBUG22 #ifdef DEBUG22
if (debug && (threadIdx.x == 0) && (threadIdx.y == 0)){ if (debug && (threadIdx.x == 0) && (threadIdx.y == 0)){
printf("\n 1. max_diff\n"); printf("\n 1. max_diff\n");
...@@ -4022,7 +4033,6 @@ __device__ void tile_combine_rgba( ...@@ -4022,7 +4033,6 @@ __device__ void tile_combine_rgba(
ports_rgb_shared[ncol][cam] /= DTT_SIZE2*DTT_SIZE2; // correct for window? ports_rgb_shared[ncol][cam] /= DTT_SIZE2*DTT_SIZE2; // correct for window?
} }
} }
__syncthreads(); //?
#ifdef DEBUG22 #ifdef DEBUG22
if (debug && (threadIdx.x == 0) && (threadIdx.y == 0)){ if (debug && (threadIdx.x == 0) && (threadIdx.y == 0)){
printf("\n 2. max_diff\n"); printf("\n 2. max_diff\n");
......
...@@ -86,11 +86,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -86,11 +86,12 @@ extern "C" __global__ void textures_nonoverlap(
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float params[5],
float scale_shot, // 3.0 // float min_shot, // 10.0
float diff_sigma, // pixel value/pixel change // float scale_shot, // 3.0
float diff_threshold, // pixel value/pixel change // float diff_sigma, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) // float diff_threshold, // pixel value/pixel change
// float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
...@@ -120,7 +121,7 @@ extern "C" __global__ void imclt_rbg( ...@@ -120,7 +121,7 @@ extern "C" __global__ void imclt_rbg(
int woi_twidth, int woi_twidth,
int woi_theight, int woi_theight,
const size_t dstride); // in floats (pixels) const size_t dstride); // in floats (pixels)
/*
extern "C" __global__ void generate_RBGA( extern "C" __global__ void generate_RBGA(
// Parameters to generate texture tasks // Parameters to generate texture tasks
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
...@@ -135,19 +136,19 @@ extern "C" __global__ void generate_RBGA( ...@@ -135,19 +136,19 @@ extern "C" __global__ void generate_RBGA(
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY ! // TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float params[5], // mitigating CUDA_ERROR_INVALID_PTX
float scale_shot, // 3.0 /*
float diff_sigma, // pixel value/pixel change float min_shot, // 10.0
float diff_threshold, // pixel value/pixel change float scale_shot, // 3.0
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float diff_sigma, // pixel value/pixel change
float weight0, // scale for R float diff_threshold, // pixel value/pixel change
float weight1, // scale for B float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weight2, // scale for G */
float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
*/
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment