Commit 0182bb3c authored by Andrey Filippov

Consolidated 5 float inputs into an array of 5 floats to work around CUDA_ERROR_INVALID_PTX

parent 02f6e62d
...@@ -1217,41 +1217,48 @@ extern "C" __global__ void correlate2D_inner( ...@@ -1217,41 +1217,48 @@ extern "C" __global__ void correlate2D_inner(
} }
#define USE_CDP #define USE_CDP
#ifdef USE_CDP #ifdef USE_CDP
extern "C" extern "C" __global__ void generate_RBGA(
__global__ void generate_RBGA( // Parameters to generate texture tasks
// Parameters to generate texture tasks struct tp_task * gpu_tasks,
struct tp_task * gpu_tasks, int num_tiles, // number of tiles in task list
int num_tiles, // number of tiles in task list // declare arrays in device code?
// declare arrays in device code? int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation)
int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation) int * woi, // x,y,width,height of the woi
int * woi, // x,y,width,height of the woi int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1)
int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1) int height, // <= TILESY, use for faster processing of LWIR images
int height, // <= TILESY, use for faster processing of LWIR images // Parameters for the texture generation
// Parameters for the texture generation float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] // TODO: use geometry_correction rXY !
// TODO: use geometry_correction rXY ! struct gc * gpu_geometry_correction,
struct gc * gpu_geometry_correction, int colors, // number of colors (3/1)
// float * gpu_geometry_correction, int is_lwir, // do not perform shot correction
// float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate float params[5], // mitigating CUDA_ERROR_INVALID_PTX
int colors, // number of colors (3/1) /*
int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float min_shot, // 10.0
float scale_shot, // 3.0 float scale_shot, // 3.0
float diff_sigma, // pixel value/pixel change float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G */
int dust_remove, // Do not reduce average weight when only one image differs much from the average float weights[3], // scale for R,B,G
int keep_weights, // return channel weights after A in RGBA (was removed) int dust_remove, // Do not reduce average weight when only one image differs much from the average
const size_t texture_rbga_stride, // in floats int keep_weights, // return channel weights after A in RGBA (was removed)
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles const size_t texture_rbga_stride, // in floats
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
// float aaaa)
// float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{ {
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
// TODO use atomic_add to increment num_texture_tiles // TODO use atomic_add to increment num_texture_tiles
// TODO calculate woi // TODO calculate woi
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS; int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
dim3 blocks0 (blocks_x, height, 1); dim3 blocks0 (blocks_x, height, 1);
...@@ -1380,8 +1387,7 @@ __global__ void generate_RBGA( ...@@ -1380,8 +1387,7 @@ __global__ void generate_RBGA(
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024) 0, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] (float *)0);//gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
// gpu_diff_rgb_combo + ti_offset * NUM_CAMS*(colors+1)); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cudaDeviceSynchronize(); // not needed yet, just for testing cudaDeviceSynchronize(); // not needed yet, just for testing
/* */ /* */
...@@ -1848,11 +1854,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -1848,11 +1854,12 @@ extern "C" __global__ void textures_nonoverlap(
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float params[5],
float scale_shot, // 3.0 // float min_shot, // 10.0
float diff_sigma, // pixel value/pixel change // float scale_shot, // 3.0
float diff_threshold, // pixel value/pixel change // float diff_sigma, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) // float diff_threshold, // pixel value/pixel change
// float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
...@@ -1861,6 +1868,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -1861,6 +1868,12 @@ extern "C" __global__ void textures_nonoverlap(
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{ {
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1); dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1);
dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1); dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
...@@ -1902,8 +1915,7 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -1902,8 +1915,7 @@ extern "C" __global__ void textures_nonoverlap(
//#undef USE_textures_gen //#undef USE_textures_gen
extern "C" extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
__global__ void textures_accumulate( // (8,4,1) (N,1,1)
int * woi, // x, y, width,height int * woi, // x, y, width,height
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
size_t num_texture_tiles, // number of texture tiles to process size_t num_texture_tiles, // number of texture tiles to process
...@@ -3952,7 +3964,6 @@ __device__ void tile_combine_rgba( ...@@ -3952,7 +3964,6 @@ __device__ void tile_combine_rgba(
} }
max_diff_shared[cam] = sqrtf(mx); max_diff_shared[cam] = sqrtf(mx);
} }
__syncthreads(); //?
#ifdef DEBUG22 #ifdef DEBUG22
if (debug && (threadIdx.x == 0) && (threadIdx.y == 0)){ if (debug && (threadIdx.x == 0) && (threadIdx.y == 0)){
printf("\n 1. max_diff\n"); printf("\n 1. max_diff\n");
...@@ -4022,7 +4033,6 @@ __device__ void tile_combine_rgba( ...@@ -4022,7 +4033,6 @@ __device__ void tile_combine_rgba(
ports_rgb_shared[ncol][cam] /= DTT_SIZE2*DTT_SIZE2; // correct for window? ports_rgb_shared[ncol][cam] /= DTT_SIZE2*DTT_SIZE2; // correct for window?
} }
} }
__syncthreads(); //?
#ifdef DEBUG22 #ifdef DEBUG22
if (debug && (threadIdx.x == 0) && (threadIdx.y == 0)){ if (debug && (threadIdx.x == 0) && (threadIdx.y == 0)){
printf("\n 2. max_diff\n"); printf("\n 2. max_diff\n");
......
...@@ -86,11 +86,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -86,11 +86,12 @@ extern "C" __global__ void textures_nonoverlap(
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float params[5],
float scale_shot, // 3.0 // float min_shot, // 10.0
float diff_sigma, // pixel value/pixel change // float scale_shot, // 3.0
float diff_threshold, // pixel value/pixel change // float diff_sigma, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) // float diff_threshold, // pixel value/pixel change
// float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
...@@ -120,7 +121,7 @@ extern "C" __global__ void imclt_rbg( ...@@ -120,7 +121,7 @@ extern "C" __global__ void imclt_rbg(
int woi_twidth, int woi_twidth,
int woi_theight, int woi_theight,
const size_t dstride); // in floats (pixels) const size_t dstride); // in floats (pixels)
/*
extern "C" __global__ void generate_RBGA( extern "C" __global__ void generate_RBGA(
// Parameters to generate texture tasks // Parameters to generate texture tasks
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
...@@ -135,19 +136,19 @@ extern "C" __global__ void generate_RBGA( ...@@ -135,19 +136,19 @@ extern "C" __global__ void generate_RBGA(
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY ! // TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float params[5], // mitigating CUDA_ERROR_INVALID_PTX
float scale_shot, // 3.0 /*
float diff_sigma, // pixel value/pixel change float min_shot, // 10.0
float diff_threshold, // pixel value/pixel change float scale_shot, // 3.0
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float diff_sigma, // pixel value/pixel change
float weight0, // scale for R float diff_threshold, // pixel value/pixel change
float weight1, // scale for B float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weight2, // scale for G */
float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
*/
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment