Commit 70ff9492 authored by Andrey Filippov

Consolidated 5 separate float inputs into a single array of 5 floats to resolve CUDA_ERROR_INVALID_PTX.

parent fcd21d6e
...@@ -1217,8 +1217,7 @@ extern "C" __global__ void correlate2D_inner( ...@@ -1217,8 +1217,7 @@ extern "C" __global__ void correlate2D_inner(
} }
#define USE_CDP #define USE_CDP
#ifdef USE_CDP #ifdef USE_CDP
extern "C" extern "C" __global__ void generate_RBGA(
__global__ void generate_RBGA(
// Parameters to generate texture tasks // Parameters to generate texture tasks
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list int num_tiles, // number of tiles in task list
...@@ -1232,26 +1231,40 @@ __global__ void generate_RBGA( ...@@ -1232,26 +1231,40 @@ __global__ void generate_RBGA(
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY ! // TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
// float * gpu_geometry_correction,
// float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float params[5],
/*
float min_shot, // 10.0 float min_shot, // 10.0
float scale_shot, // 3.0 float scale_shot, // 3.0
float diff_sigma, // pixel value/pixel change float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
*/
float weights[3], // scale for R,B,G float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) int keep_weights, // return channel weights after A in RGBA (was removed)
const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
// float aaaa)
// float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
/*
const size_t texture_rbga_stride, // in floats const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] float aaaa)
*/
{ {
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
// TODO use atomic_add to increment num_texture_tiles // TODO use atomic_add to increment num_texture_tiles
// TODO calculate woi // TODO calculate woi
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS; int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
dim3 blocks0 (blocks_x, height, 1); dim3 blocks0 (blocks_x, height, 1);
...@@ -1380,8 +1393,7 @@ __global__ void generate_RBGA( ...@@ -1380,8 +1393,7 @@ __global__ void generate_RBGA(
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024) 0, // size_t texture_stride, // in floats (now 256*4 = 1024)
gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles gpu_texture_tiles, //(float *)0);// float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] (float *)0);//gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
// gpu_diff_rgb_combo + ti_offset * NUM_CAMS*(colors+1)); // float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
cudaDeviceSynchronize(); // not needed yet, just for testing cudaDeviceSynchronize(); // not needed yet, just for testing
/* */ /* */
...@@ -1848,11 +1860,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -1848,11 +1860,12 @@ extern "C" __global__ void textures_nonoverlap(
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float params[5],
float scale_shot, // 3.0 // float min_shot, // 10.0
float diff_sigma, // pixel value/pixel change // float scale_shot, // 3.0
float diff_threshold, // pixel value/pixel change // float diff_sigma, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) // float diff_threshold, // pixel value/pixel change
// float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
...@@ -1861,6 +1874,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -1861,6 +1874,12 @@ extern "C" __global__ void textures_nonoverlap(
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{ {
float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change
float diff_threshold = params[3]; // pixel value/pixel change
float min_agree = params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1); dim3 threads0(CONVERT_DIRECT_INDEXING_THREADS, 1, 1);
dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1); dim3 blocks0 ((num_tiles + CONVERT_DIRECT_INDEXING_THREADS -1) >> CONVERT_DIRECT_INDEXING_THREADS_LOG2,1, 1);
......
...@@ -86,11 +86,12 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -86,11 +86,12 @@ extern "C" __global__ void textures_nonoverlap(
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float params[5],
float scale_shot, // 3.0 // float min_shot, // 10.0
float diff_sigma, // pixel value/pixel change // float scale_shot, // 3.0
float diff_threshold, // pixel value/pixel change // float diff_sigma, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) // float diff_threshold, // pixel value/pixel change
// float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
// int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
...@@ -120,13 +121,24 @@ extern "C" __global__ void imclt_rbg( ...@@ -120,13 +121,24 @@ extern "C" __global__ void imclt_rbg(
int woi_twidth, int woi_twidth,
int woi_theight, int woi_theight,
const size_t dstride); // in floats (pixels) const size_t dstride); // in floats (pixels)
extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
int * woi, // x, y, width,height /*
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] extern "C" __global__ void generate_RBGA(
size_t num_texture_tiles, // number of texture tiles to process // Parameters to generate texture tasks
struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
// declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation)
int * woi, // x,y,width,height of the woi
int width, // <= TILESX, use for faster processing of LWIR images (should be actual + 1)
int height, // <= TILESY, use for faster processing of LWIR images
// Parameters for the texture generation
float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY ! // TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
// float * gpu_geometry_correction,
// float * gpu_port_offsets, // relative ports x,y offsets - just to scale differences, may be approximate
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float min_shot, // 10.0 float min_shot, // 10.0
...@@ -136,11 +148,8 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -136,11 +148,8 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? int keep_weights, // return channel weights after A in RGBA (was removed)
// combining both non-overlap and overlap (each calculated if pointer is not null ) const size_t texture_rbga_stride, // in floats
size_t texture_rbg_stride, // in floats
float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
size_t texture_stride, // in floats (now 256*4 = 1024)
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
*/
...@@ -348,6 +348,7 @@ int main(int argc, char **argv) ...@@ -348,6 +348,7 @@ int main(int argc, char **argv)
int * gpu_num_texture_tiles; int * gpu_num_texture_tiles;
float * gpu_port_offsets; float * gpu_port_offsets;
float * gpu_color_weights; float * gpu_color_weights;
float * gpu_generate_RBGA_params;
int num_corrs; int num_corrs;
int num_textures; int num_textures;
int num_ports = NUM_CAMS; int num_ports = NUM_CAMS;
...@@ -562,9 +563,17 @@ int main(int argc, char **argv) ...@@ -562,9 +563,17 @@ int main(int argc, char **argv)
0.294118, // float weight0, // scale for R 0.294118, // float weight0, // scale for R
0.117647, // float weight1, // scale for B 0.117647, // float weight1, // scale for B
0.588235}; // float weight2, // scale for G 0.588235}; // float weight2, // scale for G
float generate_RBGA_params[]={
10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
3.0 // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
};
gpu_port_offsets = (float *) copyalloc_kernel_gpu((float * ) port_offsets, num_ports * 2); gpu_port_offsets = (float *) copyalloc_kernel_gpu((float * ) port_offsets, num_ports * 2);
gpu_color_weights = (float *) copyalloc_kernel_gpu((float * ) color_weights, sizeof(color_weights)); gpu_color_weights = (float *) copyalloc_kernel_gpu((float * ) color_weights, sizeof(color_weights));
gpu_generate_RBGA_params = (float *) copyalloc_kernel_gpu((float * ) generate_RBGA_params, sizeof(generate_RBGA_params));
...@@ -1088,11 +1097,14 @@ int main(int argc, char **argv) ...@@ -1088,11 +1097,14 @@ int main(int argc, char **argv)
gpu_geometry_correction, // struct gc * gpu_geometry_correction, gpu_geometry_correction, // struct gc * gpu_geometry_correction,
texture_colors, // int colors, // number of colors (3/1) texture_colors, // int colors, // number of colors (3/1)
(texture_colors == 1), // int is_lwir, // do not perform shot correction (texture_colors == 1), // int is_lwir, // do not perform shot correction
gpu_generate_RBGA_params,
/*
10.0, // float min_shot, // 10.0 10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0 3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change 1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change 10.0f, // float diff_threshold, // pixel value/pixel change
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) 3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
*/
gpu_color_weights, // float weights[3], // scale for R gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average 1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null ) // combining both non-overlap and overlap (each calculated if pointer is not null )
...@@ -1267,11 +1279,14 @@ int main(int argc, char **argv) ...@@ -1267,11 +1279,14 @@ int main(int argc, char **argv)
gpu_geometry_correction, // struct gc * gpu_geometry_correction, gpu_geometry_correction, // struct gc * gpu_geometry_correction,
texture_colors, // int colors, // number of colors (3/1) texture_colors, // int colors, // number of colors (3/1)
(texture_colors == 1), // int is_lwir, // do not perform shot correction (texture_colors == 1), // int is_lwir, // do not perform shot correction
gpu_generate_RBGA_params,
/*
10.0, // float min_shot, // 10.0 10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0 3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change 1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change 10.0f, // float diff_threshold, // pixel value/pixel change
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) 3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
*/
gpu_color_weights, // float weights[3], // scale for R gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average 1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
0, // int keep_weights, // return channel weights after A in RGBA 0, // int keep_weights, // return channel weights after A in RGBA
...@@ -1366,6 +1381,7 @@ int main(int argc, char **argv) ...@@ -1366,6 +1381,7 @@ int main(int argc, char **argv)
checkCudaErrors(cudaFree(gpu_texture_indices)); checkCudaErrors(cudaFree(gpu_texture_indices));
checkCudaErrors(cudaFree(gpu_port_offsets)); checkCudaErrors(cudaFree(gpu_port_offsets));
checkCudaErrors(cudaFree(gpu_color_weights)); checkCudaErrors(cudaFree(gpu_color_weights));
checkCudaErrors(cudaFree(gpu_generate_RBGA_params));
checkCudaErrors(cudaFree(gpu_textures)); checkCudaErrors(cudaFree(gpu_textures));
checkCudaErrors(cudaFree(gpu_textures_rbga)); checkCudaErrors(cudaFree(gpu_textures_rbga));
checkCudaErrors(cudaFree(gpu_diff_rgb_combo)); checkCudaErrors(cudaFree(gpu_diff_rgb_combo));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment