Commit 86054c53 authored by Andrey Filippov

Removing hard-coded TILESX from the index kernels (tiles per row is now passed as a parameter)

parent 9182fde0
...@@ -866,12 +866,14 @@ __global__ void index_direct( ...@@ -866,12 +866,14 @@ __global__ void index_direct(
__global__ void index_correlate( __global__ void index_correlate(
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * gpu_corr_indices, // array of correlation tasks int * gpu_corr_indices, // array of correlation tasks
int * pnum_corr_tiles); // pointer to the length of correlation tasks array int * pnum_corr_tiles); // pointer to the length of correlation tasks array
__global__ void create_nonoverlap_list( __global__ void create_nonoverlap_list(
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * nonoverlap_list, // pointer to the calculated number of non-zero tiles int * nonoverlap_list, // pointer to the calculated number of non-zero tiles
int * pnonoverlap_length); // indices to gpu_tasks // should be initialized to zero int * pnonoverlap_length); // indices to gpu_tasks // should be initialized to zero
...@@ -957,6 +959,7 @@ extern "C" __global__ void correlate2D( ...@@ -957,6 +959,7 @@ extern "C" __global__ void correlate2D(
float fat_zero, // here - absolute float fat_zero, // here - absolute
struct tp_task * gpu_tasks, // array of per-tile tasks (now bits 4..9 - correlation pairs) struct tp_task * gpu_tasks, // array of per-tile tasks (now bits 4..9 - correlation pairs)
int num_tiles, // number of tiles in task int num_tiles, // number of tiles in task
int tilesx, // number of tiles in a row
int * gpu_corr_indices, // packed tile+pair int * gpu_corr_indices, // packed tile+pair
int * pnum_corr_tiles, // pointer to a number of correlation tiles to process int * pnum_corr_tiles, // pointer to a number of correlation tiles to process
const size_t corr_stride, // in floats const size_t corr_stride, // in floats
...@@ -970,6 +973,7 @@ extern "C" __global__ void correlate2D( ...@@ -970,6 +973,7 @@ extern "C" __global__ void correlate2D(
index_correlate<<<blocks0,threads0>>>( index_correlate<<<blocks0,threads0>>>(
gpu_tasks, // struct tp_task * gpu_tasks, gpu_tasks, // struct tp_task * gpu_tasks,
num_tiles, // int num_tiles, // number of tiles in task num_tiles, // int num_tiles, // number of tiles in task
tilesx, // int width, // number of tiles in a row
gpu_corr_indices, // int * gpu_corr_indices, // array of correlation tasks gpu_corr_indices, // int * gpu_corr_indices, // array of correlation tasks
pnum_corr_tiles); // int * pnum_corr_tiles); // pointer to the length of correlation tasks array pnum_corr_tiles); // int * pnum_corr_tiles); // pointer to the length of correlation tasks array
cudaDeviceSynchronize(); cudaDeviceSynchronize();
...@@ -1752,6 +1756,7 @@ __global__ void index_direct( ...@@ -1752,6 +1756,7 @@ __global__ void index_direct(
__global__ void create_nonoverlap_list( __global__ void create_nonoverlap_list(
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * nonoverlap_list, // pointer to the calculated number of non-zero tiles int * nonoverlap_list, // pointer to the calculated number of non-zero tiles
int * pnonoverlap_length) // indices to gpu_tasks // should be initialized to zero int * pnonoverlap_length) // indices to gpu_tasks // should be initialized to zero
{ {
...@@ -1763,7 +1768,8 @@ __global__ void create_nonoverlap_list( ...@@ -1763,7 +1768,8 @@ __global__ void create_nonoverlap_list(
return; // nothing to do return; // nothing to do
} }
int cxy = gpu_tasks[num_tile].txy; int cxy = gpu_tasks[num_tile].txy;
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * TILESX) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS; // int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * TILESX) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
int texture_task_code = (((cxy & 0xffff) + (cxy >> 16) * width) << CORR_NTILE_SHIFT) | (1 << LIST_TEXTURE_BIT) | TASK_TEXTURE_BITS;
if (gpu_tasks[num_tile].task != 0) { if (gpu_tasks[num_tile].task != 0) {
nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code; nonoverlap_list[atomicAdd(pnonoverlap_length, 1)] = texture_task_code;
} }
...@@ -1781,6 +1787,7 @@ __global__ void create_nonoverlap_list( ...@@ -1781,6 +1787,7 @@ __global__ void create_nonoverlap_list(
__global__ void index_correlate( __global__ void index_correlate(
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task int num_tiles, // number of tiles in task
int width, // number of tiles in a row
int * gpu_corr_indices, // array of correlation tasks int * gpu_corr_indices, // array of correlation tasks
int * pnum_corr_tiles) // pointer to the length of correlation tasks array int * pnum_corr_tiles) // pointer to the length of correlation tasks array
{ {
...@@ -1795,7 +1802,8 @@ __global__ void index_correlate( ...@@ -1795,7 +1802,8 @@ __global__ void index_correlate(
int txy = gpu_tasks[num_tile].txy; int txy = gpu_tasks[num_tile].txy;
int tx = txy & 0xffff; int tx = txy & 0xffff;
int ty = txy >> 16; int ty = txy >> 16;
int nt = ty * TILESX + tx; // int nt = ty * TILESX + tx;
int nt = ty * width + tx;
for (int b = 0; b < NUM_PAIRS; b++) if ((cm & (1 << b)) != 0) { for (int b = 0; b < NUM_PAIRS; b++) if ((cm & (1 << b)) != 0) {
gpu_corr_indices[indx++] = (nt << CORR_NTILE_SHIFT) | b; gpu_corr_indices[indx++] = (nt << CORR_NTILE_SHIFT) | b;
} }
...@@ -2011,6 +2019,7 @@ __global__ void convert_correct_tiles( ...@@ -2011,6 +2019,7 @@ __global__ void convert_correct_tiles(
extern "C" __global__ void textures_nonoverlap( extern "C" __global__ void textures_nonoverlap(
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list int num_tiles, // number of tiles in task list
// int num_tilesx, // number of tiles in a row
// declare arrays in device code? // declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
...@@ -2033,6 +2042,7 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -2033,6 +2042,7 @@ extern "C" __global__ void textures_nonoverlap(
float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles
float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] float * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
{ {
int num_tilesx = TILESX;
float min_shot = params[0]; // 10.0 float min_shot = params[0]; // 10.0
float scale_shot = params[1]; // 3.0 float scale_shot = params[1]; // 3.0
float diff_sigma = params[2]; // pixel value/pixel change float diff_sigma = params[2]; // pixel value/pixel change
...@@ -2047,8 +2057,9 @@ extern "C" __global__ void textures_nonoverlap( ...@@ -2047,8 +2057,9 @@ extern "C" __global__ void textures_nonoverlap(
create_nonoverlap_list<<<blocks0,threads0>>>( create_nonoverlap_list<<<blocks0,threads0>>>(
gpu_tasks, // struct tp_task * gpu_tasks, gpu_tasks, // struct tp_task * gpu_tasks,
num_tiles, // int num_tiles, // number of tiles in task num_tiles, // int num_tiles, // number of tiles in task
num_tilesx, // int width, // number of tiles in a row
gpu_texture_indices, // int * nonoverlap_list, // pointer to the calculated number of non-zero tiles gpu_texture_indices, // int * nonoverlap_list, // pointer to the calculated number of non-zero tiles
pnum_texture_tiles); // int * pnonoverlap_length) // indices to gpu_tasks // should be initialized to zero pnum_texture_tiles); // int * pnonoverlap_length) // indices to gpu_tasks // should be initialized to zero
cudaDeviceSynchronize(); cudaDeviceSynchronize();
dim3 threads_texture(TEXTURE_THREADS_PER_TILE, NUM_CAMS, 1); // TEXTURE_TILES_PER_BLOCK, 1); dim3 threads_texture(TEXTURE_THREADS_PER_TILE, NUM_CAMS, 1); // TEXTURE_TILES_PER_BLOCK, 1);
dim3 grid_texture((*pnum_texture_tiles + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1); dim3 grid_texture((*pnum_texture_tiles + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1);
......
...@@ -68,6 +68,7 @@ extern "C" __global__ void correlate2D( ...@@ -68,6 +68,7 @@ extern "C" __global__ void correlate2D(
float fat_zero, // here - absolute float fat_zero, // here - absolute
struct tp_task * gpu_tasks, // array of per-tile tasks (now bits 4..9 - correlation pairs) struct tp_task * gpu_tasks, // array of per-tile tasks (now bits 4..9 - correlation pairs)
int num_tiles, // number of tiles in task int num_tiles, // number of tiles in task
int tilesx, // number of tiles in a row
int * gpu_corr_indices, // packed tile+pair int * gpu_corr_indices, // packed tile+pair
int * pnum_corr_tiles, // pointer to a number of correlation tiles to process int * pnum_corr_tiles, // pointer to a number of correlation tiles to process
const size_t corr_stride, // in floats const size_t corr_stride, // in floats
...@@ -78,6 +79,7 @@ extern "C" __global__ void correlate2D( ...@@ -78,6 +79,7 @@ extern "C" __global__ void correlate2D(
extern "C" __global__ void textures_nonoverlap( extern "C" __global__ void textures_nonoverlap(
struct tp_task * gpu_tasks, struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list int num_tiles, // number of tiles in task list
// int num_tilesx, // number of tiles in a row
// declare arrays in device code? // declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
......
...@@ -1015,7 +1015,8 @@ int main(int argc, char **argv) ...@@ -1015,7 +1015,8 @@ int main(int argc, char **argv)
30.0, // float fat_zero, // here - absolute 30.0, // float fat_zero, // here - absolute
gpu_tasks, // struct tp_task * gpu_tasks, gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles) // number of tiles in task tp_task_size, // int num_tiles) // number of tiles in task
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair TILESX, // int tilesx, // number of tiles in a row
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
gpu_num_corr_tiles, // int * pnum_corr_tiles, // pointer to a number of correlation tiles to process gpu_num_corr_tiles, // int * pnum_corr_tiles, // pointer to a number of correlation tiles to process
dstride_corr/sizeof(float), // const size_t corr_stride, // in floats dstride_corr/sizeof(float), // const size_t corr_stride, // in floats
CORR_OUT_RAD, // int corr_radius, // radius of the output correlation (7 for 15x15) CORR_OUT_RAD, // int corr_radius, // radius of the output correlation (7 for 15x15)
...@@ -1089,6 +1090,8 @@ int main(int argc, char **argv) ...@@ -1089,6 +1090,8 @@ int main(int argc, char **argv)
textures_nonoverlap<<<1,1>>> ( textures_nonoverlap<<<1,1>>> (
gpu_tasks, // struct tp_task * gpu_tasks, gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list tp_task_size, // int num_tiles, // number of tiles in task list
// TILESX, // int num_tilesx, // number of tiles in a row
// declare arrays in device code? // declare arrays in device code?
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_num_texture_tiles, // int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array gpu_num_texture_tiles, // int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
...@@ -1098,18 +1101,10 @@ int main(int argc, char **argv) ...@@ -1098,18 +1101,10 @@ int main(int argc, char **argv)
texture_colors, // int colors, // number of colors (3/1) texture_colors, // int colors, // number of colors (3/1)
(texture_colors == 1), // int is_lwir, // do not perform shot correction (texture_colors == 1), // int is_lwir, // do not perform shot correction
gpu_generate_RBGA_params, gpu_generate_RBGA_params,
/*
10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
3.0, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
*/
gpu_color_weights, // float weights[3], // scale for R gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differs much from the average 1, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null ) // combining both non-overlap and overlap (each calculated if pointer is not null )
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed 0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
// gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed (float *) 0, // gpu_textures, // float * gpu_texture_tiles, // (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed
gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
getLastCudaError("Kernel failure"); getLastCudaError("Kernel failure");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment