Commit 13f515b9 authored by Andrey Filippov's avatar Andrey Filippov

Refactoring

parent 821b753a
...@@ -724,7 +724,7 @@ __device__ void tile_combine_rgba( ...@@ -724,7 +724,7 @@ __device__ void tile_combine_rgba(
// next not used // next not used
// boolean diff_gauss, // when averaging images, use gaussian around average as weight (false - sharp all/nothing) // boolean diff_gauss, // when averaging images, use gaussian around average as weight (false - sharp all/nothing)
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float * chn_weights, // color channel weights, sum == 1.0 const float * chn_weights, // color channel weights, sum == 1.0
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // eturn channel weights and rms after A in RGBA (weight are always calculated, not so for the crms) int keep_weights, // eturn channel weights and rms after A in RGBA (weight are always calculated, not so for the crms)
int debug); int debug);
...@@ -923,7 +923,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -923,7 +923,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
float diff_sigma, // pixel value/pixel change float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G const float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null ) // combining both non-overlap and overlap (each calculated if pointer is not null )
...@@ -3311,7 +3311,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3311,7 +3311,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
float diff_sigma, // pixel value/pixel change float diff_sigma, // pixel value/pixel change
float diff_threshold, // pixel value/pixel change float diff_threshold, // pixel value/pixel change
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float weights[3], // scale for R,B,G const float weights[3], // scale for R,B,G
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? Now +2 - output raw channels int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? Now +2 - output raw channels
// combining both non-overlap and overlap (each calculated if pointer is not null ) // combining both non-overlap and overlap (each calculated if pointer is not null )
...@@ -5483,7 +5483,7 @@ __device__ void tile_combine_rgba( ...@@ -5483,7 +5483,7 @@ __device__ void tile_combine_rgba(
// next not used // next not used
// boolean diff_gauss, // when averaging images, use gaussian around average as weight (false - sharp all/nothing) // boolean diff_gauss, // when averaging images, use gaussian around average as weight (false - sharp all/nothing)
float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
float * chn_weights, // color channel weights, sum == 1.0 const float * chn_weights, // color channel weights, sum == 1.0
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated, not so for the crms) int keep_weights, // return channel weights and rms after A in RGBA (weight are always calculated, not so for the crms)
int debug) int debug)
......
class GenerateRgbaHost {
GenerateRgbaHost(){
}
~GenerateRgbaHost(){
}
void generate_RBGA_host(
int num_cams, // number of cameras used
// Parameters to generate texture tasks
float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// struct tp_task * gpu_tasks,
int num_tiles, // number of tiles in task list
// declare arrays in device code?
int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
int * gpu_num_texture_tiles, // number of texture tiles to process (8 separate elements for accumulation)
int * gpu_woi, // x,y,width,height of the woi
int width, // <= TILES-X, use for faster processing of LWIR images (should be actual + 1)
int height, // <= TILES-Y, use for faster processing of LWIR images
// Parameters for the texture generation
float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction
const float cpu_params[5], // mitigating CUDA_ERROR_INVALID_PTX
const float weights[3], // scale for R,B,G should be host_array, not gpu
int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
const size_t texture_rbga_stride, // in floats
float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
{
int cpu_woi[4];
int cpu_num_texture_tiles[8];
float min_shot = cpu_params[0]; // 10.0
float scale_shot = cpu_params[1]; // 3.0
float diff_sigma = cpu_params[2]; // pixel value/pixel change
float diff_threshold = cpu_params[3]; // pixel value/pixel change
float min_agree = cpu_params[4]; // minimal number of channels to agree on a point (real number to work with fuzzy averages)
int tilesya = ((height +3) & (~3)); //#define TILES-YA ((TILES-Y +3) & (~3))
dim3 threads0((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x = (width + ((1 << THREADS_DYNAMIC_BITS) - 1)) >> THREADS_DYNAMIC_BITS;
dim3 blocks0 (blocks_x, height, 1);
clear_texture_list<<<blocks0,threads0>>>(
gpu_texture_indices,
width,
height);
checkCudaErrors(cudaDeviceSynchronize()); // not needed yet, just for testing
dim3 threads((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_t = (num_tiles + ((1 << THREADS_DYNAMIC_BITS)) -1) >> THREADS_DYNAMIC_BITS;//
dim3 blocks(blocks_t, 1, 1);
// mark used tiles in gpu_texture_indices memory
mark_texture_tiles <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
num_tiles, // number of tiles in task list
width, // number of tiles in a row
gpu_texture_indices); // packed tile + bits (now only (1 << 7)
checkCudaErrors(cudaDeviceSynchronize());
// mark n/e/s/w used tiles from gpu_texture_indices memory to gpu_tasks lower 4 bits
checkCudaErrors(cudaMemcpy(
(float * ) cpu_woi,
gpu_woi,
4 * sizeof(float),
cudaMemcpyDeviceToHost));
cpu_woi[0] = width;
cpu_woi[1] = height;
cpu_woi[2] = 0;
cpu_woi[3] = 0;
checkCudaErrors(cudaMemcpy(
gpu_woi,
cpu_woi,
4 * sizeof(float),
cudaMemcpyHostToDevice));
/*
*(woi + 0) = width; // TILES-X;
*(woi + 1) = height; // TILES-Y;
*(woi + 2) = 0; // maximal x
*(woi + 3) = 0; // maximal y
int * gpu_woi = (int *) copyalloc_kernel_gpu(
(float * ) woi,
4); // number of elements
*/
// TODO: create gpu_woi to pass (copy from woi)
// set lower 4 bits in each gpu_ftasks task
mark_texture_neighbor_tiles <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
num_tiles, // number of tiles in task list
width, // number of tiles in a row
height, // number of tiles rows
gpu_texture_indices, // packed tile + bits (now only (1 << 7)
gpu_woi); // min_x, min_y, max_x, max_y
checkCudaErrors(cudaDeviceSynchronize());
/*
checkCudaErrors(cudaMemcpy( //
(float * ) cpu_num_texture_tiles,
gpu_num_texture_tiles,
8 * sizeof(float),
cudaMemcpyDeviceToHost));
*/
// Generate tile indices list, upper 24 bits - tile index, lower 4 bits: n/e/s/w neighbors, bit 7 - set to 1
for (int i = 0; i <8; i++){
cpu_num_texture_tiles[i] = 0;
}
/*
*(num_texture_tiles+0) = 0;
*(num_texture_tiles+1) = 0;
*(num_texture_tiles+2) = 0;
*(num_texture_tiles+3) = 0;
*(num_texture_tiles+4) = 0;
*(num_texture_tiles+5) = 0;
*(num_texture_tiles+6) = 0;
*(num_texture_tiles+7) = 0;
*/
// copy zeroed num_texture_tiles
// int * gpu_num_texture_tiles = (int *) copyalloc_kernel_gpu(
// (float * ) num_texture_tiles,
// 8); // number of elements
checkCudaErrors(cudaMemcpy(
gpu_num_texture_tiles,
cpu_num_texture_tiles,
8 * sizeof(float),
cudaMemcpyHostToDevice));
gen_texture_list <<<blocks,threads>>>(
num_cams, // int num_cams,
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
num_tiles, // number of tiles in task list
width, // number of tiles in a row
height, // int height, // number of tiles rows
gpu_texture_indices, // packed tile + bits (now only (1 << 7)
gpu_num_texture_tiles, // number of texture tiles to process
gpu_woi); // x,y, here woi[2] = max_X, woi[3] - max-Y
checkCudaErrors(cudaDeviceSynchronize()); // not needed yet, just for testing
// copy gpu_woi back to host woi
checkCudaErrors(cudaMemcpy(
(float * ) cpu_woi,
gpu_woi,
4 * sizeof(float),
cudaMemcpyDeviceToHost));
// *(cpu_woi + 2) += 1 - *(cpu_woi + 0); // width (was min and max)
// *(cpu_woi + 3) += 1 - *(cpu_woi + 1); // height (was min and max)
cpu_woi[2] += 1 - cpu_woi[0]; // width (was min and max)
cpu_woi[3] += 1 - cpu_woi[1]; // height (was min and max)
// copy host-modified data back to GPU
checkCudaErrors(cudaMemcpy(
gpu_woi,
cpu_woi,
4 * sizeof(float),
cudaMemcpyHostToDevice));
// copy gpu_num_texture_tiles back to host num_texture_tiles
checkCudaErrors(cudaMemcpy(
(float * ) cpu_num_texture_tiles,
gpu_num_texture_tiles,
8 * sizeof(float),
cudaMemcpyDeviceToHost));
// Zero output textures. Trim
// texture_rbga_stride
// int texture_width = (*(cpu_woi + 2) + 1) * DTT_SIZE;
// int texture_tiles_height = (*(cpu_woi + 3) + 1) * DTT_SIZE;
int texture_width = (cpu_woi[2] + 1) * DTT_SIZE;
int texture_tiles_height = (cpu_woi[3] + 1) * DTT_SIZE;
int texture_slices = colors + 1;
if (keep_weights & 2){
texture_slices += colors * num_cams;
}
dim3 threads2((1 << THREADS_DYNAMIC_BITS), 1, 1);
int blocks_x2 = (texture_width + ((1 << (THREADS_DYNAMIC_BITS + DTT_SIZE_LOG2 )) - 1)) >> (THREADS_DYNAMIC_BITS + DTT_SIZE_LOG2);
dim3 blocks2 (blocks_x2, texture_tiles_height * texture_slices, 1); // each thread - 8 vertical
#ifdef DEBUG8A
int cpu_texture_indices [TILESX*TILESYA];
checkCudaErrors(cudaMemcpy(
(float * ) cpu_texture_indices,
gpu_texture_indices,
TILESX*TILESYA * sizeof(float),
cudaMemcpyDeviceToHost));
for (int i = 0; i < 256; i++){
int indx = cpu_texture_indices[i];
printf("%02d %04x %03d %03d %x\n",i,indx, (indx>>8) / 80, (indx >> 8) % 80, indx&0xff);
}
#endif // #ifdef DEBUG8A
clear_texture_rbga<<<blocks2,threads2>>>( // illegal value error
texture_width,
texture_tiles_height * texture_slices, // int texture_slice_height,
texture_rbga_stride, // const size_t texture_rbga_stride, // in floats 8*stride
gpu_texture_tiles) ; // float * gpu_texture_tiles);
// Run 8 times - first 4 1-tile offsets inner tiles (w/o verifying margins), then - 4 times with verification and ignoring 4-pixel
// oversize (border 16x 16 tiles overhang by 4 pixels)
checkCudaErrors(cudaDeviceSynchronize()); // not needed yet, just for testing
for (int pass = 0; pass < 8; pass++){
int num_cams_per_thread = NUM_THREADS / TEXTURE_THREADS_PER_TILE; // 4 cameras parallel, then repeat
dim3 threads_texture(TEXTURE_THREADS_PER_TILE, num_cams_per_thread, 1); // TEXTURE_TILES_PER_BLOCK, 1);
int border_tile = pass >> 2;
int ntt = *(cpu_num_texture_tiles + ((pass & 3) << 1) + border_tile);
int *pntt = gpu_num_texture_tiles + ((pass & 3) << 1) + border_tile;
dim3 grid_texture((ntt + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1); // TEXTURE_TILES_PER_BLOCK = 1
/* before CDP2
int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILES-X * (TILES-YA >> 2)); // 1/4
if (border_tile){
ti_offset += width * (tilesya >> 2) - ntt; // TILES-X * (TILES-YA >> 2) - ntt;
}
*/
// for CDP2
int ti_offset = (pass & 3) * (width * (tilesya >> 2)); // (TILES-X * (TILES-YA >> 2)); // 1/4
if (border_tile){
// ti_offset += width * (tilesya >> 2) - ntt; // TILES-X * (TILES-YA >> 2) - ntt;
ti_offset += width * (tilesya >> 2); // TILES-X * (TILES-YA >> 2) - ntt;
ti_offset = - ti_offset; // does not depend on results of the previous kernel, but is negative
}
#ifdef DEBUG8A
printf("\ngenerate_RBGA() pass= %d, border_tile= %d, ti_offset= %d, ntt=%d\n",
pass, border_tile,ti_offset, ntt);
printf("\ngenerate_RBGA() gpu_texture_indices= %p, gpu_texture_indices + ti_offset= %p\n",
(void *) gpu_texture_indices, (void *) (gpu_texture_indices + ti_offset));
printf("\ngenerate_RBGA() grid_texture={%d, %d, %d)\n",
grid_texture.x, grid_texture.y, grid_texture.z);
printf("\ngenerate_RBGA() threads_texture={%d, %d, %d)\n",
threads_texture.x, threads_texture.y, threads_texture.z);
printf("\n");
#endif
/* */
int shared_size = host_get_textures_shared_size( // in bytes
num_cams, // int num_cams, // actual number of cameras
colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
0); // int * offsets); // in floats
printf("\n2. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, colors);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_size); // 65536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
textures_accumulate <<<grid_texture,threads_texture, shared_size>>>(
num_cams, // int num_cams, // number of cameras used
gpu_woi, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
pntt, // ntt, // int * num_texture_tiles, // number of texture tiles to process
ti_offset, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
// gpu_texture_indices + ti_offset, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
colors, // int colors, // number of colors (3/1)
is_lwir, // int is_lwir, // do not perform shot correction
min_shot, // float min_shot, // 10.0
scale_shot, // float scale_shot, // 3.0
diff_sigma, // float diff_sigma, // pixel value/pixel change
diff_threshold, // float diff_threshold, // pixel value/pixel change
min_agree, // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
weights, // float weights[3], // scale for R,B,G
dust_remove, // int dust_remove, // Do not reduce average weight when only one image differs much from the average
keep_weights, // int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
// combining both non-overlap and overlap (each calculated if pointer is not null )
texture_rbga_stride, // size_t texture_rbg_stride, // in floats
gpu_texture_tiles, // float * gpu_texture_rbg, // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // size_t texture_stride, // in floats (now 256*4 = 1024)
(float *) 0, // gpu_texture_tiles, // float * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
0, // 1, // int linescan_order, // if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
(float *)0, //);//gpu_diff_rgb_combo); // float * gpu_diff_rgb_combo) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
width);
checkCudaErrors(cudaDeviceSynchronize()); // not needed yet, just for testing
/* */
}
// checkCudaErrors(cudaFree(gpu_woi));
// checkCudaErrors(cudaFree(gpu_num_texture_tiles));
// __syncthreads();
}
};
...@@ -60,6 +60,7 @@ ...@@ -60,6 +60,7 @@
#include "TileProcessor.cuh" #include "TileProcessor.cuh"
#include "tp_utils.h" #include "tp_utils.h"
#include "tp_files.h" #include "tp_files.h"
#include "tp_paths.h"
#if TEST_LWIR #if TEST_LWIR
#define IMG_WIDTH 640 #define IMG_WIDTH 640
...@@ -79,74 +80,7 @@ ...@@ -79,74 +80,7 @@
#define TILESY (IMG_HEIGHT / DTT_SIZE) #define TILESY (IMG_HEIGHT / DTT_SIZE)
#define TILESYA ((TILESY +3) & (~3)) #define TILESYA ((TILESY +3) & (~3))
#include "generate_RGBA_host.inc"
// Prepare low pass filter (64 long) to be applied to each quadrant of the CLT data
void set_clt_lpf(
float * lpf, // size*size array to be filled out
float sigma,
const int dct_size)
{
int dct_len = dct_size * dct_size;
if (sigma == 0.0f) {
lpf[0] = 1.0f;
for (int i = 1; i < dct_len; i++){
lpf[i] = 0.0;
}
} else {
for (int i = 0; i < dct_size; i++){
for (int j = 0; j < dct_size; j++){
lpf[i*dct_size+j] = exp(-(i*i+j*j)/(2*sigma));
}
}
// normalize
double sum = 0;
for (int i = 0; i < dct_size; i++){
for (int j = 0; j < dct_size; j++){
double d = lpf[i*dct_size+j];
d*=cos(M_PI*i/(2*dct_size))*cos(M_PI*j/(2*dct_size));
if (i > 0) d*= 2.0;
if (j > 0) d*= 2.0;
sum +=d;
}
}
for (int i = 0; i< dct_len; i++){
lpf[i] /= sum;
}
}
}
int host_get_textures_shared_size( // in bytes
//__device__ int get_textures_shared_size( // in bytes
int num_cams, // actual number of cameras
int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
int * offsets){ // in floats
// int shared_floats = 0;
int offs = 0;
// int texture_threads_per_tile = TEXTURE_THREADS/num_cams;
if (offsets) offsets[0] = offs;
offs += num_cams * num_colors * 2 * DTT_SIZE * DTT_SIZE21; //float mclt_tiles [NUM_CAMS][NUM_COLORS][2*DTT_SIZE][DTT_SIZE21]
if (offsets) offsets[1] = offs;
offs += num_cams * num_colors * 4 * DTT_SIZE * DTT_SIZE1; // float clt_tiles [NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE1]
if (offsets) offsets[2] = offs;
// offs += num_cams * num_colors * DTT_SIZE2 * DTT_SIZE21; //float mclt_tmp [NUM_CAMS][NUM_COLORS][DTT_SIZE2][DTT_SIZE21];
int mclt_tmp_size = num_cams * num_colors * DTT_SIZE2 * DTT_SIZE21; // [NUM_CAMS][NUM_COLORS][DTT_SIZE2][DTT_SIZE21]
int rgbaw_size = (2* (num_colors + 1) + num_cams) * DTT_SIZE2 * DTT_SIZE21; // [NUM_COLORS + 1 + NUM_CAMS + NUM_COLORS + 1][DTT_SIZE2][DTT_SIZE21]
offs += (rgbaw_size > mclt_tmp_size) ? rgbaw_size : mclt_tmp_size;
if (offsets) offsets[3] = offs;
offs += num_cams * 2; // float port_offsets [NUM_CAMS][2];
if (offsets) offsets[4] = offs;
offs += num_colors * num_cams; // float ports_rgb_shared [NUM_COLORS][NUM_CAMS];
if (offsets) offsets[5] = offs;
offs += num_cams; // float max_diff_shared [NUM_CAMS];
if (offsets) offsets[6] = offs;
offs += num_cams * TEXTURE_THREADS_PER_TILE; // float max_diff_tmp [NUM_CAMS][TEXTURE_THREADS_PER_TILE]
if (offsets) offsets[7] = offs;
offs += num_colors * num_cams * TEXTURE_THREADS_PER_TILE; //float ports_rgb_tmp [NUM_COLORS][NUM_CAMS][TEXTURE_THREADS_PER_TILE];
if (offsets) offsets[8] = offs;
return sizeof(float) * offs; // shared_floats;
}
void generate_RBGA_host( void generate_RBGA_host(
int num_cams, // number of cameras used int num_cams, // number of cameras used
...@@ -166,8 +100,8 @@ void generate_RBGA_host( ...@@ -166,8 +100,8 @@ void generate_RBGA_host(
struct gc * gpu_geometry_correction, struct gc * gpu_geometry_correction,
int colors, // number of colors (3/1) int colors, // number of colors (3/1)
int is_lwir, // do not perform shot correction int is_lwir, // do not perform shot correction
float cpu_params[5], // mitigating CUDA_ERROR_INVALID_PTX const float cpu_params[5], // mitigating CUDA_ERROR_INVALID_PTX
float weights[3], // scale for R,B,G should be host_array, not gpu const float weights[3], // scale for R,B,G should be host_array, not gpu
int dust_remove, // Do not reduce average weight when only one image differs much from the average int dust_remove, // Do not reduce average weight when only one image differs much from the average
int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)? int keep_weights, // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
const size_t texture_rbga_stride, // in floats const size_t texture_rbga_stride, // in floats
...@@ -416,6 +350,7 @@ void generate_RBGA_host( ...@@ -416,6 +350,7 @@ void generate_RBGA_host(
/* */ /* */
} }
// checkCudaErrors(cudaFree(gpu_woi)); // checkCudaErrors(cudaFree(gpu_woi));
// checkCudaErrors(cudaFree(gpu_num_texture_tiles)); // checkCudaErrors(cudaFree(gpu_num_texture_tiles));
// __syncthreads(); // __syncthreads();
...@@ -446,136 +381,15 @@ int main(int argc, char **argv) ...@@ -446,136 +381,15 @@ int main(int argc, char **argv)
findCudaDevice(argc, (const char **)argv); findCudaDevice(argc, (const char **)argv);
float fat_zero = 1000.0f; // 300.0f; // 30.0; float fat_zero = 1000.0f; // 300.0f; // 30.0;
#if TEST_LWIR #if TEST_LWIR
const char* kernel_file[] = { int use_lwir= 1;
"clt/aux_chn0_transposed.kernel", #else
"clt/aux_chn1_transposed.kernel", int use_lwir= 0;
"clt/aux_chn2_transposed.kernel",
"clt/aux_chn3_transposed.kernel",
"clt/aux_chn4_transposed.kernel",
"clt/aux_chn5_transposed.kernel",
"clt/aux_chn6_transposed.kernel",
"clt/aux_chn7_transposed.kernel",
"clt/aux_chn8_transposed.kernel",
"clt/aux_chn9_transposed.kernel",
"clt/aux_chn10_transposed.kernel",
"clt/aux_chn11_transposed.kernel",
"clt/aux_chn12_transposed.kernel",
"clt/aux_chn13_transposed.kernel",
"clt/aux_chn14_transposed.kernel",
"clt/aux_chn15_transposed.kernel"};
const char* kernel_offs_file[] = {
"clt/aux_chn0_transposed.kernel_offsets",
"clt/aux_chn1_transposed.kernel_offsets",
"clt/aux_chn2_transposed.kernel_offsets",
"clt/aux_chn3_transposed.kernel_offsets",
"clt/aux_chn4_transposed.kernel_offsets",
"clt/aux_chn5_transposed.kernel_offsets",
"clt/aux_chn6_transposed.kernel_offsets",
"clt/aux_chn7_transposed.kernel_offsets",
"clt/aux_chn8_transposed.kernel_offsets",
"clt/aux_chn9_transposed.kernel_offsets",
"clt/aux_chn10_transposed.kernel_offsets",
"clt/aux_chn11_transposed.kernel_offsets",
"clt/aux_chn12_transposed.kernel_offsets",
"clt/aux_chn13_transposed.kernel_offsets",
"clt/aux_chn14_transposed.kernel_offsets",
"clt/aux_chn15_transposed.kernel_offsets"};
const char* image_files[] = {
"clt/aux_chn0.bayer",
"clt/aux_chn1.bayer",
"clt/aux_chn2.bayer",
"clt/aux_chn3.bayer",
"clt/aux_chn4.bayer",
"clt/aux_chn5.bayer",
"clt/aux_chn6.bayer",
"clt/aux_chn7.bayer",
"clt/aux_chn8.bayer",
"clt/aux_chn9.bayer",
"clt/aux_chn10.bayer",
"clt/aux_chn11.bayer",
"clt/aux_chn12.bayer",
"clt/aux_chn13.bayer",
"clt/aux_chn14.bayer",
"clt/aux_chn15.bayer"};
const char* ports_offs_xy_file[] = {
"clt/aux_chn0.portsxy",
"clt/aux_chn1.portsxy",
"clt/aux_chn2.portsxy",
"clt/aux_chn3.portsxy",
"clt/aux_chn4.portsxy",
"clt/aux_chn5.portsxy",
"clt/aux_chn6.portsxy",
"clt/aux_chn7.portsxy",
"clt/aux_chn8.portsxy",
"clt/aux_chn9.portsxy",
"clt/aux_chn10.portsxy",
"clt/aux_chn11.portsxy",
"clt/aux_chn12.portsxy",
"clt/aux_chn13.portsxy",
"clt/aux_chn14.portsxy",
"clt/aux_chn15.portsxy"};
//#ifndef DBG_TILE
#ifdef SAVE_CLT
const char* ports_clt_file[] = { // never referenced
"clt/aux_chn0.clt",
"clt/aux_chn1.clt",
"clt/aux_chn2.clt",
"clt/aux_chn3.clt",
"clt/aux_chn4.clt",
"clt/aux_chn5.clt",
"clt/aux_chn6.clt",
"clt/aux_chn7.clt",
"clt/aux_chn8.clt",
"clt/aux_chn9.clt",
"clt/aux_chn10.clt",
"clt/aux_chn11.clt",
"clt/aux_chn12.clt",
"clt/aux_chn13.clt",
"clt/aux_chn14.clt",
"clt/aux_chn15.clt"};
#endif #endif
const char* result_rbg_file[] = {
"clt/aux_chn0.rbg", TpPaths tpPaths(use_lwir);
"clt/aux_chn1.rbg",
"clt/aux_chn2.rbg", #if TEST_LWIR
"clt/aux_chn3.rbg",
"clt/aux_chn4.rbg",
"clt/aux_chn5.rbg",
"clt/aux_chn6.rbg",
"clt/aux_chn7.rbg",
"clt/aux_chn8.rbg",
"clt/aux_chn9.rbg",
"clt/aux_chn10.rbg",
"clt/aux_chn11.rbg",
"clt/aux_chn12.rbg",
"clt/aux_chn13.rbg",
"clt/aux_chn14.rbg",
"clt/aux_chn15.rbg"};
//#endif
const char* result_corr_file = "clt/aux_corr.corr";
const char* result_corr_quad_file = "clt/aux_corr-quad.corr";
const char* result_corr_td_norm_file = "clt/aux_corr-td-norm.corr";
/// const char* result_corr_cross_file = "clt/aux_corr-cross.corr";
const char* result_inter_td_norm_file = "clt/aux_inter-td-norm.corr";
const char* result_textures_file = "clt/aux_texture_nodp.rgba";
const char* result_diff_rgb_combo_file ="clt/aux_diff_rgb_combo_nodp.drbg";
const char* result_textures_rgba_file = "clt/aux_texture_rgba_nodp.rgba";
const char* result_textures_file_dp = "clt/aux_texture_dp.rgba";
const char* result_diff_rgb_combo_file_dp ="clt/aux_diff_rgb_combo_dp.drbg";
const char* result_textures_rgba_file_dp = "clt/aux_texture_rgba_dp.rgba";
const char* rByRDist_file = "clt/aux.rbyrdist";
const char* correction_vector_file = "clt/aux.correction_vector";
const char* geometry_correction_file = "clt/aux.geometry_correction";
float color_weights [] = { float color_weights [] = {
1.0, // float weight0, // scale for R 0.5 / (1.0 + 0.5 +0.2) 1.0, // float weight0, // scale for R 0.5 / (1.0 + 0.5 +0.2)
...@@ -589,62 +403,7 @@ int main(int argc, char **argv) ...@@ -589,62 +403,7 @@ int main(int argc, char **argv)
12.0 // 3.0 // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages) 12.0 // 3.0 // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
}; };
#else #else
const char* kernel_file[] = {
"clt/main_chn0_transposed.kernel",
"clt/main_chn1_transposed.kernel",
"clt/main_chn2_transposed.kernel",
"clt/main_chn3_transposed.kernel"};
const char* kernel_offs_file[] = {
"clt/main_chn0_transposed.kernel_offsets",
"clt/main_chn1_transposed.kernel_offsets",
"clt/main_chn2_transposed.kernel_offsets",
"clt/main_chn3_transposed.kernel_offsets"};
const char* image_files[] = {
"clt/main_chn0.bayer",
"clt/main_chn1.bayer",
"clt/main_chn2.bayer",
"clt/main_chn3.bayer"};
const char* ports_offs_xy_file[] = {
"clt/main_chn0.portsxy",
"clt/main_chn1.portsxy",
"clt/main_chn2.portsxy",
"clt/main_chn3.portsxy"};
#ifdef SAVE_CLT
const char* ports_clt_file[] = { // never referenced
"clt/main_chn0.clt",
"clt/main_chn1.clt",
"clt/main_chn2.clt",
"clt/main_chn3.clt"};
#endif
const char* result_rbg_file[] = {
"clt/main_chn0.rbg",
"clt/main_chn1.rbg",
"clt/main_chn2.rbg",
"clt/main_chn3.rbg"};
//#endif
const char* result_corr_file = "clt/main_corr.corr";
const char* result_corr_quad_file = "clt/main_corr-quad.corr";
const char* result_corr_td_norm_file = "clt/aux_corr-td-norm.corr";
/// const char* result_corr_cross_file = "clt/main_corr-cross.corr";
const char* result_inter_td_norm_file = "clt/aux_inter-td-norm.corr";
const char* result_textures_file = "clt/main_texture_nodp.rgba";
const char* result_diff_rgb_combo_file ="clt/main_diff_rgb_combo_nodp.drbg";
const char* result_textures_rgba_file = "clt/main_texture_rgba_nodp.rgba";
const char* result_textures_file_dp = "clt/main_texture_dp.rgba";
const char* result_diff_rgb_combo_file_dp = "clt/main_diff_rgb_combo_dp.drbg";
const char* result_textures_rgba_file_dp = "clt/main_texture_rgba_dp.rgba";
const char* rByRDist_file = "clt/main.rbyrdist";
const char* correction_vector_file = "clt/main.correction_vector";
const char* geometry_correction_file = "clt/main.geometry_correction";
float color_weights [] = { float color_weights [] = {
0.294118, // float weight0, // scale for R 0.5 / (1.0 + 0.5 +0.2) 0.294118, // float weight0, // scale for R 0.5 / (1.0 + 0.5 +0.2)
0.117647, // float weight1, // scale for B 0.2 / (1.0 + 0.5 +0.2) 0.117647, // float weight1, // scale for B 0.2 / (1.0 + 0.5 +0.2)
...@@ -661,7 +420,7 @@ int main(int argc, char **argv) ...@@ -661,7 +420,7 @@ int main(int argc, char **argv)
int sel_pairs[4]; [[maybe_unused]] int sel_pairs[4];
#if TEST_LWIR #if TEST_LWIR
// testing with 16 LWIR // testing with 16 LWIR
...@@ -803,13 +562,13 @@ int main(int argc, char **argv) ...@@ -803,13 +562,13 @@ int main(int argc, char **argv)
readFloatsFromFile( readFloatsFromFile(
(float *) &fgeometry_correction, // float * data, // allocated array (float *) &fgeometry_correction, // float * data, // allocated array
geometry_correction_file); // char * path) // file path tpPaths.geometry_correction_file); // char * path) // file path
rByRDist = readAllFloatsFromFile( rByRDist = readAllFloatsFromFile(
rByRDist_file, // const char * path, tpPaths.rByRDist_file, // const char * path,
&rByRDist_length); // int * len_in_floats) &rByRDist_length); // int * len_in_floats)
correction_vector = readAllFloatsFromFile( correction_vector = readAllFloatsFromFile(
correction_vector_file, // const char * path, tpPaths.correction_vector_file, // const char * path,
&correction_vector_length); // int * len_in_floats) &correction_vector_length); // int * len_in_floats)
gpu_geometry_correction = (struct gc *) copyalloc_kernel_gpu( gpu_geometry_correction = (struct gc *) copyalloc_kernel_gpu(
...@@ -830,12 +589,12 @@ int main(int argc, char **argv) ...@@ -830,12 +589,12 @@ int main(int argc, char **argv)
for (int ncam = 0; ncam < num_cams; ncam++) { for (int ncam = 0; ncam < num_cams; ncam++) {
readFloatsFromFile( readFloatsFromFile(
host_kern_buf, // float * data, // allocated array host_kern_buf, // float * data, // allocated array
kernel_file[ncam]); // char * path) // file path tpPaths.kernel_file[ncam]); // char * path) // file path
gpu_kernels_h[ncam] = copyalloc_kernel_gpu(host_kern_buf, KERN_SIZE); gpu_kernels_h[ncam] = copyalloc_kernel_gpu(host_kern_buf, KERN_SIZE);
readFloatsFromFile( readFloatsFromFile(
host_kern_buf, // float * data, // allocated array host_kern_buf, // float * data, // allocated array
kernel_offs_file[ncam]); // char * path) // file path tpPaths.kernel_offs_file[ncam]); // char * path) // file path
gpu_kernel_offsets_h[ncam] = (struct CltExtra *) copyalloc_kernel_gpu( gpu_kernel_offsets_h[ncam] = (struct CltExtra *) copyalloc_kernel_gpu(
host_kern_buf, host_kern_buf,
KERN_TILES * (sizeof( struct CltExtra)/sizeof(float))); KERN_TILES * (sizeof( struct CltExtra)/sizeof(float)));
...@@ -878,7 +637,7 @@ int main(int argc, char **argv) ...@@ -878,7 +637,7 @@ int main(int argc, char **argv)
for (int ncam = 0; ncam < num_cams; ncam++) { for (int ncam = 0; ncam < num_cams; ncam++) {
readFloatsFromFile( readFloatsFromFile(
host_kern_buf, // float * data, // allocated array host_kern_buf, // float * data, // allocated array
image_files[ncam]); // char * path) // file path tpPaths.image_files[ncam]); // char * path) // file path
gpu_images_h[ncam] = copyalloc_image_gpu( gpu_images_h[ncam] = copyalloc_image_gpu(
host_kern_buf, // float * image_host, host_kern_buf, // float * image_host,
&dstride, // size_t* dstride, &dstride, // size_t* dstride,
...@@ -891,14 +650,12 @@ int main(int argc, char **argv) ...@@ -891,14 +650,12 @@ int main(int argc, char **argv)
for (int ncam = 0; ncam < num_cams; ncam++) { for (int ncam = 0; ncam < num_cams; ncam++) {
readFloatsFromFile( readFloatsFromFile(
(float *) &tile_coords_h[ncam], (float *) &tile_coords_h[ncam],
ports_offs_xy_file[ncam]); // char * path) // file path tpPaths.ports_offs_xy_file[ncam]); // char * path) // file path
} }
for (int ty = 0; ty < TILESY; ty++){ for (int ty = 0; ty < TILESY; ty++){
for (int tx = 0; tx < TILESX; tx++){ for (int tx = 0; tx < TILESX; tx++){
int nt = ty * TILESX + tx; int nt = ty * TILESX + tx;
// int task_task = 0xf | (((1 << NUM_PAIRS)-1) << TASK_CORR_BITS);
// int task_task = 0xf | (1 << TASK_CORR_BITS); // just 1 bit, correlation selection is defined by common corr_sel bits
int task_task = (1 << TASK_INTER_EN) | (1 << TASK_CORR_EN) | (1 << TASK_TEXT_EN); // just 1 bit, correlation selection is defined by common corr_sel bits int task_task = (1 << TASK_INTER_EN) | (1 << TASK_CORR_EN) | (1 << TASK_TEXT_EN); // just 1 bit, correlation selection is defined by common corr_sel bits
int task_txy = tx + (ty << 16); int task_txy = tx + (ty << 16);
float task_target_disparity = DBG_DISPARITY; float task_target_disparity = DBG_DISPARITY;
...@@ -1268,10 +1025,10 @@ int main(int argc, char **argv) ...@@ -1268,10 +1025,10 @@ int main(int argc, char **argv)
gpu_clt_h[ncam], gpu_clt_h[ncam],
rslt_size * sizeof(float), rslt_size * sizeof(float),
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
printf("Writing CLT data to %s\n", ports_clt_file[ncam]); printf("Writing CLT data to %s\n", tpPaths.ports_clt_file[ncam]);
writeFloatsToFile(cpu_clt, // float * data, // allocated array writeFloatsToFile(cpu_clt, // float * data, // allocated array
rslt_size, // int size, // length in elements rslt_size, // int size, // length in elements
ports_clt_file[ncam]); // const char * path) // file path tpPaths.ports_clt_file[ncam]); // const char * path) // file path
} }
#endif #endif
...@@ -1346,11 +1103,11 @@ int main(int argc, char **argv) ...@@ -1346,11 +1103,11 @@ int main(int argc, char **argv)
// 3* (IMG_HEIGHT + DTT_SIZE), // 3* (IMG_HEIGHT + DTT_SIZE),
num_colors* (IMG_HEIGHT + DTT_SIZE), num_colors* (IMG_HEIGHT + DTT_SIZE),
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
printf("Writing RBG data to %s\n", result_rbg_file[ncam]); printf("Writing RBG data to %s\n", tpPaths.result_rbg_file[ncam]);
writeFloatsToFile( // will have margins writeFloatsToFile( // will have margins
cpu_corr_image, // float * data, // allocated array cpu_corr_image, // float * data, // allocated array
rslt_img_size, // int size, // length in elements rslt_img_size, // int size, // length in elements
result_rbg_file[ncam]); // const char * path) // file path tpPaths.result_rbg_file[ncam]); // const char * path) // file path
} }
free(cpu_corr_image); free(cpu_corr_image);
...@@ -1463,17 +1220,17 @@ int main(int argc, char **argv) ...@@ -1463,17 +1220,17 @@ int main(int argc, char **argv)
#ifndef NSAVE_CORR #ifndef NSAVE_CORR
printf("Writing phase correlation data to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n", printf("Writing phase correlation data to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n",
result_corr_file, (TILESX*16),(TILESYA*16), num_pairs, (corr_img_size * sizeof(float)) ) ; tpPaths.result_corr_file, (TILESX*16),(TILESYA*16), num_pairs, (corr_img_size * sizeof(float)) ) ;
/* /*
writeFloatsToFile( writeFloatsToFile(
cpu_corr, // float * data, // allocated array cpu_corr, // float * data, // allocated array
rslt_corr_size, // int size, // length in elements rslt_corr_size, // int size, // length in elements
result_corr_file); // const char * path) // file path tpPaths.result_corr_file); // const char * path) // file path
*/ */
writeFloatsToFile( writeFloatsToFile(
corr_img, // float * data, // allocated array corr_img, // float * data, // allocated array
corr_img_size, // int size, // length in elements corr_img_size, // int size, // length in elements
result_corr_file); // const char * path) // file path tpPaths.result_corr_file); // const char * path) // file path
#endif #endif
free (cpu_corr); free (cpu_corr);
free (cpu_corr_indices); free (cpu_corr_indices);
...@@ -1589,15 +1346,13 @@ int main(int argc, char **argv) ...@@ -1589,15 +1346,13 @@ int main(int argc, char **argv)
(corr_size_combo * corr_size_combo) * sizeof(float), (corr_size_combo * corr_size_combo) * sizeof(float),
num_corr_combo, num_corr_combo,
cudaMemcpyDeviceToHost)); cudaMemcpyDeviceToHost));
// const char* result_corr_quad_file = "clt/main_corr-quad.corr";
// const char* result_corr_cross_file = "clt/main_corr-cross.corr";
#ifndef NSAVE_CORR #ifndef NSAVE_CORR
printf("Writing phase correlation data to %s\n", result_corr_quad_file); printf("Writing phase correlation data to %s\n", tpPaths.result_corr_quad_file);
writeFloatsToFile( writeFloatsToFile(
cpu_corr_combo, // float * data, // allocated array cpu_corr_combo, // float * data, // allocated array
rslt_corr_size_combo, // int size, // length in elements rslt_corr_size_combo, // int size, // length in elements
result_corr_quad_file); // const char * path) // file path tpPaths.result_corr_quad_file); // const char * path) // file path
#endif #endif
free(cpu_corr_combo); free(cpu_corr_combo);
...@@ -1667,11 +1422,11 @@ int main(int argc, char **argv) ...@@ -1667,11 +1422,11 @@ int main(int argc, char **argv)
#ifndef NSAVE_CORR #ifndef NSAVE_CORR
printf("Writing phase correlation data to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n", printf("Writing phase correlation data to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n",
result_corr_td_norm_file, (TILESX*16),(TILESYA*16), num_pairs, (corr_img_size * sizeof(float)) ) ; tpPaths.result_corr_td_norm_file, (TILESX*16),(TILESYA*16), num_pairs, (corr_img_size * sizeof(float)) ) ;
writeFloatsToFile( writeFloatsToFile(
corr_img, // float * data, // allocated array corr_img, // float * data, // allocated array
corr_img_size, // int size, // length in elements corr_img_size, // int size, // length in elements
result_corr_td_norm_file); // const char * path) // file path tpPaths.result_corr_td_norm_file); // const char * path) // file path
#endif #endif
#if 1 // export TD intra #if 1 // export TD intra
int intra_corr_size_td = num_corrs * DTT_SIZE2*DTT_SIZE2; int intra_corr_size_td = num_corrs * DTT_SIZE2*DTT_SIZE2;
...@@ -1780,7 +1535,7 @@ int main(int argc, char **argv) ...@@ -1780,7 +1535,7 @@ int main(int argc, char **argv)
for (int ncam = 0; ncam < num_cams; ncam++) { for (int ncam = 0; ncam < num_cams; ncam++) {
readFloatsFromFile( readFloatsFromFile(
host_kern_buf, // float * data, // allocated array host_kern_buf, // float * data, // allocated array
image_files[ncam]); // char * path) // file path tpPaths.image_files[ncam]); // char * path) // file path
shift_image ( shift_image (
host_kern_buf, // float * image, host_kern_buf, // float * image,
IMG_WIDTH, // int width, IMG_WIDTH, // int width,
...@@ -1925,11 +1680,11 @@ int main(int argc, char **argv) ...@@ -1925,11 +1680,11 @@ int main(int argc, char **argv)
} }
#ifndef NSAVE_CORR #ifndef NSAVE_CORR
printf("Writing interscene phase correlation data to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n", printf("Writing interscene phase correlation data to %s, width = %d, height=%d, slices=%d, length=%ld bytes\n",
result_inter_td_norm_file, (TILESX*16),(TILESYA*16), num_pairs, (corr_img_size * sizeof(float)) ) ; tpPaths.result_inter_td_norm_file, (TILESX*16),(TILESYA*16), num_pairs, (corr_img_size * sizeof(float)) ) ;
writeFloatsToFile( writeFloatsToFile(
corr_img, // float * data, // allocated array corr_img, // float * data, // allocated array
corr_img_size, // int size, // length in elements corr_img_size, // int size, // length in elements
result_inter_td_norm_file); // const char * path) // file path tpPaths.result_inter_td_norm_file); // const char * path) // file path
#endif #endif
...@@ -2186,8 +1941,8 @@ int main(int argc, char **argv) ...@@ -2186,8 +1941,8 @@ int main(int argc, char **argv)
int src_index0 = itile * tile_texture_size + 256 * ilayer; int src_index0 = itile * tile_texture_size + 256 * ilayer;
int dst_index0 = ilayer * (TILESX * TILESYA * 256) + (tileY * 16) * (16 * TILESX) + (tileX * 16); int dst_index0 = ilayer * (TILESX * TILESYA * 256) + (tileY * 16) * (16 * TILESX) + (tileX * 16);
for (int iy = 0; iy < 16; iy++){ for (int iy = 0; iy < 16; iy++){
int src_index1 = src_index0 + 16 * iy; [[maybe_unused]] int src_index1 = src_index0 + 16 * iy;
int dst_index1 = dst_index0 + iy * (16 * TILESX); [[maybe_unused]] int dst_index1 = dst_index0 + iy * (16 * TILESX);
for (int ix = 0; ix < 16; ix++){ for (int ix = 0; ix < 16; ix++){
int src_index= itile * tile_texture_size + 256 * ilayer + 16 * iy + ix; int src_index= itile * tile_texture_size + 256 * ilayer + 16 * iy + ix;
int dst_index = ilayer * (TILESX * TILESY * 256) + (tileY * 16 + iy) * (16 * TILESX) + (tileX * 16) + ix; int dst_index = ilayer * (TILESX * TILESY * 256) + (tileY * 16 + iy) * (16 * TILESX) + (tileX * 16) + ix;
...@@ -2218,27 +1973,27 @@ int main(int argc, char **argv) ...@@ -2218,27 +1973,27 @@ int main(int argc, char **argv)
#ifndef NSAVE_TEXTURES #ifndef NSAVE_TEXTURES
#ifdef NO_DP #ifdef NO_DP
printf("Writing phase texture data to %s\n", result_textures_file); printf("Writing phase texture data to %s\n", tpPaths.result_textures_file);
writeFloatsToFile( writeFloatsToFile(
non_overlap_layers, // float * data, // allocated array non_overlap_layers, // float * data, // allocated array
rslt_texture_size, // int size, // length in elements rslt_texture_size, // int size, // length in elements
result_textures_file); // const char * path) // file path tpPaths.result_textures_file); // const char * path) // file path
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file); printf("Writing low-res data to %s\n", tpPaths.result_diff_rgb_combo_file);
writeFloatsToFile( writeFloatsToFile(
cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array
diff_rgb_combo_size, // int size, // length in elements diff_rgb_combo_size, // int size, // length in elements
result_diff_rgb_combo_file); // const char * path) // file path tpPaths.result_diff_rgb_combo_file); // const char * path) // file path
#else #else
printf("Writing phase texture data to %s\n", result_textures_file_dp); printf("Writing phase texture data to %s\n", tpPaths.result_textures_file_dp);
writeFloatsToFile( writeFloatsToFile(
non_overlap_layers, // float * data, // allocated array non_overlap_layers, // float * data, // allocated array
rslt_texture_size, // int size, // length in elements rslt_texture_size, // int size, // length in elements
result_textures_file_dp); // const char * path) // file path tpPaths.result_textures_file_dp); // const char * path) // file path
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file_dp); printf("Writing low-res data to %s\n", tpPaths.result_diff_rgb_combo_file_dp);
writeFloatsToFile( writeFloatsToFile(
cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array
diff_rgb_combo_size, // int size, // length in elements diff_rgb_combo_size, // int size, // length in elements
result_diff_rgb_combo_file_dp); // const char * path) // file path tpPaths.result_diff_rgb_combo_file_dp); // const char * path) // file path
#endif #endif
#ifdef DBG_TILE #ifdef DBG_TILE
...@@ -2376,17 +2131,17 @@ int main(int argc, char **argv) ...@@ -2376,17 +2131,17 @@ int main(int argc, char **argv)
#ifndef NSAVE_TEXTURES #ifndef NSAVE_TEXTURES
#ifdef NO_DP #ifdef NO_DP
printf("Writing RBGA texture slices to %s\n", result_textures_rgba_file); printf("Writing RBGA texture slices to %s\n", tpPaths.result_textures_rgba_file);
writeFloatsToFile( writeFloatsToFile(
cpu_textures_rgba, // float * data, // allocated array cpu_textures_rgba, // float * data, // allocated array
rslt_rgba_size, // int size, // length in elements rslt_rgba_size, // int size, // length in elements
result_textures_rgba_file); // const char * path) // file path tpPaths.result_textures_rgba_file); // const char * path) // file path
#else #else
printf("Writing RBGA texture slices to %s\n", result_textures_rgba_file_dp); printf("Writing RBGA texture slices to %s\n", tpPaths.result_textures_rgba_file_dp);
writeFloatsToFile( writeFloatsToFile(
cpu_textures_rgba, // float * data, // allocated array cpu_textures_rgba, // float * data, // allocated array
rslt_rgba_size, // int size, // length in elements rslt_rgba_size, // int size, // length in elements
result_textures_rgba_file_dp); // const char * path) // file path tpPaths.result_textures_rgba_file_dp); // const char * path) // file path
#endif #endif
#endif #endif
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
//#include <iterator> //#include <iterator>
//#include <vector> //#include <vector>
//#include "tp_utils.h" //#include "tp_utils.h"
#include "tp_files.h" #include "tp_files.h"
int get_file_size(std::string filename) // path to file int get_file_size(std::string filename) // path to file
{ {
......
/*
* tp_paths.cu
*
* Created on: Mar 26, 2025
* Author: elphel
*/
#include "tp_paths.h"
TpPaths::TpPaths(int lwir){
// values for LWIR sensors
const char* kernel_file_lwir[16] = {
"clt/aux_chn0_transposed.kernel",
"clt/aux_chn1_transposed.kernel",
"clt/aux_chn2_transposed.kernel",
"clt/aux_chn3_transposed.kernel",
"clt/aux_chn4_transposed.kernel",
"clt/aux_chn5_transposed.kernel",
"clt/aux_chn6_transposed.kernel",
"clt/aux_chn7_transposed.kernel",
"clt/aux_chn8_transposed.kernel",
"clt/aux_chn9_transposed.kernel",
"clt/aux_chn10_transposed.kernel",
"clt/aux_chn11_transposed.kernel",
"clt/aux_chn12_transposed.kernel",
"clt/aux_chn13_transposed.kernel",
"clt/aux_chn14_transposed.kernel",
"clt/aux_chn15_transposed.kernel"};
const char* kernel_offs_file_lwir[16] = {
"clt/aux_chn0_transposed.kernel_offsets",
"clt/aux_chn1_transposed.kernel_offsets",
"clt/aux_chn2_transposed.kernel_offsets",
"clt/aux_chn3_transposed.kernel_offsets",
"clt/aux_chn4_transposed.kernel_offsets",
"clt/aux_chn5_transposed.kernel_offsets",
"clt/aux_chn6_transposed.kernel_offsets",
"clt/aux_chn7_transposed.kernel_offsets",
"clt/aux_chn8_transposed.kernel_offsets",
"clt/aux_chn9_transposed.kernel_offsets",
"clt/aux_chn10_transposed.kernel_offsets",
"clt/aux_chn11_transposed.kernel_offsets",
"clt/aux_chn12_transposed.kernel_offsets",
"clt/aux_chn13_transposed.kernel_offsets",
"clt/aux_chn14_transposed.kernel_offsets",
"clt/aux_chn15_transposed.kernel_offsets"};
const char* image_files_lwir[16] = {
"clt/aux_chn0.bayer",
"clt/aux_chn1.bayer",
"clt/aux_chn2.bayer",
"clt/aux_chn3.bayer",
"clt/aux_chn4.bayer",
"clt/aux_chn5.bayer",
"clt/aux_chn6.bayer",
"clt/aux_chn7.bayer",
"clt/aux_chn8.bayer",
"clt/aux_chn9.bayer",
"clt/aux_chn10.bayer",
"clt/aux_chn11.bayer",
"clt/aux_chn12.bayer",
"clt/aux_chn13.bayer",
"clt/aux_chn14.bayer",
"clt/aux_chn15.bayer"};
const char* ports_offs_xy_file_lwir[16] = {
"clt/aux_chn0.portsxy",
"clt/aux_chn1.portsxy",
"clt/aux_chn2.portsxy",
"clt/aux_chn3.portsxy",
"clt/aux_chn4.portsxy",
"clt/aux_chn5.portsxy",
"clt/aux_chn6.portsxy",
"clt/aux_chn7.portsxy",
"clt/aux_chn8.portsxy",
"clt/aux_chn9.portsxy",
"clt/aux_chn10.portsxy",
"clt/aux_chn11.portsxy",
"clt/aux_chn12.portsxy",
"clt/aux_chn13.portsxy",
"clt/aux_chn14.portsxy",
"clt/aux_chn15.portsxy"};
//#ifndef DBG_TILE
const char* ports_clt_file_lwir[16] = { // never referenced
"clt/aux_chn0.clt",
"clt/aux_chn1.clt",
"clt/aux_chn2.clt",
"clt/aux_chn3.clt",
"clt/aux_chn4.clt",
"clt/aux_chn5.clt",
"clt/aux_chn6.clt",
"clt/aux_chn7.clt",
"clt/aux_chn8.clt",
"clt/aux_chn9.clt",
"clt/aux_chn10.clt",
"clt/aux_chn11.clt",
"clt/aux_chn12.clt",
"clt/aux_chn13.clt",
"clt/aux_chn14.clt",
"clt/aux_chn15.clt"};
const char* result_rbg_file_lwir[16] = {
"clt/aux_chn0.rbg",
"clt/aux_chn1.rbg",
"clt/aux_chn2.rbg",
"clt/aux_chn3.rbg",
"clt/aux_chn4.rbg",
"clt/aux_chn5.rbg",
"clt/aux_chn6.rbg",
"clt/aux_chn7.rbg",
"clt/aux_chn8.rbg",
"clt/aux_chn9.rbg",
"clt/aux_chn10.rbg",
"clt/aux_chn11.rbg",
"clt/aux_chn12.rbg",
"clt/aux_chn13.rbg",
"clt/aux_chn14.rbg",
"clt/aux_chn15.rbg"};
//#endif
/*
const char* result_corr_file_lwir = "clt/aux_corr.corr";
const char* result_corr_quad_file_lwir = "clt/aux_corr-quad.corr";
const char* result_corr_td_norm_file_lwir = "clt/aux_corr-td-norm.corr";
const char* result_inter_td_norm_file_lwir = "clt/aux_inter-td-norm.corr";
const char* result_textures_file_lwir = "clt/aux_texture_nodp.rgba";
const char* result_diff_rgb_combo_file_lwir ="clt/aux_diff_rgb_combo_nodp.drbg";
const char* result_textures_rgba_file_lwir = "clt/aux_texture_rgba_nodp.rgba";
const char* result_textures_file_dp_lwir = "clt/aux_texture_dp.rgba";
const char* result_diff_rgb_combo_file_dp_lwir ="clt/aux_diff_rgb_combo_dp.drbg";
const char* result_textures_rgba_file_dp_lwir = "clt/aux_texture_rgba_dp.rgba";
const char* rByRDist_file_lwir = "clt/aux.rbyrdist";
const char* correction_vector_file_lwir = "clt/aux.correction_vector";
const char* geometry_correction_file_lwir = "clt/aux.geometry_correction";
float color_weights_lwir [3] = {
1.0, // float weight0, // scale for R 0.5 / (1.0 + 0.5 +0.2)
1.0, // float weight1, // scale for B 0.2 / (1.0 + 0.5 +0.2)
1.0}; // float weight2, // scale for G 1.0 / (1.0 + 0.5 +0.2)
float generate_RBGA_params_lwir[5]={
10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0
10.0, // 1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
12.0 // 3.0 // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
};
*/
// values for RGB sensors
const char* kernel_file_rgb[4] = {
"clt/main_chn0_transposed.kernel",
"clt/main_chn1_transposed.kernel",
"clt/main_chn2_transposed.kernel",
"clt/main_chn3_transposed.kernel"};
const char* kernel_offs_file_rgb[4] = {
"clt/main_chn0_transposed.kernel_offsets",
"clt/main_chn1_transposed.kernel_offsets",
"clt/main_chn2_transposed.kernel_offsets",
"clt/main_chn3_transposed.kernel_offsets"};
const char* image_files_rgb[4] = {
"clt/main_chn0.bayer",
"clt/main_chn1.bayer",
"clt/main_chn2.bayer",
"clt/main_chn3.bayer"};
const char* ports_offs_xy_file_rgb[4] = {
"clt/main_chn0.portsxy",
"clt/main_chn1.portsxy",
"clt/main_chn2.portsxy",
"clt/main_chn3.portsxy"};
const char* ports_clt_file_rgb[4] = { // never referenced
"clt/main_chn0.clt",
"clt/main_chn1.clt",
"clt/main_chn2.clt",
"clt/main_chn3.clt"};
const char* result_rbg_file_rgb[4] = {
"clt/main_chn0.rbg",
"clt/main_chn1.rbg",
"clt/main_chn2.rbg",
"clt/main_chn3.rbg"};
//#endif
/*
const char* result_corr_file_rgb = "clt/main_corr.corr";
const char* result_corr_quad_file_rgb = "clt/main_corr-quad.corr";
const char* result_corr_td_norm_file_rgb = "clt/aux_corr-td-norm.corr";
/// const char* result_corr_cross_file = "clt/main_corr-cross.corr";
const char* result_inter_td_norm_file_rgb = "clt/aux_inter-td-norm.corr";
const char* result_textures_file_rgb = "clt/main_texture_nodp.rgba";
const char* result_diff_rgb_combo_file_rgb = "clt/main_diff_rgb_combo_nodp.drbg";
const char* result_textures_rgba_file_rgb = "clt/main_texture_rgba_nodp.rgba";
const char* result_textures_file_dp_rgb = "clt/main_texture_dp.rgba";
const char* result_diff_rgb_combo_file_dp_rgb = "clt/main_diff_rgb_combo_dp.drbg";
const char* result_textures_rgba_file_dp_rgb = "clt/main_texture_rgba_dp.rgba";
const char* rByRDist_file_rgb = "clt/main.rbyrdist";
const char* correction_vector_file_rgb = "clt/main.correction_vector";
const char* geometry_correction_file_rgb = "clt/main.geometry_correction";
*/
/*
float color_weights_rgb [3] = {
0.294118, // float weight0, // scale for R 0.5 / (1.0 + 0.5 +0.2)
0.117647, // float weight1, // scale for B 0.2 / (1.0 + 0.5 +0.2)
0.588235}; // float weight2, // scale for G 1.0 / (1.0 + 0.5 +0.2)
float generate_RBGA_params_rgb[5]={
10.0, // float min_shot, // 10.0
3.0, // float scale_shot, // 3.0
1.5f, // float diff_sigma, // pixel value/pixel change
10.0f, // float diff_threshold, // pixel value/pixel change
3.0 // float min_agree, // minimal number of channels to agree on a point (real number to work with fuzzy averages)
};
*/
/*
const char ** kernel_file;
const char ** kernel_offs_file;
const char ** image_files;
const char ** ports_offs_xy_file;
const char ** ports_clt_file;
const char ** result_rbg_file;
const char* result_corr_file;
const char* result_corr_quad_file;
const char* result_corr_td_norm_file;
const char* result_inter_td_norm_file;
const char* result_textures_file;
const char* result_diff_rgb_combo_file;
const char* result_textures_rgba_file;
const char* result_textures_file_dp;
const char* result_diff_rgb_combo_file_dp;
const char* result_textures_rgba_file_dp;
const char* rByRDist_file;
const char* correction_vector_file;
const char* geometry_correction_file;
const float* color_weights;
const float* generate_RBGA_params;
*/
kernel_file = lwir? kernel_file_lwir : kernel_file_rgb;
kernel_offs_file = lwir? kernel_offs_file_lwir : kernel_offs_file_rgb;
image_files = lwir? image_files_lwir : image_files_rgb;
ports_offs_xy_file = lwir? ports_offs_xy_file_lwir : ports_offs_xy_file_rgb;
ports_clt_file = lwir? ports_clt_file_lwir : ports_clt_file_rgb;
result_rbg_file = lwir? result_rbg_file_lwir : result_rbg_file_rgb;
result_corr_file = lwir ? "clt/aux_corr.corr" : "clt/main_corr.corr";
result_corr_quad_file = lwir ? "clt/aux_corr-quad.corr" : "clt/main_corr-quad.corr";
result_corr_td_norm_file = lwir ? "clt/aux_corr-td-norm.corr": "clt/aux_corr-td-norm.corr";
result_inter_td_norm_file = lwir ? "clt/aux_inter-td-norm.corr" : "clt/aux_inter-td-norm.corr";
result_textures_file = lwir ? "clt/aux_texture_nodp.rgba" : "clt/main_texture_nodp.rgba";
result_diff_rgb_combo_file = lwir ? "clt/aux_diff_rgb_combo_nodp.drbg" : "clt/main_diff_rgb_combo_nodp.drbg";
result_textures_rgba_file = lwir ? "clt/aux_texture_rgba_nodp.rgba" : "clt/main_texture_rgba_nodp.rgba";
result_textures_file_dp = lwir ? "clt/aux_texture_dp.rgba" : "clt/main_texture_dp.rgba";
result_diff_rgb_combo_file_dp = lwir ? "clt/aux_diff_rgb_combo_dp.drbg" : "clt/main_diff_rgb_combo_dp.drbg";
result_textures_rgba_file_dp = lwir ? "clt/aux_texture_rgba_dp.rgba" : "clt/main_texture_rgba_dp.rgba";
rByRDist_file = lwir ? "clt/aux.rbyrdist" : "clt/main.rbyrdist";
correction_vector_file = lwir ? "clt/aux.correction_vector" : "clt/main.correction_vector";
geometry_correction_file = lwir ? "clt/aux.geometry_correction" : "clt/main.geometry_correction";
// color_weights = lwir ? color_weights_lwir : color_weights_rgb;
// generate_RBGA_params = lwir ? generate_RBGA_params_lwir : generate_RBGA_params_rgb;
}
/*
* tp_paths.h
*
* Created on: Mar 26, 2025
* Author: elphel
*/
#ifndef SRC_TP_PATHS_H_
#define SRC_TP_PATHS_H_
class TpPaths{
public:
TpPaths(int lwir);
const char ** kernel_file;
const char ** kernel_offs_file;
const char ** image_files;
const char ** ports_offs_xy_file;
const char ** ports_clt_file;
const char ** result_rbg_file;
const char* result_corr_file;
const char* result_corr_quad_file;
const char* result_corr_td_norm_file;
const char* result_inter_td_norm_file;
const char* result_textures_file;
const char* result_diff_rgb_combo_file;
const char* result_textures_rgba_file;
const char* result_textures_file_dp;
const char* result_diff_rgb_combo_file_dp;
const char* result_textures_rgba_file_dp;
const char* rByRDist_file;
const char* correction_vector_file;
const char* geometry_correction_file;
// float * color_weights;// [3];
// float * generate_RBGA_params; // [5];
};
#endif /* SRC_TP_PATHS_H_ */
...@@ -7,6 +7,9 @@ ...@@ -7,6 +7,9 @@
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <helper_cuda.h> #include <helper_cuda.h>
#include "tp_utils.h" #include "tp_utils.h"
#include "dtt8x8.h" // for DTT_SIZE * DTT_SIZE21
#include "tp_defines.h" // for TEXTURE_THREADS_PER_TILE
float * copyalloc_kernel_gpu(float * kernel_host, float * copyalloc_kernel_gpu(float * kernel_host,
int size, // size in floats int size, // size in floats
int full_size) int full_size)
...@@ -172,6 +175,72 @@ float * alloc_image_gpu(size_t* dstride, // in bytes!! ...@@ -172,6 +175,72 @@ float * alloc_image_gpu(size_t* dstride, // in bytes!!
return image_gpu; return image_gpu;
} }
// Prepare low pass filter (64 long) to be applied to each quadrant of the CLT data
void set_clt_lpf(
float * lpf, // size*size array to be filled out
float sigma,
const int dct_size)
{
int dct_len = dct_size * dct_size;
if (sigma == 0.0f) {
lpf[0] = 1.0f;
for (int i = 1; i < dct_len; i++){
lpf[i] = 0.0;
}
} else {
for (int i = 0; i < dct_size; i++){
for (int j = 0; j < dct_size; j++){
lpf[i*dct_size+j] = exp(-(i*i+j*j)/(2*sigma));
}
}
// normalize
double sum = 0;
for (int i = 0; i < dct_size; i++){
for (int j = 0; j < dct_size; j++){
double d = lpf[i*dct_size+j];
d*=cos(M_PI*i/(2*dct_size))*cos(M_PI*j/(2*dct_size));
if (i > 0) d*= 2.0;
if (j > 0) d*= 2.0;
sum +=d;
}
}
for (int i = 0; i< dct_len; i++){
lpf[i] /= sum;
}
}
}
int host_get_textures_shared_size( // in bytes
//__device__ int get_textures_shared_size( // in bytes
int num_cams, // actual number of cameras
int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
int * offsets){ // in floats
// int shared_floats = 0;
int offs = 0;
// int texture_threads_per_tile = TEXTURE_THREADS/num_cams;
if (offsets) offsets[0] = offs;
offs += num_cams * num_colors * 2 * DTT_SIZE * DTT_SIZE21; //float mclt_tiles [NUM_CAMS][NUM_COLORS][2*DTT_SIZE][DTT_SIZE21]
if (offsets) offsets[1] = offs;
offs += num_cams * num_colors * 4 * DTT_SIZE * DTT_SIZE1; // float clt_tiles [NUM_CAMS][NUM_COLORS][4][DTT_SIZE][DTT_SIZE1]
if (offsets) offsets[2] = offs;
// offs += num_cams * num_colors * DTT_SIZE2 * DTT_SIZE21; //float mclt_tmp [NUM_CAMS][NUM_COLORS][DTT_SIZE2][DTT_SIZE21];
int mclt_tmp_size = num_cams * num_colors * DTT_SIZE2 * DTT_SIZE21; // [NUM_CAMS][NUM_COLORS][DTT_SIZE2][DTT_SIZE21]
int rgbaw_size = (2* (num_colors + 1) + num_cams) * DTT_SIZE2 * DTT_SIZE21; // [NUM_COLORS + 1 + NUM_CAMS + NUM_COLORS + 1][DTT_SIZE2][DTT_SIZE21]
offs += (rgbaw_size > mclt_tmp_size) ? rgbaw_size : mclt_tmp_size;
if (offsets) offsets[3] = offs;
offs += num_cams * 2; // float port_offsets [NUM_CAMS][2];
if (offsets) offsets[4] = offs;
offs += num_colors * num_cams; // float ports_rgb_shared [NUM_COLORS][NUM_CAMS];
if (offsets) offsets[5] = offs;
offs += num_cams; // float max_diff_shared [NUM_CAMS];
if (offsets) offsets[6] = offs;
offs += num_cams * TEXTURE_THREADS_PER_TILE; // float max_diff_tmp [NUM_CAMS][TEXTURE_THREADS_PER_TILE]
if (offsets) offsets[7] = offs;
offs += num_colors * num_cams * TEXTURE_THREADS_PER_TILE; //float ports_rgb_tmp [NUM_COLORS][NUM_CAMS][TEXTURE_THREADS_PER_TILE];
if (offsets) offsets[8] = offs;
return sizeof(float) * offs; // shared_floats;
}
...@@ -51,4 +51,17 @@ float * alloc_image_gpu(size_t* dstride, // in bytes!! ...@@ -51,4 +51,17 @@ float * alloc_image_gpu(size_t* dstride, // in bytes!!
int width, int width,
int height); int height);
// Prepare low pass filter (64 long) to be applied to each quadrant of the CLT data
void set_clt_lpf(
float * lpf, // size*size array to be filled out
float sigma,
const int dct_size);
int host_get_textures_shared_size( // in bytes
//__device__ int get_textures_shared_size( // in bytes
int num_cams, // actual number of cameras
int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
int * offsets); // in floats
#endif /* SRC_TP_UTILS_H_ */ #endif /* SRC_TP_UTILS_H_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment