Commit eaf9cd49 authored by Andrey Filippov's avatar Andrey Filippov

Tested DP textures with RGB

parent d9267525
...@@ -2810,7 +2810,7 @@ __global__ void convert_correct_tiles( ...@@ -2810,7 +2810,7 @@ __global__ void convert_correct_tiles(
* @param dust_remove do not reduce average weight when only one image differs much from the average (true) * @param dust_remove do not reduce average weight when only one image differs much from the average (true)
* @param texture_stride output stride in floats (now 256*4 = 1024) * @param texture_stride output stride in floats (now 256*4 = 1024)
* @param gpu_texture_tiles output array (number of colors +1 + ?)*16*16 rgba texture tiles) float values. Will not be calculated if null * @param gpu_texture_tiles output array (number of colors +1 + ?)*16*16 rgba texture tiles) float values. Will not be calculated if null
* @param inescan_order 0 low-res tiles have tghe same order, as gpu_texture_indices, 1 - in linescan order * @param inescan_order 0 low-res tiles have the same order, as gpu_texture_indices, 1 - in linescan order
* @param gpu_diff_rgb_combo low-resolution output, with per-camera mismatch an each color average. Will not be calculated if null * @param gpu_diff_rgb_combo low-resolution output, with per-camera mismatch an each color average. Will not be calculated if null
* @param num_tilesx number of tiles in a row * @param num_tilesx number of tiles in a row
*/ */
......
...@@ -32,9 +32,8 @@ ...@@ -32,9 +32,8 @@
// #define NOCORR // #define NOCORR
// #define NOCORR_TD // #define NOCORR_TD
//#define NOTEXTURES_HOST // #define NOTEXTURES
#define NOTEXTURES // #define NOTEXTURE_RGBA
//#define NOTEXTURE_RGBA
#define SAVE_CLT #define SAVE_CLT
//#define NO_DP //#define NO_DP
...@@ -1793,7 +1792,7 @@ int main(int argc, char **argv) ...@@ -1793,7 +1792,7 @@ int main(int argc, char **argv)
// ----------------- // -----------------
#ifndef NOTEXTURES_HOST #ifndef NOTEXTURES
// cudaProfilerStart(); // cudaProfilerStart();
// testing textures // testing textures
// dim3 threads_texture(TEXTURE_THREADS_PER_TILE, num_cams, 1); // TEXTURE_TILES_PER_BLOCK, 1); // not used // dim3 threads_texture(TEXTURE_THREADS_PER_TILE, num_cams, 1); // TEXTURE_TILES_PER_BLOCK, 1); // not used
...@@ -1900,14 +1899,14 @@ int main(int argc, char **argv) ...@@ -1900,14 +1899,14 @@ int main(int argc, char **argv)
// checkCudaErrors(cudaDeviceSynchronize()); // checkCudaErrors(cudaDeviceSynchronize());
#else // #ifdef NO_DP #else // #ifdef NO_DP
//keep_texture_weights is assumed 0 in textures_nonoverlap //keep_texture_weights is assumed 0 in textures_nonoverlap
// FIXME: update to use new correlations and num_cams
textures_nonoverlap<<<1,1>>> ( //,65536>>> ( textures_nonoverlap<<<1,1>>> ( //,65536>>> (
num_cams, // int num_cams, // number of cameras used num_cams, // int num_cams, // number of cameras used
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats
tp_task_size, // int num_tiles, // number of tiles in task list tp_task_size, // int num_tiles, // number of tiles in task list
// declare arrays in device code? // declare arrays in device code?
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7) gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_num_texture_tiles, // int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array // gpu_num_texture_tiles, // int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
gpu_pnum_texture_tiles, // int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
gpu_clt , // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE] gpu_clt , // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY ! // TODO: use geometry_correction rXY !
gpu_geometry_correction, // struct gc * gpu_geometry_correction, gpu_geometry_correction, // struct gc * gpu_geometry_correction,
...@@ -1917,8 +1916,8 @@ int main(int argc, char **argv) ...@@ -1917,8 +1916,8 @@ int main(int argc, char **argv)
gpu_color_weights, // float weights[3], // scale for R gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average 1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null ) // combining both non-overlap and overlap (each calculated if pointer is not null )
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles, gpu_textures, // float * gpu_texture_tiles,
linescan_order, // int linescan_order, linescan_order, // int linescan_order,
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
TILESX); TILESX);
...@@ -1933,6 +1932,16 @@ int main(int argc, char **argv) ...@@ -1933,6 +1932,16 @@ int main(int argc, char **argv)
float avgTimeTEXTURES = (float)sdkGetTimerValue(&timerTEXTURE) / (float)numIterations; float avgTimeTEXTURES = (float)sdkGetTimerValue(&timerTEXTURE) / (float)numIterations;
sdkDeleteTimer(&timerTEXTURE); sdkDeleteTimer(&timerTEXTURE);
printf("Average Texture run time =%f ms\n", avgTimeTEXTURES); printf("Average Texture run time =%f ms\n", avgTimeTEXTURES);
#ifdef NO_DP
#else
checkCudaErrors(cudaMemcpy(
&cpu_pnum_texture_tiles,
gpu_pnum_texture_tiles,
sizeof(int),
cudaMemcpyDeviceToHost));
printf("cpu_pnum_texture_tiles = %d\n", cpu_pnum_texture_tiles);
#endif
int rslt_texture_size = num_textures * tile_texture_size; int rslt_texture_size = num_textures * tile_texture_size;
checkCudaErrors(cudaMemcpy( checkCudaErrors(cudaMemcpy(
...@@ -2043,136 +2052,7 @@ int main(int argc, char **argv) ...@@ -2043,136 +2052,7 @@ int main(int argc, char **argv)
free (cpu_diff_rgb_combo_out); free (cpu_diff_rgb_combo_out);
checkCudaErrors(cudaFree(gpu_pnum_texture_tiles)); checkCudaErrors(cudaFree(gpu_pnum_texture_tiles));
#endif //NOTEXTURES_HOST #endif //NOTEXTURES
#ifndef NOTEXTURES
// cudaProfilerStart();
// testing textures
// dim3 threads_texture(TEXTURE_THREADS_PER_TILE, num_cams, 1); // TEXTURE_TILES_PER_BLOCK, 1); // not used
// dim3 grid_texture((num_textures + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1); // not used
// printf("threads_texture=(%d, %d, %d)\n",threads_texture.x,threads_texture.y,threads_texture.z);
// printf("grid_texture=(%d, %d, %d)\n",grid_texture.x,grid_texture.y,grid_texture.z);
StopWatchInterface *timerTEXTURE = 0;
sdkCreateTimer(&timerTEXTURE);
int linescan_order = 1; // output low-res in linescan order, 0 - in gpu_texture_indices order
for (int i = i0; i < numIterations; i++)
{
if (i == 0)
{
checkCudaErrors(cudaDeviceSynchronize());
sdkResetTimer(&timerTEXTURE);
sdkStartTimer(&timerTEXTURE);
}
// Channel0 weight = 0.294118
// Channel1 weight = 0.117647
// Channel2 weight = 0.588235
// FIXME: update to use new correlations and num_cams
int shared_size = host_get_textures_shared_size( // in bytes
num_cams, // int num_cams, // actual number of cameras
texture_colors, // colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
0); // int * offsets); // in floats
printf("\n1. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, texture_colors);
// cudaFuncSetAttribute(textures_nonoverlap, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_size); // 60000); // 65536); // for CC 7.5
// cudaFuncSetAttribute(textures_nonoverlap, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
textures_nonoverlap<<<1,1>>> ( //,65536>>> (
num_cams, // int num_cams, // number of cameras used
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats
// gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles, // number of tiles in task list
// declare arrays in device code?
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_num_texture_tiles, // int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
gpu_clt , // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
texture_colors, // int colors, // number of colors (3/1)
(texture_colors == 1), // int is_lwir, // do not perform shot correction
gpu_generate_RBGA_params,
gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null )
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles,
linescan_order, // int linescan_order,
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
TILESX);
getLastCudaError("Kernel failure");
checkCudaErrors(cudaDeviceSynchronize());
printf("test pass: %d\n",i);
}
/// cudaProfilerStop();
sdkStopTimer(&timerTEXTURE);
float avgTimeTEXTURES = (float)sdkGetTimerValue(&timerTEXTURE) / (float)numIterations;
sdkDeleteTimer(&timerTEXTURE);
printf("Average Texture run time =%f ms\n", avgTimeTEXTURES);
int rslt_texture_size = num_textures * tile_texture_size;
float * cpu_textures = (float *)malloc(rslt_texture_size * sizeof(float));
checkCudaErrors(cudaMemcpy2D( // somethong wrong with size
cpu_textures,
tile_texture_size * sizeof(float),
gpu_textures,
dstride_textures,
tile_texture_size * sizeof(float),
num_textures,
cudaMemcpyDeviceToHost));
int diff_rgb_combo_size = TILESX * TILESY * num_cams * (num_colors + 1);
float * cpu_diff_rgb_combo = (float *)malloc(diff_rgb_combo_size * sizeof(float));
checkCudaErrors(cudaMemcpy(
cpu_diff_rgb_combo,
gpu_diff_rgb_combo,
diff_rgb_combo_size * sizeof(float),
cudaMemcpyDeviceToHost));
#ifndef NSAVE_TEXTURES
printf("Writing phase texture data to %s\n", result_textures_file_dp);
writeFloatsToFile(
cpu_textures, // float * data, // allocated array
rslt_texture_size, // int size, // length in elements
result_textures_file); // const char * path) // file path
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file_dp);
writeFloatsToFile(
cpu_diff_rgb_combo, // float * data, // allocated array
diff_rgb_combo_size, // int size, // length in elements
result_diff_rgb_combo_file); // const char * path) // file path
#ifdef DBG_TILE
#ifdef DEBUG10
int texture_offset = DBG_TILE * tile_texture_size;
int chn = 0;
for (int i = 0; i < tile_texture_size; i++){
if ((i % 256) == 0){
printf("\nchn = %d\n", chn++);
}
printf("%10.4f", *(cpu_textures + texture_offset + i));
if (((i + 1) % 16) == 0){
printf("\n");
} else {
printf(" ");
}
}
#endif // DEBUG9
#endif //#ifdef DBG_TILE
#endif
free(cpu_textures);
free (cpu_diff_rgb_combo);
#endif // ifndef NOTEXTURES
#ifndef NOTEXTURE_RGBAXXX #ifndef NOTEXTURE_RGBAXXX
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment