Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
T
tile_processor_gpu
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Elphel
tile_processor_gpu
Commits
d9267525
Commit
d9267525
authored
Jan 24, 2022
by
Andrey Filippov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
debugging snapshot - not all done
parent
ac351cfe
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
116 additions
and
46 deletions
+116
-46
test_tp.cu
src/test_tp.cu
+114
-44
tp_defines.h
src/tp_defines.h
+2
-2
No files found.
src/test_tp.cu
View file @
d9267525
...
@@ -31,11 +31,14 @@
...
@@ -31,11 +31,14 @@
*/
*/
// #define NOCORR
// #define NOCORR
//#define NOCORR_TD
//
#define NOCORR_TD
//#define NOTEXTURES_HOST
//#define NOTEXTURES_HOST
#define NOTEXTURES
#define NOTEXTURES
#define NOTEXTURE_RGBA
//
#define NOTEXTURE_RGBA
#define SAVE_CLT
#define SAVE_CLT
//#define NO_DP
#include <stdio.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
...
@@ -504,7 +507,8 @@ void generate_RBGA_host(
...
@@ -504,7 +507,8 @@ void generate_RBGA_host(
num_cams, // int num_cams, // actual number of cameras
num_cams, // int num_cams, // actual number of cameras
colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
0); // int * offsets); // in floats
0); // int * offsets); // in floats
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
printf("\n2. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, colors);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_size); // 65536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
textures_accumulate <<<grid_texture,threads_texture, shared_size>>>(
textures_accumulate <<<grid_texture,threads_texture, shared_size>>>(
num_cams, // int num_cams, // number of cameras used
num_cams, // int num_cams, // number of cameras used
...
@@ -682,9 +686,14 @@ int main(int argc, char **argv)
...
@@ -682,9 +686,14 @@ int main(int argc, char **argv)
const char* result_corr_quad_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_corr-quad.corr";
const char* result_corr_quad_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_corr-quad.corr";
const char* result_corr_td_norm_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_corr-td-norm.corr";
const char* result_corr_td_norm_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_corr-td-norm.corr";
/// const char* result_corr_cross_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_corr-cross.corr";
/// const char* result_corr_cross_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_corr-cross.corr";
const char* result_textures_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_texture_aux.rgba";
const char* result_textures_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_texture_nodp.rgba";
const char* result_diff_rgb_combo_file ="/home/eyesis/git/tile_processor_gpu/clt/aux_diff_rgb_combo.drbg";
const char* result_diff_rgb_combo_file ="/home/eyesis/git/tile_processor_gpu/clt/aux_diff_rgb_combo_nodp.drbg";
const char* result_textures_rgba_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_texture_rgba_aux.rgba";
const char* result_textures_rgba_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_texture_rgba_nodp.rgba";
const char* result_textures_file_dp = "/home/eyesis/git/tile_processor_gpu/clt/aux_texture_dp.rgba";
const char* result_diff_rgb_combo_file_dp ="/home/eyesis/git/tile_processor_gpu/clt/aux_diff_rgb_combo_dp.drbg";
const char* result_textures_rgba_file_dp = "/home/eyesis/git/tile_processor_gpu/clt/aux_texture_rgba_dp.rgba";
const char* rByRDist_file = "/home/eyesis/git/tile_processor_gpu/clt/aux.rbyrdist";
const char* rByRDist_file = "/home/eyesis/git/tile_processor_gpu/clt/aux.rbyrdist";
const char* correction_vector_file = "/home/eyesis/git/tile_processor_gpu/clt/aux.correction_vector";
const char* correction_vector_file = "/home/eyesis/git/tile_processor_gpu/clt/aux.correction_vector";
...
@@ -744,9 +753,15 @@ int main(int argc, char **argv)
...
@@ -744,9 +753,15 @@ int main(int argc, char **argv)
const char* result_corr_quad_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-quad.corr";
const char* result_corr_quad_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-quad.corr";
const char* result_corr_td_norm_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_corr-td-norm.corr";
const char* result_corr_td_norm_file = "/home/eyesis/git/tile_processor_gpu/clt/aux_corr-td-norm.corr";
/// const char* result_corr_cross_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-cross.corr";
/// const char* result_corr_cross_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-cross.corr";
const char* result_textures_file = "/home/eyesis/git/tile_processor_gpu/clt/main_texture.rgba";
const char* result_textures_file = "/home/eyesis/git/tile_processor_gpu/clt/main_texture_nodp.rgba";
const char* result_diff_rgb_combo_file ="/home/eyesis/git/tile_processor_gpu/clt/main_diff_rgb_combo.drbg";
const char* result_diff_rgb_combo_file ="/home/eyesis/git/tile_processor_gpu/clt/main_diff_rgb_combo_nodp.drbg";
const char* result_textures_rgba_file = "/home/eyesis/git/tile_processor_gpu/clt/main_texture_rgba.rgba";
const char* result_textures_rgba_file = "/home/eyesis/git/tile_processor_gpu/clt/main_texture_rgba_nodp.rgba";
const char* result_textures_file_dp = "/home/eyesis/git/tile_processor_gpu/clt/main_texture_dp.rgba";
const char* result_diff_rgb_combo_file_dp = "/home/eyesis/git/tile_processor_gpu/clt/main_diff_rgb_combo_dp.drbg";
const char* result_textures_rgba_file_dp = "/home/eyesis/git/tile_processor_gpu/clt/main_texture_rgba_dp.rgba";
const char* rByRDist_file = "/home/eyesis/git/tile_processor_gpu/clt/main.rbyrdist";
const char* rByRDist_file = "/home/eyesis/git/tile_processor_gpu/clt/main.rbyrdist";
const char* correction_vector_file = "/home/eyesis/git/tile_processor_gpu/clt/main.correction_vector";
const char* correction_vector_file = "/home/eyesis/git/tile_processor_gpu/clt/main.correction_vector";
const char* geometry_correction_file = "/home/eyesis/git/tile_processor_gpu/clt/main.geometry_correction";
const char* geometry_correction_file = "/home/eyesis/git/tile_processor_gpu/clt/main.geometry_correction";
...
@@ -1095,7 +1110,7 @@ int main(int argc, char **argv)
...
@@ -1095,7 +1110,7 @@ int main(int argc, char **argv)
#endif
#endif
int corr_size = 2 * CORR_OUT_RAD + 1;
int corr_size = 2 * CORR_OUT_RAD + 1;
int num_tiles =
TILESX * TILESYA;
int num_tiles =
tp_task_size; // TILESX * TILESYA; //Was this on 01/22/2022
int num_corr_indices = num_pairs * num_tiles;
int num_corr_indices = num_pairs * num_tiles;
float * corr_img; // = (float *)malloc(corr_img_size * sizeof(float));
float * corr_img; // = (float *)malloc(corr_img_size * sizeof(float));
...
@@ -1807,7 +1822,11 @@ int main(int argc, char **argv)
...
@@ -1807,7 +1822,11 @@ int main(int argc, char **argv)
sdkResetTimer(&timerTEXTURE);
sdkResetTimer(&timerTEXTURE);
sdkStartTimer(&timerTEXTURE);
sdkStartTimer(&timerTEXTURE);
}
}
int shared_size = host_get_textures_shared_size( // in bytes
num_cams, // int num_cams, // actual number of cameras
texture_colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
0); // int * offsets); // in floats
printf("\n1. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, texture_colors);
//*pnum_texture_tiles = 0;
//*pnum_texture_tiles = 0;
cpu_pnum_texture_tiles = 0;
cpu_pnum_texture_tiles = 0;
checkCudaErrors(cudaMemcpy(
checkCudaErrors(cudaMemcpy(
...
@@ -1815,6 +1834,10 @@ int main(int argc, char **argv)
...
@@ -1815,6 +1834,10 @@ int main(int argc, char **argv)
&cpu_pnum_texture_tiles,
&cpu_pnum_texture_tiles,
sizeof(int),
sizeof(int),
cudaMemcpyHostToDevice));
cudaMemcpyHostToDevice));
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_size); // 65536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
#ifdef NO_DP
create_nonoverlap_list<<<blocks0,threads0>>>(
create_nonoverlap_list<<<blocks0,threads0>>>(
num_cams, // int num_cams,
num_cams, // int num_cams,
...
@@ -1831,26 +1854,27 @@ int main(int argc, char **argv)
...
@@ -1831,26 +1854,27 @@ int main(int argc, char **argv)
sizeof(int),
sizeof(int),
cudaMemcpyDeviceToHost));
cudaMemcpyDeviceToHost));
printf("cpu_pnum_texture_tiles = %d\n", cpu_pnum_texture_tiles);
printf("cpu_pnum_texture_tiles = %d\n", cpu_pnum_texture_tiles);
int num_cams_per_thread = NUM_THREADS / TEXTURE_THREADS_PER_TILE; // 4 cameras parallel, then repeat
int num_cams_per_thread = NUM_THREADS / TEXTURE_THREADS_PER_TILE; // 4 cameras parallel, then repeat
dim3 threads_texture1(TEXTURE_THREADS_PER_TILE, num_cams_per_thread, 1); // TEXTURE_TILES_PER_BLOCK, 1);
dim3 threads_texture1(TEXTURE_THREADS_PER_TILE, num_cams_per_thread, 1); // TEXTURE_TILES_PER_BLOCK, 1);
dim3 grid_texture1((cpu_pnum_texture_tiles + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1);
dim3 grid_texture1((cpu_pnum_texture_tiles + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1);
printf("threads_texture1=(%d, %d, %d)\n",threads_texture1.x,threads_texture1.y,threads_texture1.z);
printf("threads_texture1=(%d, %d, %d)\n",threads_texture1.x,threads_texture1.y,threads_texture1.z);
printf("grid_texture1=(%d, %d, %d)\n",grid_texture1.x,grid_texture1.y,grid_texture1.z);
printf("grid_texture1=(%d, %d, %d)\n",grid_texture1.x,grid_texture1.y,grid_texture1.z);
int shared_size = host_get_textures_shared_size( // in bytes
//
int shared_size = host_get_textures_shared_size( // in bytes
num_cams, // int num_cams, // actual number of cameras
//
num_cams, // int num_cams, // actual number of cameras
texture_colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
//
texture_colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
0); // int * offsets); // in floats
//
0); // int * offsets); // in floats
printf("\n1. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, texture_colors);
//
printf("\n1. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, texture_colors);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
textures_accumulate <<<grid_texture1,threads_texture1, shared_size>>>( // 65536>>>( //
textures_accumulate <<<grid_texture1,threads_texture1, shared_size>>>( // 65536>>>( //
num_cams, // int num_cams, // number of cameras used
num_cams, // int num_cams, // number of cameras used
(int *) 0, // int * woi, // x, y, width,height
(int *) 0, // int * woi, // x, y, width,height
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
gpu_clt, // float ** gpu_clt, // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
cpu_pnum_texture_tiles, // *pnum_texture_tiles, // size_t num_texture_tiles, // number of texture tiles to process
cpu_pnum_texture_tiles,
// *pnum_texture_tiles, // size_t num_texture_tiles, // number of texture tiles to process
0, // gpu_texture_indices_offset,// add to gpu_texture_indices
0, // gpu_texture_indices_offset,// add to gpu_texture_indices
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
...
@@ -1872,8 +1896,36 @@ int main(int argc, char **argv)
...
@@ -1872,8 +1896,36 @@ int main(int argc, char **argv)
linescan_order, // int linescan_order, // if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
linescan_order, // int linescan_order, // if !=0 then output gpu_diff_rgb_combo in linescan order, else - in gpu_texture_indices order
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo) // diff[num_cams], R[num_cams], B[num_cams],G[num_cams]
TILESX);
TILESX);
// getLastCudaError("Kernel failure");
// checkCudaErrors(cudaDeviceSynchronize());
#else // #ifdef NO_DP
//keep_texture_weights is assumed 0 in textures_nonoverlap
// FIXME: update to use new correlations and num_cams
textures_nonoverlap<<<1,1>>> ( //,65536>>> (
num_cams, // int num_cams, // number of cameras used
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats
tp_task_size, // int num_tiles, // number of tiles in task list
// declare arrays in device code?
gpu_texture_indices, // int * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
gpu_num_texture_tiles, // int * pnum_texture_tiles, // returns total number of elements in gpu_texture_indices array
gpu_clt , // float ** gpu_clt, // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
// TODO: use geometry_correction rXY !
gpu_geometry_correction, // struct gc * gpu_geometry_correction,
texture_colors, // int colors, // number of colors (3/1)
(texture_colors == 1), // int is_lwir, // do not perform shot correction
gpu_generate_RBGA_params,
gpu_color_weights, // float weights[3], // scale for R
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null )
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles,
linescan_order, // int linescan_order,
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
TILESX);
#endif
getLastCudaError("Kernel failure");
getLastCudaError("Kernel failure");
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(cudaDeviceSynchronize());
printf("test pass: %d\n",i);
printf("test pass: %d\n",i);
}
}
/// cudaProfilerStop();
/// cudaProfilerStop();
...
@@ -1915,10 +1967,9 @@ int main(int argc, char **argv)
...
@@ -1915,10 +1967,9 @@ int main(int argc, char **argv)
int src_index1 = src_index0 + 16 * iy;
int src_index1 = src_index0 + 16 * iy;
int dst_index1 = dst_index0 + iy * (16 * TILESX);
int dst_index1 = dst_index0 + iy * (16 * TILESX);
for (int ix = 0; ix < 16; ix++){
for (int ix = 0; ix < 16; ix++){
// int src_index = src_index1 + ix;
// int dst_index = dst_index1 + ix;
int src_index= itile * tile_texture_size + 256 * ilayer + 16 * iy + ix;
int src_index= itile * tile_texture_size + 256 * ilayer + 16 * iy + ix;
int dst_index = ilayer * (TILESX * TILESYA * 256) + (tileY * 16 + iy) * (16 * TILESX) + (tileX * 16) + ix;
//// int dst_index = ilayer * (TILESX * TILESYA * 256) + (tileY * 16 + iy) * (16 * TILESX) + (tileX * 16) + ix;
int dst_index = ilayer * (TILESX * TILESY * 256) + (tileY * 16 + iy) * (16 * TILESX) + (tileX * 16) + ix;
non_overlap_layers[dst_index] = cpu_textures[src_index];
non_overlap_layers[dst_index] = cpu_textures[src_index];
}
}
}
}
...
@@ -1944,31 +1995,31 @@ int main(int argc, char **argv)
...
@@ -1944,31 +1995,31 @@ int main(int argc, char **argv)
#ifndef NSAVE_TEXTURES
#ifndef NSAVE_TEXTURES
#ifdef NO_DP
printf("Writing phase texture data to %s\n", result_textures_file);
printf("Writing phase texture data to %s\n", result_textures_file);
/*
writeFloatsToFile(
cpu_textures, // float * data, // allocated array
rslt_texture_size, // int size, // length in elements
result_textures_file); // const char * path) // file path
*/
writeFloatsToFile(
writeFloatsToFile(
non_overlap_layers, // float * data, // allocated array
non_overlap_layers, // float * data, // allocated array
rslt_texture_size, // int size, // length in elements
rslt_texture_size, // int size, // length in elements
result_textures_file); // const char * path) // file path
result_textures_file); // const char * path) // file path
/*
* non_overlap_layers
writeFloatsToFile(
cpu_diff_rgb_combo, // cpu_diff_rgb_combo, // float * data, // allocated array
diff_rgb_combo_size, // int size, // length in elements
result_textures_file); // const char * path) // file path
*/
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file);
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file);
writeFloatsToFile(
writeFloatsToFile(
cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array
cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array
diff_rgb_combo_size, // int size, // length in elements
diff_rgb_combo_size, // int size, // length in elements
result_diff_rgb_combo_file); // const char * path) // file path
result_diff_rgb_combo_file); // const char * path) // file path
#else
printf("Writing phase texture data to %s\n", result_textures_file_dp);
writeFloatsToFile(
non_overlap_layers, // float * data, // allocated array
rslt_texture_size, // int size, // length in elements
result_textures_file_dp); // const char * path) // file path
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file_dp);
writeFloatsToFile(
cpu_diff_rgb_combo_out, // cpu_diff_rgb_combo, // float * data, // allocated array
diff_rgb_combo_size, // int size, // length in elements
result_diff_rgb_combo_file_dp); // const char * path) // file path
#endif
#ifdef DBG_TILE
#ifdef DBG_TILE
#ifdef DEBUG10
#ifdef DEBUG10
int texture_offset = DBG_TILE * tile_texture_size;
int texture_offset = DBG_TILE * tile_texture_size;
...
@@ -2021,8 +2072,13 @@ int main(int argc, char **argv)
...
@@ -2021,8 +2072,13 @@ int main(int argc, char **argv)
// Channel2 weight = 0.588235
// Channel2 weight = 0.588235
// FIXME: update to use new correlations and num_cams
// FIXME: update to use new correlations and num_cams
int shared_size = host_get_textures_shared_size( // in bytes
num_cams, // int num_cams, // actual number of cameras
texture_colors, // colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
0); // int * offsets); // in floats
printf("\n1. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, texture_colors);
// cudaFuncSetAttribute(textures_nonoverlap, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
// cudaFuncSetAttribute(textures_nonoverlap, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize,
shared_size); // 60000); //
65536); // for CC 7.5
// cudaFuncSetAttribute(textures_nonoverlap, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
// cudaFuncSetAttribute(textures_nonoverlap, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributePreferredSharedMemoryCarveout,cudaSharedmemCarveoutMaxShared);
textures_nonoverlap<<<1,1>>> ( //,65536>>> (
textures_nonoverlap<<<1,1>>> ( //,65536>>> (
...
@@ -2043,8 +2099,8 @@ int main(int argc, char **argv)
...
@@ -2043,8 +2099,8 @@ int main(int argc, char **argv)
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
1, // int dust_remove, // Do not reduce average weight when only one image differes much from the average
// combining both non-overlap and overlap (each calculated if pointer is not null )
// combining both non-overlap and overlap (each calculated if pointer is not null )
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
0, // dstride_textures/sizeof(float), // size_t texture_stride, // in floats (now 256*4 = 1024) // may be 0 if not needed
(float *) 0, // gpu_textures, // float * gpu_texture_tiles,
// (number of colors +1 + ?)*16*16 rgba texture tiles // may be 0 if not needed\
(float *) 0, // gpu_textures, // float * gpu_texture_tiles,
linescan_order, // int linescan_order,
// 0 low-res tiles have tghe same order, as gpu_texture_indices, 1 - in linescan order
linescan_order, // int linescan_order,
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
gpu_diff_rgb_combo, //); // float * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
TILESX);
TILESX);
getLastCudaError("Kernel failure");
getLastCudaError("Kernel failure");
...
@@ -2080,13 +2136,13 @@ int main(int argc, char **argv)
...
@@ -2080,13 +2136,13 @@ int main(int argc, char **argv)
#ifndef NSAVE_TEXTURES
#ifndef NSAVE_TEXTURES
printf("Writing phase texture data to %s\n", result_textures_file);
printf("Writing phase texture data to %s\n", result_textures_file
_dp
);
writeFloatsToFile(
writeFloatsToFile(
cpu_textures, // float * data, // allocated array
cpu_textures, // float * data, // allocated array
rslt_texture_size, // int size, // length in elements
rslt_texture_size, // int size, // length in elements
result_textures_file); // const char * path) // file path
result_textures_file); // const char * path) // file path
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file);
printf("Writing low-res data to %s\n", result_diff_rgb_combo_file
_dp
);
writeFloatsToFile(
writeFloatsToFile(
cpu_diff_rgb_combo, // float * data, // allocated array
cpu_diff_rgb_combo, // float * data, // allocated array
diff_rgb_combo_size, // int size, // length in elements
diff_rgb_combo_size, // int size, // length in elements
...
@@ -2117,7 +2173,7 @@ int main(int argc, char **argv)
...
@@ -2117,7 +2173,7 @@ int main(int argc, char **argv)
#define NO_DP
#ifndef NOTEXTURE_RGBAXXX
#ifndef NOTEXTURE_RGBAXXX
dim3 threads_rgba(1, 1, 1);
dim3 threads_rgba(1, 1, 1);
...
@@ -2163,7 +2219,13 @@ int main(int argc, char **argv)
...
@@ -2163,7 +2219,13 @@ int main(int argc, char **argv)
dstride_textures_rbga/sizeof(float), // const size_t texture_rbga_stride, // in floats
dstride_textures_rbga/sizeof(float), // const size_t texture_rbga_stride, // in floats
gpu_textures_rbga); // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
gpu_textures_rbga); // float * gpu_texture_tiles) // (number of colors +1 + ?)*16*16 rgba texture tiles
#else
#else
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, 65536); // for CC 7.5
int shared_size = host_get_textures_shared_size( // in bytes
num_cams, // int num_cams, // actual number of cameras
texture_colors, // colors, // int num_colors, // actual number of colors: 3 for RGB, 1 for LWIR/mono
0); // int * offsets); // in floats
printf("\n1. shared_size=%d, num_cams=%d, colors=%d\n",shared_size,num_cams, texture_colors);
cudaFuncSetAttribute(textures_accumulate, cudaFuncAttributeMaxDynamicSharedMemorySize, shared_size); // 60000); // 5536); // for CC 7.5
generate_RBGA<<<1,1>>> (
generate_RBGA<<<1,1>>> (
num_cams, // int num_cams, // number of cameras used
num_cams, // int num_cams, // number of cameras used
// Parameters to generate texture tasks
// Parameters to generate texture tasks
...
@@ -2223,11 +2285,19 @@ int main(int argc, char **argv)
...
@@ -2223,11 +2285,19 @@ int main(int argc, char **argv)
cudaMemcpyDeviceToHost));
cudaMemcpyDeviceToHost));
#ifndef NSAVE_TEXTURES
#ifndef NSAVE_TEXTURES
#ifdef NO_DP
printf("Writing RBGA texture slices to %s\n", result_textures_rgba_file);
printf("Writing RBGA texture slices to %s\n", result_textures_rgba_file);
writeFloatsToFile(
writeFloatsToFile(
cpu_textures_rgba, // float * data, // allocated array
cpu_textures_rgba, // float * data, // allocated array
rslt_rgba_size, // int size, // length in elements
rslt_rgba_size, // int size, // length in elements
result_textures_rgba_file); // const char * path) // file path
result_textures_rgba_file); // const char * path) // file path
#else
printf("Writing RBGA texture slices to %s\n", result_textures_rgba_file_dp);
writeFloatsToFile(
cpu_textures_rgba, // float * data, // allocated array
rslt_rgba_size, // int size, // length in elements
result_textures_rgba_file_dp); // const char * path) // file path
#endif
#endif
#endif
#ifdef DBG_TILE
#ifdef DBG_TILE
...
...
src/tp_defines.h
View file @
d9267525
...
@@ -45,7 +45,7 @@
...
@@ -45,7 +45,7 @@
//#define NUM_PAIRS 6
//#define NUM_PAIRS 6
//#define NUM_COLORS 1 //3
//#define NUM_COLORS 1 //3
// kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64]
// kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64]
#define TEST_LWIR 1
//
#define TEST_LWIR 1
#define KERNELS_LSTEP 4
#define KERNELS_LSTEP 4
#define THREADS_PER_TILE 8
#define THREADS_PER_TILE 8
#define TILES_PER_BLOCK 4
#define TILES_PER_BLOCK 4
...
@@ -79,7 +79,7 @@
...
@@ -79,7 +79,7 @@
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define RBYRDIST_STEP 0.0004 // for doubles, 0.0002 - floats // to fit into GPU shared memory (was 0.001);
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
#define TILES_PER_BLOCK_GEOM (32/NUM_CAMS) // each tile has NUM_CAMS threads
#define DEBUG_ANY 1
#ifdef DEBUG_ANY
#ifdef DEBUG_ANY
//#define DEBUG_OOB1 1
//#define DEBUG_OOB1 1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment