Commit 18d8e56b authored by Andrey Filippov

started updating

parent 94114c14
@@ -1318,7 +1318,7 @@ extern "C" __global__ void correlate2D_inner(
 *
 * @param num_tiles, // number of tiles to process (each with num_pairs)
 * @param num_pairs, // num pairs per tile (should be the same)
-* @param init_output, // !=0 - reset output tiles to zero before accumulating
+* @param init_output, // &1 - reset output tiles to zero before accumulating, &2 - no transpose
 * @param pairs_mask, // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
 * @param gpu_corr_indices, // packed tile+pair
 * @param gpu_combo_indices, // output if not null: packed tile+pairs_mask (will point to the first used pair
@@ -1411,7 +1411,7 @@ extern "C" __global__ void corr2D_combine_inner(
 float *clt = clt_corr + threadIdx.x;
 float *mem_corr = gpu_corrs_combo + corr_stride_combo * tile_index + threadIdx.x;
-if (init_output != 0){ // reset combo
+if (init_output & 1){ // reset combo
 #pragma unroll
 for (int i = 0; i < DTT_SIZE4; i++){
 (*clt) = 0.0f;
@@ -1439,7 +1439,7 @@ extern "C" __global__ void corr2D_combine_inner(
 // if (corr_pair > NUM_PAIRS){
 // return; // BUG - should not happen
 // }
-if (PAIRS_HOR_DIAG_MAIN & pair_bit){ // just accumulate. This if-s will branch in all threads, no diversion
+if ((PAIRS_HOR_DIAG_MAIN & pair_bit) || (init_output & 2)){ // just accumulate. This if-s will branch in all threads, no diversion
 clt = clt_corr + threadIdx.x;
 mem_corr = gpu_corrs + corr_stride_combo * corr_tile_index + threadIdx.x;
 #pragma unroll
...
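Context for the kernel changes above: init_output is now interpreted as a bit mask rather than a boolean. Bit 0 requests zeroing the output/combo tiles before accumulation, and bit 1 forces plain accumulation without the transpose in corr2D_combine_inner. A minimal host-side sketch of composing these flags follows; the constant names are illustrative assumptions, not identifiers from the repository:

// Hypothetical flag names, for illustration only (not defined in the sources)
#define CORR_INIT_RESET        1   // bit 0: zero output tiles before accumulating
#define CORR_INIT_NO_TRANSPOSE 2   // bit 1: accumulate without transposing

// Pair selection bits as documented for correlate2D_inner():
// 0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross
int pairs_mask  = 0xf;                                       // the four quad pairs
int init_output = CORR_INIT_RESET | CORR_INIT_NO_TRANSPOSE;  // value 3: reset, then accumulate as-is

// Inside the kernels the two bits are tested independently:
//   if (init_output & 1) { ... zero the combo accumulator ... }
//   if ((PAIRS_HOR_DIAG_MAIN & pair_bit) || (init_output & 2)) { ... accumulate without transpose ... }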
@@ -274,7 +274,7 @@ int main(int argc, char **argv)
 "/home/eyesis/git/tile_processor_gpu/clt/main_chn2.portsxy",
 "/home/eyesis/git/tile_processor_gpu/clt/main_chn3.portsxy"};
-#ifndef DBG_TILE
+//#ifndef DBG_TILE
 const char* ports_clt_file[] = { // never referenced
 "/home/eyesis/git/tile_processor_gpu/clt/main_chn0.clt",
 "/home/eyesis/git/tile_processor_gpu/clt/main_chn1.clt",
@@ -285,7 +285,7 @@ int main(int argc, char **argv)
 "/home/eyesis/git/tile_processor_gpu/clt/main_chn1.rbg",
 "/home/eyesis/git/tile_processor_gpu/clt/main_chn2.rbg",
 "/home/eyesis/git/tile_processor_gpu/clt/main_chn3.rbg"};
-#endif
+//#endif
 const char* result_corr_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr.corr";
 const char* result_corr_quad_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-quad.corr";
 const char* result_corr_cross_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-cross.corr";
@@ -519,7 +519,7 @@ int main(int argc, char **argv)
 // segfault in the next
 gpu_tasks = (struct tp_task *) copyalloc_kernel_gpu((float * ) &task_data, tp_task_size * (sizeof(struct tp_task)/sizeof(float)));
-// build corr_indices - not needed anympore?
+// build corr_indices - not needed anymore?
 /*
 num_corrs = 0;
 for (int ty = 0; ty < TILESY; ty++){
@@ -940,13 +940,13 @@ int main(int argc, char **argv)
 3* (IMG_HEIGHT + DTT_SIZE),
 cudaMemcpyDeviceToHost));
-#ifndef DBG_TILE
+///#ifndef DBG_TILE
 printf("Writing RBG data to %s\n", result_rbg_file[ncam]);
 writeFloatsToFile( // will have margins
 cpu_corr_image, // float * data, // allocated array
 rslt_img_size, // int size, // length in elements
 result_rbg_file[ncam]); // const char * path) // file path
-#endif
+///#endif
 }
 free(cpu_corr_image);
...
@@ -99,7 +99,7 @@
 #define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
 #undef DBG_MARK_DBG_TILE
+//#undef DBG_TILE
 //#undef HAS_PRINTF
 #define HAS_PRINTF
...
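A side note on the DBG_TILE definition touched in the last hunk: the debug tile is addressed by its linear index in the tile grid, row times tiles-per-row (324 here) plus column. A minimal sketch of that arithmetic, with made-up coordinates purely for illustration:

// Linear tile index = row * tiles_per_row + column, mirroring
// #define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
int dbg_tile_x = 10;                              // hypothetical tile column
int dbg_tile_y = 20;                              // hypothetical tile row
int dbg_tile   = dbg_tile_y * 324 + dbg_tile_x;   // 20 * 324 + 10 = 6490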