Commit 18d8e56b authored by Andrey Filippov

started updating

parent 94114c14
......@@ -1318,7 +1318,7 @@ extern "C" __global__ void correlate2D_inner(
*
* @param num_tiles, // number of tiles to process (each with num_pairs)
* @param num_pairs, // num pairs per tile (should be the same)
* @param init_output, // !=0 - reset output tiles to zero before accumulating
* @param init_output, // & 1 - reset output tiles to zero before accumulating, &2 no transpose
* @param pairs_mask, // selected pairs (0x3 - horizontal, 0xc - vertical, 0xf - quad, 0x30 - cross)
* @param gpu_corr_indices, // packed tile+pair
* @param gpu_combo_indices, // output if not null: packed tile+pairs_mask (will point to the first used pair)
......@@ -1411,7 +1411,7 @@ extern "C" __global__ void corr2D_combine_inner(
float *clt = clt_corr + threadIdx.x;
float *mem_corr = gpu_corrs_combo + corr_stride_combo * tile_index + threadIdx.x;
if (init_output != 0){ // reset combo
if (init_output & 1){ // reset combo
#pragma unroll
for (int i = 0; i < DTT_SIZE4; i++){
(*clt) = 0.0f;
......@@ -1439,7 +1439,7 @@ extern "C" __global__ void corr2D_combine_inner(
// if (corr_pair > NUM_PAIRS){
// return; // BUG - should not happen
// }
if (PAIRS_HOR_DIAG_MAIN & pair_bit){ // just accumulate. This if-s will branch in all threads, no diversion
if ((PAIRS_HOR_DIAG_MAIN & pair_bit) || (init_output & 2)){ // just accumulate. This if-s will branch in all threads, no diversion
clt = clt_corr + threadIdx.x;
mem_corr = gpu_corrs + corr_stride_combo * corr_tile_index + threadIdx.x;
#pragma unroll
......
......@@ -274,7 +274,7 @@ int main(int argc, char **argv)
"/home/eyesis/git/tile_processor_gpu/clt/main_chn2.portsxy",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn3.portsxy"};
#ifndef DBG_TILE
//#ifndef DBG_TILE
const char* ports_clt_file[] = { // never referenced
"/home/eyesis/git/tile_processor_gpu/clt/main_chn0.clt",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.clt",
......@@ -285,7 +285,7 @@ int main(int argc, char **argv)
"/home/eyesis/git/tile_processor_gpu/clt/main_chn1.rbg",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn2.rbg",
"/home/eyesis/git/tile_processor_gpu/clt/main_chn3.rbg"};
#endif
//#endif
const char* result_corr_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr.corr";
const char* result_corr_quad_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-quad.corr";
const char* result_corr_cross_file = "/home/eyesis/git/tile_processor_gpu/clt/main_corr-cross.corr";
......@@ -519,7 +519,7 @@ int main(int argc, char **argv)
// segfault in the next
gpu_tasks = (struct tp_task *) copyalloc_kernel_gpu((float * ) &task_data, tp_task_size * (sizeof(struct tp_task)/sizeof(float)));
// build corr_indices - not needed anympore?
// build corr_indices - not needed anymore?
/*
num_corrs = 0;
for (int ty = 0; ty < TILESY; ty++){
......@@ -940,13 +940,13 @@ int main(int argc, char **argv)
3* (IMG_HEIGHT + DTT_SIZE),
cudaMemcpyDeviceToHost));
#ifndef DBG_TILE
///#ifndef DBG_TILE
printf("Writing RBG data to %s\n", result_rbg_file[ncam]);
writeFloatsToFile( // will have margins
cpu_corr_image, // float * data, // allocated array
rslt_img_size, // int size, // length in elements
result_rbg_file[ncam]); // const char * path) // file path
#endif
///#endif
}
free(cpu_corr_image);
......
......@@ -99,7 +99,7 @@
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#undef DBG_MARK_DBG_TILE
//#undef DBG_TILE
//#undef HAS_PRINTF
#define HAS_PRINTF
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment