Commit bfa258e0 authored by Andrey Filippov's avatar Andrey Filippov

minor updates

parent d85c42b5
...@@ -1288,7 +1288,7 @@ extern "C" __global__ void correlate2D_inner( ...@@ -1288,7 +1288,7 @@ extern "C" __global__ void correlate2D_inner(
// copy clt (frequency domain data) // copy clt (frequency domain data)
float * clt_tile1 = ((float *) clt_tiles1) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1); float * clt_tile1 = ((float *) clt_tiles1) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
float * clt_tile2 = ((float *) clt_tiles2) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1); float * clt_tile2 = ((float *) clt_tiles2) + corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
int offs = (tile_num * NUM_COLORS + color) * (4 * DTT_SIZE * DTT_SIZE) + threadIdx.x; int offs = (tile_num * colors + color) * (4 * DTT_SIZE * DTT_SIZE) + threadIdx.x;
float * gpu_tile1 = ((float *) gpu_clt[cam1]) + offs; float * gpu_tile1 = ((float *) gpu_clt[cam1]) + offs;
float * gpu_tile2 = ((float *) gpu_clt[cam2]) + offs; float * gpu_tile2 = ((float *) gpu_clt[cam2]) + offs;
float * clt_tile1i = clt_tile1 + threadIdx.x; float * clt_tile1i = clt_tile1 + threadIdx.x;
...@@ -1892,7 +1892,6 @@ extern "C" __global__ void corr2D_normalize_inner( ...@@ -1892,7 +1892,6 @@ extern "C" __global__ void corr2D_normalize_inner(
* *
* @param num_cams number of cameras * @param num_cams number of cameras
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16 * @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
//* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles int gpu_tasks array prepared for processing * @param num_tiles number of tiles int gpu_tasks array prepared for processing
* @param gpu_texture_indices allocated array - 1 integer per tile to process * @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param num_texture_tiles allocated array - 8 integers (may be reduced to 4 later) * @param num_texture_tiles allocated array - 8 integers (may be reduced to 4 later)
...@@ -2126,7 +2125,6 @@ __global__ void clear_texture_rbga( ...@@ -2126,7 +2125,6 @@ __global__ void clear_texture_rbga(
* *
* @param num_cams number of cameras * @param num_cams number of cameras
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16 * @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
//* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles int gpu_tasks array prepared for processing * @param num_tiles number of tiles int gpu_tasks array prepared for processing
* @param gpu_texture_indices allocated array - 1 integer per tile to process * @param gpu_texture_indices allocated array - 1 integer per tile to process
* @param num_texture_tiles number of texture tiles to process (allocated 8-element integer array) * @param num_texture_tiles number of texture tiles to process (allocated 8-element integer array)
...@@ -2243,7 +2241,6 @@ __global__ void clear_texture_list( ...@@ -2243,7 +2241,6 @@ __global__ void clear_texture_list(
* *
* @param num_cams number of cameras <= NUM_CAMS * @param num_cams number of cameras <= NUM_CAMS
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16 * @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
//* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles int gpu_tasks array prepared for processing * @param num_tiles number of tiles int gpu_tasks array prepared for processing
* @param width number of tiles in a row * @param width number of tiles in a row
* @param gpu_texture_indices allocated array - 1 integer per tile to process * @param gpu_texture_indices allocated array - 1 integer per tile to process
...@@ -2459,7 +2456,6 @@ __global__ void index_direct( ...@@ -2459,7 +2456,6 @@ __global__ void index_direct(
* *
* @param num_cams number of cameras <= NUM_CAMS * @param num_cams number of cameras <= NUM_CAMS
* @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16 * @param gpu_ftasks flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
//* @param gpu_tasks array of per-tile tasks (struct tp_task)
* @param num_tiles number of tiles int gpu_tasks array prepared for processing * @param num_tiles number of tiles int gpu_tasks array prepared for processing
* @param width number of tiles in a row * @param width number of tiles in a row
* @param nonoverlap_list integer array to place the generated list * @param nonoverlap_list integer array to place the generated list
...@@ -3338,7 +3334,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1) ...@@ -3338,7 +3334,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
// always copy 3 (1) colors + alpha // always copy 3 (1) colors + alpha
if (colors == 3){ if (colors == 3){
#pragma unroll #pragma unroll
for (int ncol = 0; ncol < NUM_COLORS + 1; ncol++) { // 4 for (int ncol = 0; ncol < colors + 1; ncol++) { // 4
*(gpu_texture_rbg_gi + ncol * slice_stride) += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21)); *(gpu_texture_rbg_gi + ncol * slice_stride) += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
} }
} else { // assuming colors = 1 } else { // assuming colors = 1
...@@ -3970,7 +3966,9 @@ __device__ void convertCorrectTile( ...@@ -3970,7 +3966,9 @@ __device__ void convertCorrectTile(
int kernels_vert, int kernels_vert,
int tilesx) int tilesx)
{ {
#ifdef DEBUG30
int dbg_tile = (num_colors & 16) != 0; int dbg_tile = (num_colors & 16) != 0;
#endif
num_colors &= 7; num_colors &= 7;
// int tilesx = TILES-X; // int tilesx = TILES-X;
int is_mono = num_colors == 1; int is_mono = num_colors == 1;
......
...@@ -65,7 +65,6 @@ struct tp_task { ...@@ -65,7 +65,6 @@ struct tp_task {
float centerXY[2]; // "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation float centerXY[2]; // "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
// if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy // if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
float xy[NUM_CAMS][2]; float xy[NUM_CAMS][2];
// float target_disparity;
float disp_dist[NUM_CAMS][4]; // calculated with getPortsCoordinates() float disp_dist[NUM_CAMS][4]; // calculated with getPortsCoordinates()
}; };
......
...@@ -868,8 +868,8 @@ int main(int argc, char **argv) ...@@ -868,8 +868,8 @@ int main(int argc, char **argv)
calc_rot_deriv<<<grid_rot,threads_rot>>> ( calc_rot_deriv<<<grid_rot,threads_rot>>> (
num_cams, // int num_cams, num_cams, // int num_cams,
gpu_correction_vector , // struct corr_vector * gpu_correction_vector, gpu_correction_vector , // struct corr_vector * gpu_correction_vector,
gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv); gpu_rot_deriv); // union trot_deriv * gpu_rot_deriv);
getLastCudaError("Kernel failure"); getLastCudaError("Kernel failure");
...@@ -892,8 +892,8 @@ int main(int argc, char **argv) ...@@ -892,8 +892,8 @@ int main(int argc, char **argv)
#define TEST_REVERSE_DISTORTIONS #define TEST_REVERSE_DISTORTIONS
#ifdef TEST_REVERSE_DISTORTIONS #ifdef TEST_REVERSE_DISTORTIONS
dim3 threads_rd(3,3,3); dim3 threads_rd(3,3,3);
// dim3 grid_rd (NUM_CAMS, 1, 1); dim3 grid_rd (NUM_CAMS, 1, 1); // can get rid of NUM_CAMS
dim3 grid_rd (num_cams, 1, 1); // dim3 grid_rd (num_cams, 1, 1);
printf("REVERSE DISTORTIONS: threads_list=(%d, %d, %d)\n",threads_rd.x,threads_rd.y,threads_rd.z); printf("REVERSE DISTORTIONS: threads_list=(%d, %d, %d)\n",threads_rd.x,threads_rd.y,threads_rd.z);
printf("REVERSE DISTORTIONS: grid_list=(%d, %d, %d)\n",grid_rd.x,grid_rd.y,grid_rd.z); printf("REVERSE DISTORTIONS: grid_list=(%d, %d, %d)\n",grid_rd.x,grid_rd.y,grid_rd.z);
...@@ -1255,7 +1255,6 @@ int main(int argc, char **argv) ...@@ -1255,7 +1255,6 @@ int main(int argc, char **argv)
color_weights[2], // 0.5, // float scale2, // scale for G color_weights[2], // 0.5, // float scale2, // scale for G
30.0, // float fat_zero, // here - absolute 30.0, // float fat_zero, // here - absolute
gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16 gpu_ftasks, // float * gpu_ftasks, // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
// gpu_tasks, // struct tp_task * gpu_tasks,
tp_task_size, // int num_tiles) // number of tiles in task tp_task_size, // int num_tiles) // number of tiles in task
TILESX, // int tilesx, // number of tile rows TILESX, // int tilesx, // number of tile rows
gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair gpu_corr_indices, // int * gpu_corr_indices, // packed tile+pair
......
...@@ -41,10 +41,10 @@ ...@@ -41,10 +41,10 @@
#ifndef JCUDA #ifndef JCUDA
#include <stdio.h> #include <stdio.h>
#define THREADSX (DTT_SIZE) #define THREADSX (DTT_SIZE)
#define TEST_LWIR 1 #define TEST_LWIR 0
#define NUM_CAMS 16 // now maximal number of cameras #define NUM_CAMS 16 // now maximal number of cameras
//#define NUM_PAIRS 6 //#define NUM_PAIRS 6
#define NUM_COLORS 1 //3 //#define NUM_COLORS 1 //3
// kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64] // kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64]
#if TEST_LWIR #if TEST_LWIR
#define IMG_WIDTH 640 #define IMG_WIDTH 640
...@@ -72,7 +72,7 @@ ...@@ -72,7 +72,7 @@
#define IMCLT_TILES_PER_BLOCK 4 #define IMCLT_TILES_PER_BLOCK 4
#define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number #define CORR_NTILE_SHIFT 8 // higher bits - number of a pair, other bits tile number
// only lower bit will be used to request correlations, correlation mask will be common for all the scene // only lower bit will be used to request correlations, correlation mask will be common for all the scene
#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile //#define CORR_PAIRS_MASK 0x3f// lower bits used to address correlation pair for the selected tile
#define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile #define CORR_TEXTURE_BIT 7 // bit 7 used to request texture for the tile
#define TASK_CORR_BITS 4 #define TASK_CORR_BITS 4
#define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor #define TASK_TEXTURE_N_BIT 0 // Texture with North neighbor
...@@ -110,8 +110,8 @@ ...@@ -110,8 +110,8 @@
#define DBG_TILE_Y 36 // 88 // 121 // 69 // 111 // 66 #define DBG_TILE_Y 36 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE (DBG_TILE_Y * 80 + DBG_TILE_X) #define DBG_TILE (DBG_TILE_Y * 80 + DBG_TILE_X)
#else #else
#define DBG_TILE_X 32 // 162 // 151 // 161 // 49 #define DBG_TILE_X 114 // 32 // 162 // 151 // 161 // 49
#define DBG_TILE_Y 88 // 121 // 69 // 111 // 66 #define DBG_TILE_Y 51 // 52 // 88 // 121 // 69 // 111 // 66
#define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X) #define DBG_TILE (DBG_TILE_Y * 324 + DBG_TILE_X)
#endif #endif
#undef DBG_MARK_DBG_TILE #undef DBG_MARK_DBG_TILE
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment