minor updates

bfa258e0 · Andrey Filippov · d85c42b5 · bfa258e0 · bfa258e0 · bfa258e0
Commit bfa258e0 authored Dec 03, 2021 by Andrey Filippov
Showing with 13 additions and 17 deletions

TileProcessor.cuh src/TileProcessor.cuh +4 -6

geometry_correction.h src/geometry_correction.h +0 -1

test_tp.cu src/test_tp.cu +4 -5

tp_defines.h src/tp_defines.h +5 -5

No files found.
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
@@ -1288,7 +1288,7 @@ extern "C" __global__ void correlate2D_inner(
        // copy clt (frequency domain data)
        float * clt_tile1 = ((float *) clt_tiles1) +  corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
        float * clt_tile2 = ((float *) clt_tiles2) +  corr_in_block * (4 * DTT_SIZE * DTT_SIZE1);
-        int offs = (tile_num * NUM_COLORS + color) * (4 * DTT_SIZE * DTT_SIZE) + threadIdx.x;
+        int offs = (tile_num * colors + color) * (4 * DTT_SIZE * DTT_SIZE) + threadIdx.x;
        float * gpu_tile1 = ((float *) gpu_clt[cam1]) + offs;
        float * gpu_tile2 = ((float *) gpu_clt[cam2]) + offs;
 		float * clt_tile1i = clt_tile1 + threadIdx.x;
@@ -1892,7 +1892,6 @@ extern "C" __global__ void corr2D_normalize_inner(
 *
 * @param num_cams             number of cameras
 * @param gpu_ftasks           flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
- //* @param gpu_tasks            array of per-tile tasks (struct tp_task)
 * @param num_tiles            number of tiles in the gpu_ftasks array prepared for processing
 * @param gpu_texture_indices  allocated array - 1 integer per tile to process
 * @param num_texture_tiles    allocated array - 8 integers (may be reduced to 4 later)
@@ -2126,7 +2125,6 @@ __global__ void clear_texture_rbga(
 *
 * @param num_cams             number of cameras
 * @param gpu_ftasks           flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
- //* @param gpu_tasks            array of per-tile tasks (struct tp_task)
 * @param num_tiles            number of tiles in the gpu_ftasks array prepared for processing
 * @param gpu_texture_indices  allocated array - 1 integer per tile to process
 * @param num_texture_tiles    number of texture tiles to process (allocated 8-element integer array)
@@ -2243,7 +2241,6 @@ __global__ void clear_texture_list(
 *
 * @param num_cams             number of cameras <= NUM_CAMS
 * @param gpu_ftasks           flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
-//* @param gpu_tasks            array of per-tile tasks (struct tp_task)
 * @param num_tiles            number of tiles in the gpu_ftasks array prepared for processing
 * @param width                number of tiles in a row
 * @param gpu_texture_indices  allocated array - 1 integer per tile to process
@@ -2459,7 +2456,6 @@ __global__ void index_direct(
 *
 * @param num_cams         number of cameras <= NUM_CAMS
 * @param gpu_ftasks           flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
- //* @param gpu_tasks            array of per-tile tasks (struct tp_task)
 * @param num_tiles            number of tiles in the gpu_ftasks array prepared for processing
 * @param width                number of tiles in a row
 * @param nonoverlap_list      integer array to place the generated list
@@ -3338,7 +3334,7 @@ extern "C" __global__ void textures_accumulate( // (8,4,1) (N,1,1)
 			// always copy 3 (1) colors + alpha
 			if (colors == 3){
 #pragma unroll
-				for (int ncol = 0; ncol < NUM_COLORS + 1; ncol++) { // 4
+				for (int ncol = 0; ncol < colors + 1; ncol++) { // 4
 					*(gpu_texture_rbg_gi + ncol * slice_stride) += *(rgba_i + ncol * (DTT_SIZE2 * DTT_SIZE21));
 				}
 			} else { // assuming colors = 1
@@ -3970,7 +3966,9 @@ __device__ void convertCorrectTile(
 		int                kernels_vert,
 		int                tilesx)
 {
+#ifdef DEBUG30
 	int dbg_tile = (num_colors & 16) != 0;
+#endif
 	num_colors &= 7;
 //	int tilesx = TILES-X;
 	int is_mono = num_colors == 1;

--- a/src/geometry_correction.h
+++ b/src/geometry_correction.h
@@ -65,7 +65,6 @@ struct tp_task {
 	float centerXY[2];          // "ideal" centerX, centerY to use instead of the uniform tile centers (txy) for interscene accumulation
 	                            // if isnan(centerXY[0]), then txy is used to calculate centerXY and all xy
 	float xy[NUM_CAMS][2];
-//	float target_disparity;
 	float disp_dist[NUM_CAMS][4]; // calculated with getPortsCoordinates()
 };

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -868,8 +868,8 @@ int main(int argc, char **argv)
    	calc_rot_deriv<<<grid_rot,threads_rot>>> (
    			num_cams,                // int                  num_cams,
-    			gpu_correction_vector ,           // 		struct corr_vector * gpu_correction_vector,
+    			gpu_correction_vector ,  // struct corr_vector * gpu_correction_vector,
-    			gpu_rot_deriv);                  // union trot_deriv   * gpu_rot_deriv);
+    			gpu_rot_deriv);          // union trot_deriv   * gpu_rot_deriv);
    	getLastCudaError("Kernel failure");
@@ -892,8 +892,8 @@ int main(int argc, char **argv)
 #define TEST_REVERSE_DISTORTIONS
 #ifdef  TEST_REVERSE_DISTORTIONS
    dim3 threads_rd(3,3,3);
-//    dim3 grid_rd   (NUM_CAMS, 1, 1);
+    dim3 grid_rd   (NUM_CAMS, 1, 1); // can get rid of NUM_CAMS
-    dim3 grid_rd   (num_cams, 1, 1);
+//    dim3 grid_rd   (num_cams, 1, 1);
    printf("REVERSE DISTORTIONS: threads_list=(%d, %d, %d)\n",threads_rd.x,threads_rd.y,threads_rd.z);
    printf("REVERSE DISTORTIONS: grid_list=(%d, %d, %d)\n",grid_rd.x,grid_rd.y,grid_rd.z);
@@ -1255,7 +1255,6 @@ int main(int argc, char **argv)
 				color_weights[2], // 0.5,   // float             scale2,             // scale for G
 				30.0,                       // float             fat_zero,           // here - absolute
 				gpu_ftasks,                 // float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
-//				gpu_tasks,                  // struct tp_task  * gpu_tasks,
 				tp_task_size,               // int               num_tiles) // number of tiles in task
 				TILESX,                     // int               tilesx,             // number of tile rows
 				gpu_corr_indices,           // int             * gpu_corr_indices,   // packed tile+pair

--- a/src/tp_defines.h
+++ b/src/tp_defines.h
@@ -41,10 +41,10 @@
 #ifndef JCUDA
 #include <stdio.h>
 #define THREADSX              (DTT_SIZE)
-#define TEST_LWIR                      1
+#define TEST_LWIR                      0
 #define NUM_CAMS                      16 // now maximal number of cameras
 //#define NUM_PAIRS                      6
-#define NUM_COLORS                     1 //3
+//#define NUM_COLORS                     1 //3
 // kernels [num_cams][num_colors][KERNELS_HOR][KERNELS_VERT][4][64]
 #if TEST_LWIR
 	#define IMG_WIDTH                   640
@@ -72,7 +72,7 @@
 #define IMCLT_TILES_PER_BLOCK          4
 #define CORR_NTILE_SHIFT               8 // higher bits - number of a pair, other bits tile number
 // only lower bit will be used to request correlations, correlation mask will be common for all the scene
-#define CORR_PAIRS_MASK             0x3f// lower bits used to address correlation pair for the selected tile
+//#define CORR_PAIRS_MASK             0x3f// lower bits used to address correlation pair for the selected tile
 #define CORR_TEXTURE_BIT               7 // bit 7 used to request texture for the tile
 #define TASK_CORR_BITS                 4
 #define TASK_TEXTURE_N_BIT             0 // Texture with North neighbor
@@ -110,8 +110,8 @@
 	#define DBG_TILE_Y     36 // 88 // 121 // 69  // 111 // 66
 	#define DBG_TILE    (DBG_TILE_Y * 80 + DBG_TILE_X)
 #else
-	#define DBG_TILE_X     32 // 162 // 151 // 161 // 49
+	#define DBG_TILE_X     114 // 32 // 162 // 151 // 161 // 49
-	#define DBG_TILE_Y     88 // 121 // 69  // 111 // 66
+	#define DBG_TILE_Y     51  // 52  // 88 // 121 // 69  // 111 // 66
 	#define DBG_TILE    (DBG_TILE_Y * 324 + DBG_TILE_X)
 #endif
 #undef DBG_MARK_DBG_TILE