converting to variable num_cams

ee0cfc3b · Andrey Filippov · 18d8e56b · ee0cfc3b · ee0cfc3b · ee0cfc3b
Commit ee0cfc3b authored Nov 24, 2021 by Andrey Filippov
Showing with 935 additions and 435 deletions

TileProcessor.cuh src/TileProcessor.cuh +725 -362

TileProcessor.h src/TileProcessor.h +11 -3

test_tp.cu src/test_tp.cu +194 -68

tp_defines.h src/tp_defines.h +5 -2

No files found.
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
--- a/src/TileProcessor.h
+++ b/src/TileProcessor.h
@@ -44,10 +44,13 @@
 extern "C" __global__ void convert_direct( // called with a single block, single thread
 		//		struct CltExtra ** gpu_kernel_offsets, // [NUM_CAMS], // changed for jcuda to avoid struct parameters
+		int                num_cams,           // actual number of cameras
+		int                num_colors,         // actual number of colors: 3 for RGB, 1 for LWIR/mono
 		float           ** gpu_kernel_offsets, // [NUM_CAMS],
 		float           ** gpu_kernels,        // [NUM_CAMS],
 		float           ** gpu_images,         // [NUM_CAMS],
-		struct tp_task   * gpu_tasks,
+		float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+//		struct tp_task   * gpu_tasks,
 		float           ** gpu_clt,            // [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 		size_t             dstride,            // in floats (pixels)
 		int                num_tiles,          // number of tiles in task
@@ -60,15 +63,17 @@ extern "C" __global__ void convert_direct( // called with a single block, single
 		int *              pnum_active_tiles,  //  indices to gpu_tasks
 		int                tilesx);
 extern "C" __global__ void correlate2D(
+		int               num_cams,
+		int *             sel_pairs,
 		float          ** gpu_clt,            // [NUM_CAMS] ->[TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 		int               colors,             // number of colors (3/1)
 		float             scale0,             // scale for R
 		float             scale1,             // scale for B
 		float             scale2,             // scale for G
 		float             fat_zero,           // here - absolute
-		struct tp_task  * gpu_tasks,          // array of per-tile tasks (now bits 4..9 - correlation pairs)
+		float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+//		struct tp_task  * gpu_tasks,          // array of per-tile tasks (now bits 4..9 - correlation pairs)
 		int               num_tiles,          // number of tiles in task
 		int               tilesx,             // number of tile rows
 		int             * gpu_corr_indices,   // packed tile+pair
@@ -99,6 +104,7 @@ extern "C" __global__ void corr2D_combine(
 		float           * gpu_corrs_combo);   // combined correlation output (one per tile)
 extern "C" __global__ void textures_nonoverlap(
+		int                num_cams,           // number of cameras used
 		struct tp_task  * gpu_tasks,
 		int               num_tiles,          // number of tiles in task list
 //		int               num_tilesx,         // number of tiles in a row
@@ -121,6 +127,7 @@ extern "C" __global__ void textures_nonoverlap(
 extern "C"
 __global__ void imclt_rbg_all(
+		int                num_cams,
 		float           ** gpu_clt,            // [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 		float           ** gpu_corr_images,    // [NUM_CAMS][WIDTH, 3 * HEIGHT]
 		int                apply_lpf,
@@ -142,6 +149,7 @@ extern "C" __global__ void imclt_rbg(
 		const size_t      dstride);            // in floats (pixels)
 extern "C" __global__ void generate_RBGA(
+		int                num_cams,           // number of cameras used
 		// Parameters to generate texture tasks
 		struct tp_task   * gpu_tasks,
 		int                num_tiles,          // number of tiles in task list

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
--- a/src/tp_defines.h
+++ b/src/tp_defines.h
@@ -41,9 +41,10 @@
 #ifndef JCUDA
 #include <stdio.h>
 #define THREADSX              (DTT_SIZE)
-#define NUM_CAMS                       4
+#define TEST_LWIR                      1
+#define NUM_CAMS                      16 // now maximal number of cameras
 #define NUM_PAIRS                      6
-#define NUM_COLORS                     3
+#define NUM_COLORS                     1 //3
 #define IMG_WIDTH                   2592
 #define IMG_HEIGHT                  1936
 #define KERNELS_HOR                  164
@@ -55,11 +56,13 @@
 #define CORR_TILES_PER_BLOCK           4
 #define CORR_TILES_PER_BLOCK_NORMALIZE 4 // increase to 8?
 #define CORR_TILES_PER_BLOCK_COMBINE   4 // increase to 16?
+#define TEXTURE_THREADS               32 //
 #define TEXTURE_THREADS_PER_TILE       8
 #define TEXTURE_TILES_PER_BLOCK        1
 #define IMCLT_THREADS_PER_TILE        16
 #define IMCLT_TILES_PER_BLOCK          4
 #define CORR_NTILE_SHIFT               8 // higher bits - number of a pair, other bits tile number
+// only lower bit will be used to request correlations, correlation mask will be common for all the scene
 #define CORR_PAIRS_MASK             0x3f// lower bits used to address correlation pair for the selected tile
 #define CORR_TEXTURE_BIT               7 // bit 7 used to request texture for the tile
 #define TASK_CORR_BITS                 4