Before adding per-tile weights for fat zeros

53f7f2ae · Andrey Filippov · 12758c3e · 53f7f2ae · 53f7f2ae · 53f7f2ae
Commit 53f7f2ae authored Dec 14, 2021 by Andrey Filippov
Showing with 67 additions and 22 deletions

TileProcessor.cuh src/TileProcessor.cuh +48 -10

TileProcessor.h src/TileProcessor.h +9 -4

test_tp.cu src/test_tp.cu +9 -7

tp_defines.h src/tp_defines.h +1 -1

No files found.
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
--- a/src/TileProcessor.h
+++ b/src/TileProcessor.h
@@ -49,7 +49,7 @@ extern "C" __global__ void convert_direct( // called with a single block, single
 		float           ** gpu_kernel_offsets, // [NUM_CAMS],
 		float           ** gpu_kernels,        // [NUM_CAMS],
 		float           ** gpu_images,         // [NUM_CAMS],
-		float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+		float            * gpu_ftasks,         // flattened tasks, 29 floats for quad EO, 101 floats for LWIR16
 //		struct tp_task   * gpu_tasks,
 		float           ** gpu_clt,            // [NUM_CAMS][TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 		size_t             dstride,            // in floats (pixels)
@@ -76,7 +76,7 @@ extern "C" __global__ void correlate2D(
 		float             scale1,             // scale for B
 		float             scale2,             // scale for G
 		float             fat_zero,           // here - absolute
-		float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+		float            * gpu_ftasks,         // flattened tasks, 29 floats for quad EO, 101 floats for LWIR16
 //		struct tp_task  * gpu_tasks,          // array of per-tile tasks (now bits 4..9 - correlation pairs)
 		int               num_tiles,          // number of tiles in task
 		int               tilesx,             // number of tile rows
@@ -109,7 +109,7 @@ extern "C" __global__ void corr2D_combine(
 extern "C" __global__ void textures_nonoverlap(
 		int               num_cams,           // number of cameras
-		float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats
+		float            * gpu_ftasks,         // flattened tasks, 29 floats for quad EO, 101 floats
 		//		struct tp_task  * gpu_tasks,
 		int               num_tiles,          // number of tiles in task list
 //		int               num_tilesx,         // number of tiles in a row
@@ -127,6 +127,7 @@ extern "C" __global__ void textures_nonoverlap(
 // combining both non-overlap and overlap (each calculated if pointer is not null )
 		size_t            texture_stride,     // in floats (now 256*4 = 1024)  // may be 0 if not needed
 		float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles    // may be 0 if not needed
+		int               linescan_order,     // 0 low-res tiles have the same order, as gpu_texture_indices, 1 - in linescan order
 		float           * gpu_diff_rgb_combo, //); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
 		int               num_tilesx);
@@ -141,6 +142,10 @@ __global__ void imclt_rbg_all(
 		int                woi_theight,
 		const size_t       dstride);            // in floats (pixels)
+extern "C" __global__ void erase8x8(
+		float           * gpu_top_left,
+		const size_t      dstride);
 extern "C" __global__ void imclt_rbg(
 		float           * gpu_clt,            // [TILES-Y][TILES-X][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 		float           * gpu_rbg,            // WIDTH, 3 * HEIGHT
@@ -156,7 +161,7 @@ extern "C" __global__ void imclt_rbg(
 extern "C" __global__ void generate_RBGA(
 		int                num_cams,           // number of cameras used
 		// Parameters to generate texture tasks
-		float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
+		float            * gpu_ftasks,         // flattened tasks, 29 floats for quad EO, 101 floats for LWIR16
 //		struct tp_task   * gpu_tasks,
 		int                num_tiles,          // number of tiles in task list
 		// declare arrays in device code?

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -31,8 +31,8 @@
 */
 #define NOCORR
-#define NOCORR_TD
+//#define NOCORR_TD
-#define NOTEXTURES_HOST
+//#define NOTEXTURES_HOST
 #define NOTEXTURES
 #define NOTEXTURE_RGBA
 #define SAVE_CLT
@@ -492,7 +492,9 @@ void generate_RBGA_host(
 				 gpu_woi,                             // int             * woi,                // x, y, width,height
 				 gpu_clt,                         // float          ** gpu_clt,            // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
 				 ntt,                             // size_t            num_texture_tiles,  // number of texture tiles to process
-				 gpu_texture_indices + ti_offset, // int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
+				 ti_offset,                       //                gpu_texture_indices_offset,// add to gpu_texture_indices
+				 gpu_texture_indices, //  + ti_offset, // int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
+//				 gpu_texture_indices + ti_offset, // int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 				 gpu_geometry_correction,         // struct gc       * gpu_geometry_correction,
 				 colors,                          // int               colors,             // number of colors (3/1)
 				 is_lwir,                         // int               is_lwir,            // do not perform shot correction
@@ -1626,7 +1628,6 @@ int main(int argc, char **argv)
    	// FIXME: provide sel_pairs
        correlate2D<<<1,1>>>( // output TD tiles, no normalization
        		num_cams,                      // int               num_cams,
-				//				0,                             // int *             sel_pairs,           // unused bits should be 0
 				sel_pairs[0], // int               sel_pairs0           // unused bits should be 0
 				sel_pairs[1], // int               sel_pairs1,           // unused bits should be 0
 				sel_pairs[2], // int               sel_pairs2,           // unused bits should be 0
@@ -1638,7 +1639,6 @@ int main(int argc, char **argv)
 				color_weights[2], // 0.5,      // float             scale2,             // scale for G
 				30.0,                          // float             fat_zero,           // here - absolute
 				gpu_ftasks,                    // float            * gpu_ftasks,         // flattened tasks, 27 floats for quad EO, 99 floats for LWIR16
-//				gpu_tasks,                     // struct tp_task  * gpu_tasks,
 				tp_task_size,                  // int               num_tiles) // number of tiles in task
 				TILESX,                        // int               tilesx,             // number of tile rows
 				gpu_corr_indices,              // int             * gpu_corr_indices,   // packed tile+pair
@@ -1795,6 +1795,7 @@ int main(int argc, char **argv)
 							(int *) 0,                       // int             * woi,                // x, y, width,height
 							gpu_clt,                         // float          ** gpu_clt,            // [num_cams] ->[TILES-Y][TILES-X][colors][DTT_SIZE*DTT_SIZE]
 							cpu_pnum_texture_tiles, // *pnum_texture_tiles,             // size_t            num_texture_tiles,  // number of texture tiles to process
+							0,                               //                gpu_texture_indices_offset,// add to gpu_texture_indices
 							gpu_texture_indices,             // int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 							gpu_geometry_correction,         // struct gc       * gpu_geometry_correction,
 							texture_colors,                  // int               colors,             // number of colors (3/1)
@@ -1949,7 +1950,7 @@ int main(int argc, char **argv)
 //    printf("grid_texture=(%d, %d, %d)\n",grid_texture.x,grid_texture.y,grid_texture.z);
    StopWatchInterface *timerTEXTURE = 0;
    sdkCreateTimer(&timerTEXTURE);
+	int  linescan_order = 1; // output low-res in linescan order, 0 - in gpu_texture_indices order
    for (int i = i0; i < numIterations; i++)
    {
    	if (i == 0)
@@ -1986,7 +1987,8 @@ int main(int argc, char **argv)
 				1,                     // int               dust_remove,        // Do not reduce average weight when only one image differes much from the average
    	// combining both non-overlap and overlap (each calculated if pointer is not null )
 				0, // dstride_textures/sizeof(float), // size_t            texture_stride,     // in floats (now 256*4 = 1024)  // may be 0 if not needed
-				(float *) 0,          // gpu_textures,         // float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles    // may be 0 if not needed
+				(float *) 0,          // gpu_textures,         // float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles    // may be 0 if not needed
+				linescan_order,       // int               linescan_order,     // 0 low-res tiles have the same order, as gpu_texture_indices, 1 - in linescan order
 				gpu_diff_rgb_combo, //);  // float           * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
 				TILESX);
    	getLastCudaError("Kernel failure");

--- a/src/tp_defines.h
+++ b/src/tp_defines.h
@@ -134,7 +134,7 @@
 #define DEBUG8 1
 #define DEBUG9 1
 */
-#define DEBUG8A 1
+//#define DEBUG8A 1 // generate_RBGA_host
 //textures
 //#define DEBUG10 1
 //#define DEBUG11 1