Debugging code to generate data for macroblock correlation

8caaa2db · Andrey Filippov · fdc9840a · 8caaa2db · 8caaa2db · 8caaa2db
Commit 8caaa2db authored Apr 18, 2020 by Andrey Filippov
Showing with 322 additions and 94 deletions

TileProcessor.cuh src/TileProcessor.cuh +294 -68

TileProcessor.h src/TileProcessor.h +10 -11

test_tp.cu src/test_tp.cu +13 -14

tp_defines.h src/tp_defines.h +5 -1

No files found.
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
--- a/src/TileProcessor.h
+++ b/src/TileProcessor.h
@@ -75,11 +75,13 @@ extern "C" __global__ void correlate2D(
 		float           * gpu_corrs);          // correlation output data
-extern "C" __global__ void textures_accumulate(
+extern "C" __global__ void textures_nonoverlap(
-		int             * woi,                // x, y, width,height
+		struct tp_task  * gpu_tasks,
-		float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
+		int               num_tiles,          // number of tiles in task list
-		size_t            num_texture_tiles,  // number of texture tiles to process
+// declare arrays in device code?
 		int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7))
+		int             * pnum_texture_tiles,  // returns total number of elements in gpu_texture_indices array
+		float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 		// TODO: use geometry_correction rXY !
 		struct gc       * gpu_geometry_correction,
 		int               colors,             // number of colors (3/1)
@@ -91,14 +93,11 @@ extern "C" __global__ void textures_accumulate(
 		float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
 		float             weights[3],         // scale for R,B,G
 		int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
-		int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
+//		int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
 // combining both non-overlap and overlap (each calculated if pointer is not null )
-		size_t            texture_rbg_stride, // in floats
+		size_t            texture_stride,     // in floats (now 256*4 = 1024)  // may be 0 if not needed
-		float           * gpu_texture_rbg,    // (number of colors +1 + ?)*16*16 rgba texture tiles
+		float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles    // may be 0 if not needed
-		size_t            texture_stride,     // in floats (now 256*4 = 1024)
+		float           * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
-		float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles
-		float           * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
 extern "C"
 __global__ void imclt_rbg_all(

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -1075,12 +1075,14 @@ int main(int argc, char **argv)
 		// Channel0 weight = 0.294118
 		// Channel1 weight = 0.117647
 		// Channel2 weight = 0.588235
-    	textures_accumulate<<<grid_texture,threads_texture>>> (
+    	textures_nonoverlap<<<1,1>>> (
-    			(int *) 0,             // int             * woi,                // x, y, width,height
+                gpu_tasks,             // struct tp_task   * gpu_tasks,
-		        gpu_clt ,              // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
+                tp_task_size,          // int                num_tiles,          // number of tiles in task list
-				num_textures,          // size_t            num_texture_tiles,  // number of texture tiles to process
+    	// declare arrays in device code?
-				// requires initialized gpu_texture_indices
 				gpu_texture_indices,   // int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7))
+				gpu_num_texture_tiles, // int             * pnum_texture_tiles,  // returns total number of elements in gpu_texture_indices array
+		        gpu_clt ,              // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
+    			// TODO: use geometry_correction rXY !
 				gpu_geometry_correction, // struct gc     * gpu_geometry_correction,
 				texture_colors,        // int               colors,             // number of colors (3/1)
 				(texture_colors == 1), // int               is_lwir,            // do not perform shot correction
@@ -1091,14 +1093,11 @@ int main(int argc, char **argv)
 				3.0,                   // float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
 				gpu_color_weights,     // float             weights[3],         // scale for R,B,G
 				1,                     // int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
-				keep_texture_weights,  // int               keep_weights,       // return channel weights after A in RGBA
    	// combining both non-overlap and overlap (each calculated if pointer is not null )
-    			0, // const size_t      texture_rbg_stride, // in floats
+				0, // dstride_textures/sizeof(float), // size_t            texture_stride,     // in floats (now 256*4 = 1024)  // may be 0 if not needed
-    			(float *) 0, // float           * gpu_texture_rbg,     // (number of colors +1 + ?)*16*16 rgba texture tiles
+//				gpu_textures,         // float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles    // may be 0 if not needed
-				dstride_textures/sizeof(float), // const size_t      texture_stride,     // in floats (now 256*4 = 1024)
+				(float *) 0,          // gpu_textures,         // float           * gpu_texture_tiles,  // (number of colors +1 + ?)*16*16 rgba texture tiles    // may be 0 if not needed
-				gpu_textures,    // float           * gpu_texture_tiles);  // 4*16*16 rgba texture tiles
+				gpu_diff_rgb_combo);  // float           * gpu_diff_rgb_combo); // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS] // may be 0 if not needed
-				gpu_diff_rgb_combo);             // float           * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
    	getLastCudaError("Kernel failure");
    	checkCudaErrors(cudaDeviceSynchronize());
    	printf("test pass: %d\n",i);
@@ -1251,7 +1250,7 @@ int main(int argc, char **argv)
    		sdkStartTimer(&timerRGBA);
    	}
-    	generate_RBGA<<<grid_rgba,threads_rgba>>> (
+    	generate_RBGA<<<1,1>>> (
    	// Parameters to generate texture tasks
                gpu_tasks,             // struct tp_task   * gpu_tasks,
                tp_task_size,          // int                num_tiles,          // number of tiles in task list
@@ -1276,7 +1275,7 @@ int main(int argc, char **argv)
 	            0,                     // int               keep_weights,       // return channel weights after A in RGBA
 				dstride_textures_rbga/sizeof(float), // 	const size_t      texture_rbga_stride,     // in floats
 				gpu_textures_rbga,     // 	float           * gpu_texture_tiles)    // (number of colors +1 + ?)*16*16 rgba texture tiles
-				gpu_diff_rgb_combo);   // float           * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
+				(float *) 0 ); // gpu_diff_rgb_combo);   // float           * gpu_diff_rgb_combo) // diff[NUM_CAMS], R[NUM_CAMS], B[NUM_CAMS],G[NUM_CAMS]
    	getLastCudaError("Kernel failure");
    	checkCudaErrors(cudaDeviceSynchronize());

--- a/src/tp_defines.h
+++ b/src/tp_defines.h
@@ -114,8 +114,12 @@
 // geom
 //#define DEBUG20 1
-#define DEBUG21 1
+// #define DEBUG21 1 // Geometry Correction
+#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
+#define DEBUG22 1
+#endif //#if (DBG_TILE_X >= 0) && (DBG_TILE_Y >= 0)
 #endif //#ifndef JCUDA