switched textures_accumulate to use geometry_correction too

ee71f035 · Andrey Filippov · d8f9defc · ee71f035 · ee71f035 · ee71f035
Commit ee71f035 authored Apr 17, 2020 by Andrey Filippov
Hide whitespace changes
Inline Side-by-side

Showing with 21 additions and 47 deletions

TileProcessor.cuh src/TileProcessor.cuh +10 -11

TileProcessor.h src/TileProcessor.h +4 -6

test_tp.cu src/test_tp.cu +7 -30

No files found.
--- a/src/TileProcessor.cuh
+++ b/src/TileProcessor.cuh
@@ -1307,13 +1307,14 @@ __global__ void generate_RBGA(
 				printf("\n");
 #endif
 			    /* */
-			    textures_accumulate<<<grid_texture,threads_texture>>>(
+			    textures_accumulate <<<grid_texture,threads_texture>>>(
 			    		woi,                             // int             * woi,                // x, y, width,height
 						gpu_clt,                         // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 						ntt,                             // size_t            num_texture_tiles,  // number of texture tiles to process
 						gpu_texture_indices + ti_offset, // int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 //						gpu_port_offsets,                // float           * gpu_port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
-						(float *) gpu_geometry_correction ->pXY0,
+						gpu_geometry_correction,         // struct gc       * gpu_geometry_correction,
+//						(float *) gpu_geometry_correction ->pXY0,
 						colors,                          // int               colors,             // number of colors (3/1)
 						is_lwir,                         // int               is_lwir,            // do not perform shot correction
 						min_shot,                        // float             min_shot,           // 10.0
@@ -1321,9 +1322,7 @@ __global__ void generate_RBGA(
 						diff_sigma,                      // float             diff_sigma,         // pixel value/pixel change
 						diff_threshold,                  // float             diff_threshold,     // pixel value/pixel change
 						min_agree,                       // float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
-						weights[0],                      // float             weight0,            // scale for R
-						weights[1],                      // float             weight1,            // scale for B
-						weights[2],                      // float             weight2,            // scale for G
+						weights,                         // float             weights[3],         // scale for R,B,G
 						dust_remove,                     // int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
 			    		0,                               // int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
 			    // combining both non-overlap and overlap (each calculated if pointer is not null )
@@ -1774,7 +1773,7 @@ __global__ void textures_accumulate(
 		size_t            num_texture_tiles,  // number of texture tiles to process
 		int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 		// TODO: use geometry_correction rXY !
-		float           * gpu_port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
+		struct gc       * gpu_geometry_correction,
 		int               colors,             // number of colors (3/1)
 		int               is_lwir,            // do not perform shot correction
 		float             min_shot,           // 10.0
@@ -1782,9 +1781,7 @@ __global__ void textures_accumulate(
 		float             diff_sigma,         // pixel value/pixel change
 		float             diff_threshold,     // pixel value/pixel change
 		float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
-		float             weight0,            // scale for R
-		float             weight1,            // scale for B
-		float             weight2,            // scale for G
+		float             weights[3],         // scale for R,B,G
 		int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
 		int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
 // combining both non-overlap and overlap (each calculated if pointer is not null )
@@ -1794,7 +1791,8 @@ __global__ void textures_accumulate(
 		float           * gpu_texture_tiles)  // (number of colors +1 + ?)*16*16 rgba texture tiles

 {
-	float weights[3] = {weight0, weight1, weight2};
+	//						(float *) gpu_geometry_correction ->pXY0,
+//	float weights[3] = {weight0, weight1, weight2};
 	// will process exactly 4 cameras in one block (so this number is not adjustable here NUM_CAMS should be == 4 !
 	int camera_num = threadIdx.y;
 	int tile_indx = blockIdx.x; //  * TEXTURE_TILES_PER_BLOCK + tile_in_block;
@@ -1825,7 +1823,8 @@ __global__ void textures_accumulate(
 	__shared__ float ports_rgb   [NUM_CAMS][NUM_COLORS]; // return to system memory (optionally pass null to skip calculation)
 	__shared__ float max_diff [NUM_CAMS]; // return to system memory (optionally pass null to skip calculation)
 	if (threadIdx.x < 2){
-		port_offsets[camera_num][threadIdx.x] = * (gpu_port_offsets + 2 * camera_num + threadIdx.x);
+//		port_offsets[camera_num][threadIdx.x] = * (gpu_port_offsets + 2 * camera_num + threadIdx.x);
+		port_offsets[camera_num][threadIdx.x] = gpu_geometry_correction->rXY[camera_num][threadIdx.x];
 	}



--- a/src/TileProcessor.h
+++ b/src/TileProcessor.h
@@ -81,7 +81,7 @@ extern "C" __global__ void textures_accumulate(
 		size_t            num_texture_tiles,  // number of texture tiles to process
 		int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 		// TODO: use geometry_correction rXY !
-		float           * gpu_port_offsets,   // relative ports x,y offsets - just to scale differences, may be approximate
+		struct gc       * gpu_geometry_correction,
 		int               colors,             // number of colors (3/1)
 		int               is_lwir,            // do not perform shot correction
 		float             min_shot,           // 10.0
@@ -89,16 +89,14 @@ extern "C" __global__ void textures_accumulate(
 		float             diff_sigma,         // pixel value/pixel change
 		float             diff_threshold,     // pixel value/pixel change
 		float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
-		float             weight0,            // scale for R
-		float             weight1,            // scale for B
-		float             weight2,            // scale for G
+		float             weights[3],         // scale for R,B,G
 		int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
 		int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
-		// combining both non-overlap and overlap (each calculated if pointer is not null )
+// combining both non-overlap and overlap (each calculated if pointer is not null )
 		size_t            texture_rbg_stride, // in floats
 		float           * gpu_texture_rbg,    // (number of colors +1 + ?)*16*16 rgba texture tiles
 		size_t            texture_stride,     // in floats (now 256*4 = 1024)
-		float           * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
+		float           * gpu_texture_tiles);  // (number of colors +1 + ?)*16*16 rgba texture tiles

 extern "C"
 __global__ void imclt_rbg_all(

--- a/src/test_tp.cu
+++ b/src/test_tp.cu
@@ -993,7 +993,6 @@ int main(int argc, char **argv)
    		sdkResetTimer(&timerCORR);
    		sdkStartTimer(&timerCORR);
    	}
-#if 1
        correlate2D<<<1,1>>>(
 		gpu_clt,                    // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 		3,                          // int               colors,             // number of colors (3/1)
@@ -1008,21 +1007,6 @@ int main(int argc, char **argv)
 		dstride_corr/sizeof(float), // const size_t      corr_stride,        // in floats
 		CORR_OUT_RAD,               // int               corr_radius,        // radius of the output correlation (7 for 15x15)
 		gpu_corrs);                 // float           * gpu_corrs);          // correlation output data
-#else
-        dim3 grid_corr((num_corrs + CORR_TILES_PER_BLOCK-1) / CORR_TILES_PER_BLOCK,1,1);
-        correlate2D_inner<<<grid_corr,threads_corr>>>(
-		gpu_clt,   // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
-		3,         // int               colors,             // number of colors (3/1)
-		0.25,      // float             scale0,             // scale for R
-		0.25,      // float             scale1,             // scale for B
-		0.5,       // float             scale2,             // scale for G
-		30.0,      // float             fat_zero,           // here - absolute
-		num_corrs, // size_t            num_corr_tiles,     // number of correlation tiles to process
-		gpu_corr_indices, //  int             * gpu_corr_indices,   // packed tile+pair
-		dstride_corr/sizeof(float), // const size_t      corr_stride,        // in floats
-		CORR_OUT_RAD, // int               corr_radius,        // radius of the output correlation (7 for 15x15)
-		gpu_corrs); // float           * gpu_corrs);          // correlation output data
-#endif

    	getLastCudaError("Kernel failure");
    	checkCudaErrors(cudaDeviceSynchronize());
@@ -1090,12 +1074,12 @@ int main(int argc, char **argv)
 		// Channel1 weight = 0.117647
 		// Channel2 weight = 0.588235
    	textures_accumulate<<<grid_texture,threads_texture>>> (
-//    			0,          // int               border_tile,        // if 1 - watch for border
-    			(int *) 0,  //      int             * woi,                // x, y, width,height
+    			(int *) 0,             // int             * woi,                // x, y, width,height
 		        gpu_clt ,              // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 				num_textures,          // size_t            num_texture_tiles,  // number of texture tiles to process
+				// requires initialized gpu_texture_indices
 				gpu_texture_indices,   // int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
-				gpu_port_offsets,      // float           * port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
+				gpu_geometry_correction, // struct gc     * gpu_geometry_correction,
 				texture_colors,        // int               colors,             // number of colors (3/1)
 				(texture_colors == 1), // int               is_lwir,            // do not perform shot correction
 				10.0,                  // float             min_shot,           // 10.0
@@ -1103,9 +1087,7 @@ int main(int argc, char **argv)
 				1.5f,                  // float             diff_sigma,         // pixel value/pixel change
 				10.0f,                 // float             diff_threshold,     // pixel value/pixel change
 				3.0,                   // float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
-				0.294118,              // float             weight0,            // scale for R
-				0.117647,              // float             weight1,            // scale for B
-				0.588235,              // float             weight2,            // scale for G
+				gpu_color_weights,     // float             weights[3],         // scale for R
 				1,                     // int               dust_remove,        // Do not reduce average weight when only one image differes much from the average
 				keep_texture_weights,  // int               keep_weights,       // return channel weights after A in RGBA
    	// combining both non-overlap and overlap (each calculated if pointer is not null )
@@ -1166,7 +1148,7 @@ int main(int argc, char **argv)
 #endif // ifndef NOTEXTURES


-#define GEN_TEXTURE_LIST
+#undef GEN_TEXTURE_LIST
 #ifdef  GEN_TEXTURE_LIST
    		dim3 threads_list(1,1, 1); // TEXTURE_TILES_PER_BLOCK, 1);
    		dim3 grid_list   (1,1,1);
@@ -1269,7 +1251,7 @@ int main(int argc, char **argv)
    	// Parameters to generate texture tasks
                gpu_tasks,             // struct tp_task   * gpu_tasks,
                tp_task_size,          // int                num_tiles,          // number of tiles in task list
-    	// declare arrays in device code?
+		// Does not require initialized gpu_texture_indices to be initialized - just allocated, will generate.
 	            gpu_texture_indices,   // int              * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 	            gpu_num_texture_tiles, // int              * num_texture_tiles,  // number of texture tiles to process (8 elements)
 	            gpu_woi,               // int              * woi,                // x,y,width,height of the woi
@@ -1277,9 +1259,7 @@ int main(int argc, char **argv)
 	            TILESY,                // int                height); // <= TILESY, use for faster processing of LWIR images
    	// Parameters for the texture generation
 	            gpu_clt ,              // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
-//				(float *)
-				gpu_geometry_correction, // struct gc          * gpu_geometry_correction,
-//	            gpu_port_offsets,      // float           * port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
+				gpu_geometry_correction, // struct gc     * gpu_geometry_correction,
 	            texture_colors,        // int               colors,             // number of colors (3/1)
 	            (texture_colors == 1), // int               is_lwir,            // do not perform shot correction
 	            10.0,                  // float             min_shot,           // 10.0
@@ -1288,9 +1268,6 @@ int main(int argc, char **argv)
 	            10.0f,                 // float             diff_threshold,     // pixel value/pixel change
 	            3.0,                   // float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
 				gpu_color_weights,     // float             weights[3],         // scale for R
-//	            0.294118,              // float             weight0,            // scale for R
-//	            0.117647,              // float             weight1,            // scale for B
-//	            0.588235,              // float             weight2,            // scale for G
 	            1,                     // int               dust_remove,        // Do not reduce average weight when only one image differes much from the average
 	            0,                     // int               keep_weights,       // return channel weights after A in RGBA
 				dstride_textures_rbga/sizeof(float), // 	const size_t      texture_rbga_stride,     // in floats