Switched textures_accumulate to use geometry_correction too (its rXY replaces the separate gpu_port_offsets array; color weights are now passed as weights[3])

fbab1093 · Andrey Filippov · 8893c688 · fbab1093 · fbab1093 · fbab1093
Commit fbab1093 authored Apr 17, 2020 by Andrey Filippov
4 changed files
--- a/src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+++ b/src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
@@ -193,7 +193,7 @@ public class GPUTileProcessor {
    private CUdeviceptr gpu_num_corr_tiles =      new CUdeviceptr(); //  allocate tilesX * tilesY * 6 * Sizeof.POINTER
    private CUdeviceptr gpu_texture_indices =     new CUdeviceptr(); //  allocate tilesX * tilesY * 6 * Sizeof.POINTER

-    private CUdeviceptr gpu_port_offsets =        new CUdeviceptr(); //  allocate Quad * 2 * Sizeof.POINTER
+//    private CUdeviceptr gpu_port_offsets =        new CUdeviceptr(); //  allocate Quad * 2 * Sizeof.POINTER
    private CUdeviceptr gpu_color_weights =       new CUdeviceptr(); //  allocate Quad * 2 * Sizeof.POINTER

    private CUdeviceptr gpu_woi =                 new CUdeviceptr(); //  4 integers (x, y, width, height) Rectangle - in tiles
@@ -564,7 +564,7 @@ public class GPUTileProcessor {
 //    	cuMemAlloc(gpu_texture_indices,tilesX * tilesY * Sizeof.POINTER);
    	cuMemAlloc(gpu_texture_indices,tilesX * tilesYa * Sizeof.POINTER);

-    	cuMemAlloc(gpu_port_offsets,   NUM_CAMS * 2 * Sizeof.FLOAT);
+//    	cuMemAlloc(gpu_port_offsets,   NUM_CAMS * 2 * Sizeof.FLOAT);
    	cuMemAlloc(gpu_color_weights,             3 * Sizeof.FLOAT);


@@ -1261,7 +1261,6 @@ public class GPUTileProcessor {
    	    	// Parameters for the texture generation
    			Pointer.to(gpu_clt),                             // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
        		Pointer.to(gpu_geometry_correction),             //	struct gc          * gpu_geometry_correction,
-//	            Pointer.to(gpu_port_offsets),                    // float           * port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
 	            Pointer.to(new int[]   {num_colors}),            // int               colors,             // number of colors (3/1)
 	            Pointer.to(new int[]   {iis_lwir}),              // int               is_lwir,            // do not perform shot correction
 	            Pointer.to(new float[] {(float) min_shot}),      // float             min_shot,           // 10.0
@@ -1285,7 +1284,6 @@ public class GPUTileProcessor {
    }

    public void execTextures(
-//    		double [][] port_offsets,
    		double [] color_weights,
    		boolean   is_lwir,
    		double    min_shot,           // 10.0
@@ -1300,88 +1298,15 @@ public class GPUTileProcessor {
    		IJ.showMessage("Error", "No GPU kernel: GPU_TEXTURES_kernel");
    		return;
    	}
-//    	float [] fport_offsets = new float[port_offsets.length * 2];
-//    	for (int cam = 0; cam < port_offsets.length; cam++) {
-//    		fport_offsets[2*cam + 0] = (float) port_offsets[cam][0];
-//    		fport_offsets[2*cam + 1] = (float) port_offsets[cam][1];
-//    	}
-//        cuMemcpyHtoD(gpu_port_offsets, Pointer.to(fport_offsets),  fport_offsets.length * Sizeof.FLOAT);

    	int num_colors = color_weights.length;
    	if (num_colors > 3) num_colors = 3;
-    	float weighht0 = (float) color_weights[0];
-    	float weighht1 = (num_colors >1)?((float) color_weights[1]):0.0f;
-    	float weighht2 = (num_colors >2)?((float) color_weights[2]):0.0f;
-    	int iis_lwir =      (is_lwir)? 1:0;
-    	int idust_remove =  (dust_remove)? 1 : 0;
-    	int ikeep_weights = (keep_weights)? 1 : 0;
-
-		int [] GridFullWarps =    {(num_texture_tiles + TEXTURE_TILES_PER_BLOCK-1) / TEXTURE_TILES_PER_BLOCK,1,1};
-    	int [] ThreadsFullWarps = {TEXTURE_THREADS_PER_TILE, NUM_CAMS, 1};
-
-    	Pointer kernelParameters = Pointer.to(
-//    			Pointer.to(new int[] {0}), // 0,          // int               border_tile,        // if 1 - watch for border
-    			Pointer.to(new int[] {0}), //  int             * woi, - not used
-    			Pointer.to(gpu_clt),
-    			Pointer.to(new int[] { num_texture_tiles }),
-    			Pointer.to(gpu_texture_indices),
-//    			Pointer.to(gpu_port_offsets),
-        		Pointer.to(gpu_geometry_correction),     //	struct gc          * gpu_geometry_correction,
-    			Pointer.to(new int[] { num_colors }),
-    			Pointer.to(new int[] { iis_lwir }),
-    			Pointer.to(new float[] {(float) min_shot }),
-    			Pointer.to(new float[] {(float) scale_shot }),
-    			Pointer.to(new float[] {(float) diff_sigma }),
-    			Pointer.to(new float[] {(float) diff_threshold }),
-    			Pointer.to(new float[] {(float) min_agree }),
-    			Pointer.to(new float[] {weighht0 }),
-    			Pointer.to(new float[] {weighht1 }),
-    			Pointer.to(new float[] {weighht2 }),
-    			Pointer.to(new int[] { idust_remove }),
-    			Pointer.to(new int[] { ikeep_weights }),
-    			Pointer.to(new int[] {0}),//  0, // const size_t      texture_rbg_stride, // in floats - DISABLE GENERATION!
-    			Pointer.to(new int[] {0}), // null, //  new Pointer(),  //Pointer.to(gpu_textures), // new Pointer(),  // Pointer.to(gpu_textures),
-    			Pointer.to(new int[] { texture_stride }), // can be a null pointer - will not be used! float           * gpu_texture_rbg,     // (number of colors +1 + ?)*16*16 rgba texture tiles
-    			Pointer.to(gpu_textures)
-    			);
-    	cuCtxSynchronize();
-    	// Call the kernel function
-    	cuLaunchKernel(GPU_TEXTURES_kernel,
-    			GridFullWarps[0],    GridFullWarps[1],   GridFullWarps[2],   // Grid dimension
-    			ThreadsFullWarps[0], ThreadsFullWarps[1],ThreadsFullWarps[2],// Block dimension
-    			0, null,                 // Shared memory size and stream (shared - only dynamic, static is in code)
-    			kernelParameters, null);   // Kernel- and extra parameters
-    	cuCtxSynchronize();
-    }
-
-    public void execTextures(
-    		double [][] port_offsets,
-    		double [] color_weights,
-    		boolean   is_lwir,
-    		double    min_shot,           // 10.0
-    		double    scale_shot,         // 3.0
-    		double    diff_sigma,         // pixel value/pixel change
-    		double    diff_threshold,     // pixel value/pixel change
-    		double    min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
-    		boolean   dust_remove,
-    		boolean   keep_weights) {
-    	if (GPU_TEXTURES_kernel == null)
-    	{
-    		IJ.showMessage("Error", "No GPU kernel: GPU_TEXTURES_kernel");
-    		return;
-    	}
-    	float [] fport_offsets = new float[port_offsets.length * 2];
-    	for (int cam = 0; cam < port_offsets.length; cam++) {
-    		fport_offsets[2*cam + 0] = (float) port_offsets[cam][0];
-    		fport_offsets[2*cam + 1] = (float) port_offsets[cam][1];
-    	}
-        cuMemcpyHtoD(gpu_port_offsets, Pointer.to(fport_offsets),  fport_offsets.length * Sizeof.FLOAT);
+    	float [] fcolor_weights = new float[3];
+    	fcolor_weights[0] = (float) color_weights[0];
+    	fcolor_weights[1] = (num_colors >1)?((float) color_weights[1]):0.0f;
+    	fcolor_weights[2] = (num_colors >2)?((float) color_weights[2]):0.0f;
+        cuMemcpyHtoD(gpu_color_weights, Pointer.to(fcolor_weights),  fcolor_weights.length * Sizeof.FLOAT);

-    	int num_colors = color_weights.length;
-    	if (num_colors > 3) num_colors = 3;
-    	float weighht0 = (float) color_weights[0];
-    	float weighht1 = (num_colors >1)?((float) color_weights[1]):0.0f;
-    	float weighht2 = (num_colors >2)?((float) color_weights[2]):0.0f;
    	int iis_lwir =      (is_lwir)? 1:0;
    	int idust_remove =  (dust_remove)? 1 : 0;
    	int ikeep_weights = (keep_weights)? 1 : 0;
@@ -1390,13 +1315,11 @@ public class GPUTileProcessor {
    	int [] ThreadsFullWarps = {TEXTURE_THREADS_PER_TILE, NUM_CAMS, 1};

    	Pointer kernelParameters = Pointer.to(
-//    			Pointer.to(new int[] {0}), // 0,          // int               border_tile,        // if 1 - watch for border
    			Pointer.to(new int[] {0}), //  int             * woi, - not used
    			Pointer.to(gpu_clt),
    			Pointer.to(new int[] { num_texture_tiles }),
    			Pointer.to(gpu_texture_indices),
-    			Pointer.to(gpu_port_offsets),
-//        		Pointer.to(gpu_geometry_correction),     //	struct gc          * gpu_geometry_correction,
+        		Pointer.to(gpu_geometry_correction),     //	struct gc          * gpu_geometry_correction,
    			Pointer.to(new int[] { num_colors }),
    			Pointer.to(new int[] { iis_lwir }),
    			Pointer.to(new float[] {(float) min_shot }),
@@ -1404,9 +1327,7 @@ public class GPUTileProcessor {
    			Pointer.to(new float[] {(float) diff_sigma }),
    			Pointer.to(new float[] {(float) diff_threshold }),
    			Pointer.to(new float[] {(float) min_agree }),
-    			Pointer.to(new float[] {weighht0 }),
-    			Pointer.to(new float[] {weighht1 }),
-    			Pointer.to(new float[] {weighht2 }),
+	            Pointer.to(gpu_color_weights),                   // float             weights[3],         // scale for R,B,G
    			Pointer.to(new int[] { idust_remove }),
    			Pointer.to(new int[] { ikeep_weights }),
    			Pointer.to(new int[] {0}),//  0, // const size_t      texture_rbg_stride, // in floats - DISABLE GENERATION!

--- a/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+++ b/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
@@ -2092,13 +2092,13 @@ public class TwoQuadCLT {
 				false); // boolean use_java_rByRDist) { // false - use newer GPU execCalcReverseDistortions); // once
 		gPUTileProcessor.setExtrinsicsVector(quadCLT_main.getGeometryCorrection().getCorrVector()); // for each new image

-		// TODO: calculate from the camera geometry?
+/*		// TODO: calculate from the camera geometry?
 		double[][] port_offsets = { // used only in textures to scale differences
 				{-0.5, -0.5},
 				{ 0.5, -0.5},
 				{-0.5,  0.5},
 				{ 0.5,  0.5}};
-
+*/
 		// All set, run kernel (correct and convert)
 		int NREPEAT = 1; // 00;
 		System.out.println("\n------------ Running GPU "+NREPEAT+" times ----------------");
@@ -2139,7 +2139,6 @@ public class TwoQuadCLT {
 // run textures
 		long startTextures = System.nanoTime();   // System.nanoTime();
 		for (int i = 0; i < NREPEAT; i++ ) gPUTileProcessor.execTextures(
-/** remove when done **/				port_offsets,                  // double [][] port_offsets,
 				col_weights,                   // double [] color_weights,
 				quadCLT_main.isLwir(),         // boolean   is_lwir,
 				clt_parameters.min_shot,       // double    min_shot,           // 10.0

--- a/src/main/resources/kernels/TileProcessor.cuh
+++ b/src/main/resources/kernels/TileProcessor.cuh
@@ -1307,13 +1307,14 @@ __global__ void generate_RBGA(
 				printf("\n");
 #endif
 			    /* */
-			    textures_accumulate<<<grid_texture,threads_texture>>>(
+			    textures_accumulate <<<grid_texture,threads_texture>>>(
 			    		woi,                             // int             * woi,                // x, y, width,height
 						gpu_clt,                         // float          ** gpu_clt,            // [NUM_CAMS] ->[TILESY][TILESX][NUM_COLORS][DTT_SIZE*DTT_SIZE]
 						ntt,                             // size_t            num_texture_tiles,  // number of texture tiles to process
 						gpu_texture_indices + ti_offset, // int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 //						gpu_port_offsets,                // float           * gpu_port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
-						(float *) gpu_geometry_correction ->pXY0,
+						gpu_geometry_correction,         // struct gc       * gpu_geometry_correction,
+//						(float *) gpu_geometry_correction ->pXY0,
 						colors,                          // int               colors,             // number of colors (3/1)
 						is_lwir,                         // int               is_lwir,            // do not perform shot correction
 						min_shot,                        // float             min_shot,           // 10.0
@@ -1321,9 +1322,7 @@ __global__ void generate_RBGA(
 						diff_sigma,                      // float             diff_sigma,         // pixel value/pixel change
 						diff_threshold,                  // float             diff_threshold,     // pixel value/pixel change
 						min_agree,                       // float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
-						weights[0],                      // float             weight0,            // scale for R
-						weights[1],                      // float             weight1,            // scale for B
-						weights[2],                      // float             weight2,            // scale for G
+						weights,                         // float             weights[3],         // scale for R,B,G
 						dust_remove,                     // int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
 			    		0,                               // int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
 			    // combining both non-overlap and overlap (each calculated if pointer is not null )
@@ -1774,7 +1773,7 @@ __global__ void textures_accumulate(
 		size_t            num_texture_tiles,  // number of texture tiles to process
 		int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 		// TODO: use geometry_correction rXY !
-		float           * gpu_port_offsets,       // relative ports x,y offsets - just to scale differences, may be approximate
+		struct gc       * gpu_geometry_correction,
 		int               colors,             // number of colors (3/1)
 		int               is_lwir,            // do not perform shot correction
 		float             min_shot,           // 10.0
@@ -1782,9 +1781,7 @@ __global__ void textures_accumulate(
 		float             diff_sigma,         // pixel value/pixel change
 		float             diff_threshold,     // pixel value/pixel change
 		float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
-		float             weight0,            // scale for R
-		float             weight1,            // scale for B
-		float             weight2,            // scale for G
+		float             weights[3],         // scale for R,B,G
 		int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
 		int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
 // combining both non-overlap and overlap (each calculated if pointer is not null )
@@ -1794,7 +1791,8 @@ __global__ void textures_accumulate(
 		float           * gpu_texture_tiles)  // (number of colors +1 + ?)*16*16 rgba texture tiles

 {
-	float weights[3] = {weight0, weight1, weight2};
+	//						(float *) gpu_geometry_correction ->pXY0,
+//	float weights[3] = {weight0, weight1, weight2};
 	// will process exactly 4 cameras in one block (so this number is not adjustable here NUM_CAMS should be == 4 !
 	int camera_num = threadIdx.y;
 	int tile_indx = blockIdx.x; //  * TEXTURE_TILES_PER_BLOCK + tile_in_block;
@@ -1825,7 +1823,8 @@ __global__ void textures_accumulate(
 	__shared__ float ports_rgb   [NUM_CAMS][NUM_COLORS]; // return to system memory (optionally pass null to skip calculation)
 	__shared__ float max_diff [NUM_CAMS]; // return to system memory (optionally pass null to skip calculation)
 	if (threadIdx.x < 2){
-		port_offsets[camera_num][threadIdx.x] = * (gpu_port_offsets + 2 * camera_num + threadIdx.x);
+//		port_offsets[camera_num][threadIdx.x] = * (gpu_port_offsets + 2 * camera_num + threadIdx.x);
+		port_offsets[camera_num][threadIdx.x] = gpu_geometry_correction->rXY[camera_num][threadIdx.x];
 	}



--- a/src/main/resources/kernels/TileProcessor.h
+++ b/src/main/resources/kernels/TileProcessor.h
@@ -81,7 +81,7 @@ extern "C" __global__ void textures_accumulate(
 		size_t            num_texture_tiles,  // number of texture tiles to process
 		int             * gpu_texture_indices,// packed tile + bits (now only (1 << 7)
 		// TODO: use geometry_correction rXY !
-		float           * gpu_port_offsets,   // relative ports x,y offsets - just to scale differences, may be approximate
+		struct gc       * gpu_geometry_correction,
 		int               colors,             // number of colors (3/1)
 		int               is_lwir,            // do not perform shot correction
 		float             min_shot,           // 10.0
@@ -89,16 +89,14 @@ extern "C" __global__ void textures_accumulate(
 		float             diff_sigma,         // pixel value/pixel change
 		float             diff_threshold,     // pixel value/pixel change
 		float             min_agree,          // minimal number of channels to agree on a point (real number to work with fuzzy averages)
-		float             weight0,            // scale for R
-		float             weight1,            // scale for B
-		float             weight2,            // scale for G
+		float             weights[3],         // scale for R,B,G
 		int               dust_remove,        // Do not reduce average weight when only one image differs much from the average
 		int               keep_weights,       // return channel weights after A in RGBA (was removed) (should be 0 if gpu_texture_rbg)?
-		// combining both non-overlap and overlap (each calculated if pointer is not null )
+// combining both non-overlap and overlap (each calculated if pointer is not null )
 		size_t            texture_rbg_stride, // in floats
 		float           * gpu_texture_rbg,    // (number of colors +1 + ?)*16*16 rgba texture tiles
 		size_t            texture_stride,     // in floats (now 256*4 = 1024)
-		float           * gpu_texture_tiles); // (number of colors +1 + ?)*16*16 rgba texture tiles
+		float           * gpu_texture_tiles);  // (number of colors +1 + ?)*16*16 rgba texture tiles

 extern "C"
 __global__ void imclt_rbg_all(