Commit 0777a4a7 authored by Andrey Filippov

Comparing disp_dist and ERS between Java and GPU

parent 1b16c1e5
......@@ -789,6 +789,7 @@ public class CLTParameters {
public boolean gpu_save_ports_xy = false; // debug feature - save calculated ports X,Y to compare with Java-generated
public boolean gpu_show_jtextures = true; // debug feature - show Java-generated textures from non-overlapping in GPU (will not generate if false)
public boolean gpu_show_extra = true; // show low-res data for macro
public boolean gpu_show_geometry = true; // show geometry correction
public boolean gpu_use_main = false; // accelerate tile processor for the main quad camera
public boolean gpu_use_main_macro = false; // accelerate tile processor for the main quad camera in macro mode
......@@ -1591,6 +1592,7 @@ public class CLTParameters {
properties.setProperty(prefix+"gpu_save_ports_xy", this.gpu_save_ports_xy +"");
properties.setProperty(prefix+"gpu_show_jtextures", this.gpu_show_jtextures +"");
properties.setProperty(prefix+"gpu_show_extra", this.gpu_show_extra +"");
properties.setProperty(prefix+"gpu_show_geometry", this.gpu_show_geometry +"");
properties.setProperty(prefix+"gpu_use_main", this.gpu_use_main +"");
properties.setProperty(prefix+"gpu_use_main_macro", this.gpu_use_main_macro +"");
......@@ -2377,6 +2379,7 @@ public class CLTParameters {
if (properties.getProperty(prefix+"gpu_save_ports_xy")!=null) this.gpu_save_ports_xy=Boolean.parseBoolean(properties.getProperty(prefix+"gpu_save_ports_xy"));
if (properties.getProperty(prefix+"gpu_show_jtextures")!=null) this.gpu_show_jtextures=Boolean.parseBoolean(properties.getProperty(prefix+"gpu_show_jtextures"));
if (properties.getProperty(prefix+"gpu_show_extra")!=null) this.gpu_show_extra=Boolean.parseBoolean(properties.getProperty(prefix+"gpu_show_extra"));
if (properties.getProperty(prefix+"gpu_show_geometry")!=null) this.gpu_show_geometry=Boolean.parseBoolean(properties.getProperty(prefix+"gpu_show_geometry"));
if (properties.getProperty(prefix+"gpu_use_main")!=null) this.gpu_use_main=Boolean.parseBoolean(properties.getProperty(prefix+"gpu_use_main"));
if (properties.getProperty(prefix+"gpu_use_main_macro")!=null) this.gpu_use_main_macro=Boolean.parseBoolean(properties.getProperty(prefix+"gpu_use_main_macro"));
......@@ -3326,6 +3329,7 @@ public class CLTParameters {
gd.addCheckbox ("Debug feature - save calculated ports X,Y to compare with Java-generated", this.gpu_save_ports_xy);
gd.addCheckbox ("Show Java-generated textures from non-overlapping in GPU (will not generate if false)", this.gpu_show_jtextures);
gd.addCheckbox ("Show low-res data for macro (will not generate if false)", this.gpu_show_extra);
gd.addCheckbox ("Show per-tile geometry corrected tile coordinates and disparity derivatives", this.gpu_show_geometry);
gd.addCheckbox ("Accelerate tile processor for the main quad camera", this.gpu_use_main);
gd.addCheckbox ("Accelerate tile processor for the main quad camera in macro mode", this.gpu_use_main_macro);
......@@ -4085,6 +4089,7 @@ public class CLTParameters {
this.gpu_save_ports_xy= gd.getNextBoolean();
this.gpu_show_jtextures= gd.getNextBoolean();
this.gpu_show_extra= gd.getNextBoolean();
this.gpu_show_geometry= gd.getNextBoolean();
this.gpu_use_main= gd.getNextBoolean();
this.gpu_use_main_macro= gd.getNextBoolean();
......
......@@ -190,6 +190,38 @@ public class GPUTileProcessor {
this.task = task;
this.disp_dist = new float [NUM_CAMS][4];
}
/**
* Initialize this task from a float array read back from the GPU
* @param flt float array containing the tasks data
* @param indx offset of this task in flt (task_number * TPTASK_SIZE)
* @param use_aux true to store the per-camera coordinates as aux (xy_aux), false as main (xy)
*/
public TpTask(float [] flt, int indx, boolean use_aux)
{
task = Float.floatToIntBits(flt[indx++]);
int txy = Float.floatToIntBits(flt[indx++]);
ty = txy >> 16;
tx = txy & 0xffff;
if (use_aux) {
xy_aux = new float[NUM_CAMS][2];
for (int i = 0; i < NUM_CAMS; i++) {
xy_aux[i][0] = flt[indx++];
xy_aux[i][1] = flt[indx++];
}
} else {
xy = new float[NUM_CAMS][2];
for (int i = 0; i < NUM_CAMS; i++) {
xy[i][0] = flt[indx++];
xy[i][1] = flt[indx++];
}
}
target_disparity = flt[indx++];
disp_dist = new float [NUM_CAMS][4];
for (int i = 0; i < NUM_CAMS; i++) {
for (int j = 0; j < 4; j++) {
disp_dist[i][j] = flt[indx++];
}
}
}
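// Serialized task layout assumed by the constructor above and by asFloatArray() below
// (one task occupies TPTASK_SIZE floats, read in this order): [0] task bits,
// [1] packed (ty << 16) | tx, [2 .. 2*NUM_CAMS+1] per-camera X,Y, [2*NUM_CAMS+2] target_disparity,
// then NUM_CAMS * 4 disp_dist values.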
// convert this class instance to float array to match layout of the C struct
public float [] asFloatArray(boolean use_aux) {
......@@ -896,6 +928,19 @@ public class GPUTileProcessor {
}
cuMemcpyHtoD(gpu_tasks, Pointer.to(ftasks), TPTASK_SIZE * num_task_tiles * Sizeof.FLOAT);
}
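/**
* Read the task array back from GPU memory (gpu_tasks), e.g. to compare the
* GPU-calculated per-camera X,Y and disp_dist with the Java-generated ones
* @param use_aux true to store the per-camera coordinates as aux (xy_aux), false as main (xy)
* @return array of TpTask instances reconstructed from the GPU data
*/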
public TpTask [] getTasks (boolean use_aux)
{
float [] ftasks = new float [TPTASK_SIZE * num_task_tiles];
cuMemcpyDtoH(Pointer.to(ftasks), gpu_tasks, TPTASK_SIZE * num_task_tiles * Sizeof.FLOAT);
TpTask [] tile_tasks = new TpTask[num_task_tiles];
for (int i = 0; i < num_task_tiles; i++) {
tile_tasks[i] = new TpTask(ftasks, i* TPTASK_SIZE, use_aux);
}
return tile_tasks;
}
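// Hedged usage sketch (not part of this commit): compare the read-back GPU geometry with the
// Java-side GeometryCorrection output, which is what the "comparing disp_dist and ers" debugging
// is about. "javaTask" is a hypothetical per-tile structure holding the Java-computed xy and
// disp_dist for the same tile.
// TpTask [] gpu_tasks_rb = getTasks(false); // false: main-camera coordinates
// for (TpTask t : gpu_tasks_rb) {
//     for (int cam = 0; cam < NUM_CAMS; cam++) {
//         double dx = t.xy[cam][0] - javaTask[t.ty][t.tx].xy[cam][0];
//         double dy = t.xy[cam][1] - javaTask[t.ty][t.tx].xy[cam][1];
//         for (int k = 0; k < 4; k++) {
//             double dd = t.disp_dist[cam][k] - javaTask[t.ty][t.tx].disp_dist[cam][k];
//             // report dx, dy, dd per tile/camera to localize Java/GPU mismatches
//         }
//     }
// }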
/*
public void setCorrIndices(int [] corr_indices)
{
......@@ -1364,6 +1409,7 @@ public class GPUTileProcessor {
* @param tp_tasks array of tasks that contain masks of the required pairs
* @return each element has (tile_number << 8) | (pair_number & 0xff)
*/
@Deprecated
public int [] getCorrTasks(
TpTask [] tp_tasks) {
int tilesX = img_width / DTT_SIZE;
......@@ -1390,12 +1436,12 @@ public class GPUTileProcessor {
}
return iarr;
}
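// Decoding sketch for the packed indices returned by getCorrTasks() above
// (per the javadoc: (tile_number << 8) | (pair_number & 0xff)):
// int tile_number = packed >> 8;
// int pair_number = packed & 0xff; // for texture lists the low byte is (1 << LIST_TEXTURE_BIT) instead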
/**
* Prepare contents pointers for calculation of the texture tiles (RGBA, 16x16)
* @param tp_tasks array of tasks that contain masks of the required pairs
* @return each element has (tile_number << 8) | (1 << LIST_TEXTURE_BIT)
*/
@Deprecated
public int [] getTextureTasks(
TpTask [] tp_tasks) {
int tilesX = img_width / DTT_SIZE;
......@@ -2215,6 +2261,7 @@ public class GPUTileProcessor {
return fimg;
}
@Deprecated
public void getTileSubcamOffsets(
final TpTask[] tp_tasks, // will use // modify to have offsets for 8 cameras
final GeometryCorrection geometryCorrection_main,
......
......@@ -3052,21 +3052,13 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
* 5) return port center X and Y
* line_time
*/
double [] imu = null;
if (disp_dist != null) {
imu = extrinsic_corr.getIMU(); // currently it is common for all channels
if ((deriv_rots == null) && ((imu[0] != 0.0) || (imu[1] != 0.0) ||(imu[2] != 0.0))){
// get deriv_rots - they are needed
// if (use_rig_offsets) {
// deriv_rots = extrinsic_corr.getRotDeriveMatrices(getRigMatrix);
// } else {
deriv_rots = extrinsic_corr.getRotDeriveMatrices();
// }
}
}
if ((disp_dist == null) && (pXYderiv != null)) {
disp_dist = new double [numSensors][4];
}
......@@ -3097,7 +3089,6 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
double pYci0 = pYc - disparity * rXY[i][1];
// rectilinear, end of dealing with possibly other (master) camera, below all is for this camera distortions
// Convert a 2-d non-distorted vector to 3d at fl_pix distance in z direction
double [][] avi = {{pXci0}, {pYci0},{fl_pix}};
Matrix vi = new Matrix(avi); // non-distorted sensor channel view vector in pixels (z -along the common axis)
......@@ -3111,20 +3102,10 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
double pXci = rvi.get(0, 0) * norm_z;
double pYci = rvi.get(1, 0) * norm_z;
// debug
double norm_z_dbg = fl_pix/vi.get(2, 0);
double pXci_dbg = vi.get(0, 0) * norm_z_dbg;
double pYci_dbg = vi.get(1, 0) * norm_z_dbg;
// Re-apply distortion
double rNDi = Math.sqrt(pXci*pXci + pYci*pYci); // in pixels
// Rdist/R=A8*R^7+A7*R^6+A6*R^5+A5*R^4+A*R^3+B*R^2+C*R+(1-A8-A7-A6-A5-A-B-C)
double ri = rNDi* ri_scale; // relative to distortion radius
// double rD2rND = (1.0 - distortionA8 - distortionA7 - distortionA6 - distortionA5 - distortionA - distortionB - distortionC);
double rNDi_dbg = Math.sqrt(pXci_dbg*pXci_dbg + pYci_dbg*pYci_dbg); // in pixels
double ri_dbg = rNDi_dbg* ri_scale; // relative to distortion radius
double rD2rND = 1.0;
double rri = 1.0;
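// Hedged sketch (an assumption, not the elided code): one way to accumulate rD2rND from the
// polynomial in the comment above, with rad_coeff = {C, B, A, A5, A6, A7, A8}; the implicit
// constant term keeps rD2rND == 1.0 at ri == 1.0:
// for (int j = 0; j < rad_coeff.length; j++) {
//     rri *= ri;                            // ri^(j+1)
//     rD2rND += rad_coeff[j] * (rri - 1.0);
// }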
......@@ -3137,7 +3118,6 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
double pXid = pXci * rD2rND;
double pYid = pYci * rD2rND;
pXY[i][0] = pXid + this.pXY0[i][0];
pXY[i][1] = pYid + this.pXY0[i][1];
......@@ -3173,7 +3153,6 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
}
}
double delta_t = 0.0;
// double [] imu = null;
double [][] dpXci_pYci_imu_lin = new double[2][3]; // null
if (disp_dist != null) {
disp_dist[i] = new double [4]; // dx/d_disp, dx_d_ccw_disp
......@@ -3184,7 +3163,6 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
{ 0.0, 0.0, 0.0}}; // what is last element???
Matrix dd0 = new Matrix(add0);
Matrix dd1 = rots[i].times(dd0).getMatrix(0, 1,0,1).times(norm_z); // get top left 2x2 sub-matrix
//// Matrix dd1 = dd0.getMatrix(0, 1,0,1); // get top left 2x2 sub-matrix
// now first column of 2x2 dd1 - x, y components of derivatives by disparity, second column - derivatives by ortho to disparity (~Y in 2d correlation)
// unity vector in the direction of radius
double c_dist = pXci/rNDi;
......@@ -3206,12 +3184,7 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
disp_dist[i][1] = dd2.get(0, 1);
disp_dist[i][2] = dd2.get(1, 0); // d_py/d_disp
disp_dist[i][3] = dd2.get(1, 1);
// imu = extrinsic_corr.getIMU(i); // currently it is common for all channels
// ERS linear does not yet use per-port rotations, probably not needed
// double [][] dpXci_pYci_imu_lin = new double[2][3]; // null
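// ERS correction: disp_dist[i][2] (d_pY/d_disp) times disparity is the vertical pixel shift of
// this tile center in this channel, so multiplying by line_time gives delta_t, the rolling-shutter
// capture-time offset of the tile; the angular rates imu[0..2] and linear velocities imu[3..5]
// acting over delta_t then shift the pixel coordinates (ers_Xci, ers_Yci).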
if ((imu != null) &&((imu[0] != 0.0) || (imu[1] != 0.0) ||(imu[2] != 0.0) ||(imu[3] != 0.0) ||(imu[4] != 0.0) ||(imu[5] != 0.0))) {
delta_t = dd2.get(1, 0) * disparity * line_time; // positive for top cameras, negative - for bottom
double ers_Xci = delta_t* (dpXci_dtilt * imu[0] + dpXci_dazimuth * imu[1] + dpXci_droll * imu[2]);
......@@ -3229,16 +3202,12 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
}
pXY[i][0] += ers_Xci * rD2rND; // added correction to pixel X
pXY[i][1] += ers_Yci * rD2rND; // added correction to pixel Y
} else {
imu = null;
}
// TODO: calculate derivatives of pX, pY by 3 imu omegas
}
if (pXYderiv != null) {
pXYderiv[2 * i] = new double [CorrVector.LENGTH];
pXYderiv[2 * i+1] = new double [CorrVector.LENGTH];
......@@ -3272,8 +3241,6 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
// assuming drD2rND_imu* is zero (rD2rND does not depend on imu_*
// hope it will not be needed, as derivatives are used only for filed calibration, handled differently
if (imu != null) {
// dpX_d = delta_t * rD2rND * (dpXci_dtilt * imu[0] + dpXci_dazimuth * imu[1] + dpXci_droll * imu[2]);
// dpX_d = delta_t * rD2rND * (dpYci_dtilt * imu[0] + dpYci_dazimuth * imu[1] + dpYci_droll * imu[2]);
pXYderiv[2 * i + 0][CorrVector.IMU_INDEX+0] = delta_t * rD2rND * dpXci_dtilt; // * imu[0];
pXYderiv[2 * i + 1][CorrVector.IMU_INDEX+0] = delta_t * rD2rND * dpYci_dtilt; // * imu[0];
pXYderiv[2 * i + 0][CorrVector.IMU_INDEX+1] = delta_t * rD2rND * dpXci_dazimuth; // * imu[1];
......@@ -3285,9 +3252,6 @@ matrix([[-0.125, -0.125, 0.125, 0.125, -0.125, 0.125, -0. , -0. , -0.
pXYderiv[2 * i + 1][CorrVector.IMU_INDEX+4] = delta_t * rD2rND * dpXci_pYci_imu_lin[1][1]; // * imu[5];
pXYderiv[2 * i + 0][CorrVector.IMU_INDEX+5] = delta_t * rD2rND * dpXci_pYci_imu_lin[0][2]; // * imu[5];
pXYderiv[2 * i + 1][CorrVector.IMU_INDEX+5] = delta_t * rD2rND * dpXci_pYci_imu_lin[1][2]; // * imu[5];
// TODO: Add linear egomotion
}
// verify that d/dsym are well, symmetrical
......
......@@ -191,6 +191,13 @@ public class ImageDttCPU {
"top-aux", "bottom-aux", "left_aux", "right-aux", "diagm-aux", "diago-aux", "hor-aux", "vert-aux",
"inter", "other", "dbg1"};
static String [] GEOM_TITLES_DBG ={
"px0","py0","px1","py1","px2","py2","px3","py3",
"dd0-0","dd0-1","dd0-2","dd0-3",
"dd1-0","dd1-1","dd1-2","dd1-3",
"dd2-0","dd2-1","dd2-2","dd2-3",
"dd3-0","dd3-1","dd3-2","dd3-3"};
public static int ML_OTHER_TARGET = 0; // Offset to target disparity data in ML_OTHER_INDEX layer tile
public static int ML_OTHER_GTRUTH = 2; // Offset to ground truth disparity data in ML_OTHER_INDEX layer tile
public static int ML_OTHER_GTRUTH_STRENGTH = 4; // Offset to ground truth confidence data in ML_OTHER_INDEX layer tile
......@@ -9596,6 +9603,7 @@ public class ImageDttCPU {
final int tilesX=width/transform_size;
final int tilesY=height/transform_size;
final int nTilesInChn=tilesX*tilesY;
final double [][] geom_dbg = new double [GEOM_TITLES_DBG.length][nTilesInChn]; // per-tile geometry debug data (24 layers)
// clt_data does not need to be for the whole image (no, it is used for textures)
final double [][][][][][][] clt_bidata = (keep_clt_data)? (new double[2][][][][][][]):null;
if (clt_bidata != null) {
......@@ -9829,7 +9837,15 @@ public class ImageDttCPU {
centerX,
centerY,
disparity_main); // + disparity_corr);
if (geom_dbg != null) {
for (int i = 0; i < quad_main; i++) {
geom_dbg[2 * i + 0][nTile] = centersXY_main[i][0]; // x
geom_dbg[2 * i + 1][nTile] = centersXY_main[i][1]; // y
for (int j = 0; j < 4; j++) {
geom_dbg[2 * quad_main + 4 * i + j][nTile] = disp_dist_main[i][j];
}
}
}
centersXY_aux = geometryCorrection_aux.getPortsCoordinatesAndDerivatives(
geometryCorrection_main, // GeometryCorrection gc_main,
true, // boolean use_rig_offsets,
......@@ -10327,6 +10343,15 @@ public class ImageDttCPU {
};
}
startAndJoin(threads);
if (geom_dbg != null) {
(new ShowDoubleFloatArrays()).showArrays(
geom_dbg,
tilesX,
tilesY,
true,
"geom_dbg",
GEOM_TITLES_DBG);
}
// If it was low-texture mode, use lt_corr to average bi-quad inter-correlation between neighbor tiles and then calculate disparity/strength
if (lt_corr != null) {
......
......@@ -24,6 +24,8 @@ package com.elphel.imagej.tileprocessor;
**
*/
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import java.awt.Rectangle;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
......@@ -42,9 +44,12 @@ import com.elphel.imagej.common.ShowDoubleFloatArrays;
import com.elphel.imagej.correction.CorrectionColorProc;
import com.elphel.imagej.correction.EyesisCorrections;
import com.elphel.imagej.gpu.GPUTileProcessor;
import com.elphel.imagej.gpu.GPUTileProcessor.TpTask;
import ij.ImagePlus;
import ij.ImageStack;
import jcuda.Pointer;
import jcuda.Sizeof;
public class QuadCLT extends QuadCLTCPU {
private GPUTileProcessor.GpuQuad gpuQuad = null;
......@@ -750,6 +755,46 @@ public class QuadCLT extends QuadCLTCPU {
int tilesX = quadCLT_main.getGPU().getImageWidth() / quadCLT_main.getGPU().getDttSize();
int tilesY = quadCLT_main.getGPU().getImageHeight() / quadCLT_main.getGPU().getDttSize();
if (clt_parameters.gpu_show_geometry) {
// GPUTileProcessor.TpTask []
tp_tasks = quadCLT_main.getGPU().getTasks (false); // use_aux = false: read main-camera coordinates
double [][] geom_dbg = new double [ImageDtt.GEOM_TITLES_DBG.length][tilesX*tilesY];
int num_cams = GPUTileProcessor.NUM_CAMS;
for (int nt = 0; nt < tp_tasks.length; nt++) {
GPUTileProcessor.TpTask task = tp_tasks[nt];
int nTile = task.ty * tilesX + task.tx;
for (int i = 0; i < num_cams; i++) {
geom_dbg[2 * i + 0][nTile] = task.xy[i][0]; // x
geom_dbg[2 * i + 1][nTile] = task.xy[i][1]; // y
for (int j = 0; j < 4; j++) {
geom_dbg[2 * num_cams + 4 * i + j][nTile] = task.disp_dist[i][j];
}
}
}
(new ShowDoubleFloatArrays()).showArrays(
geom_dbg,
tilesX,
tilesY,
true,
name+"-GEOM-DBG-D"+clt_parameters.disparity,
ImageDtt.GEOM_TITLES_DBG);
}
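// The "-GEOM-DBG-D*" stack above uses the same GEOM_TITLES_DBG layout as the Java-side
// "geom_dbg" stack shown from ImageDttCPU, so the two windows can be compared tile by tile
// to locate Java/GPU differences in the port coordinates and disp_dist.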
/*
public TpTask [] getTasks (boolean use_aux)
{
float [] ftasks = new float [TPTASK_SIZE * num_task_tiles];
cuMemcpyDtoH(Pointer.to(ftasks), gpu_tasks, TPTASK_SIZE * num_task_tiles * Sizeof.FLOAT);
TpTask [] tile_tasks = new TpTask[num_task_tiles];
for (int i = 0; i < num_task_tiles; i++) {
tile_tasks[i] = new TpTask(ftasks, i* TPTASK_SIZE, use_aux);
}
return tile_tasks;
}
*/
// Read extra data for macro generation: 4 DIFFs, 4 of R, 4 of B, 4 of G
// Available after gpu_diff_rgb_combo is generated in execTextures
......