removed former constants

4a49fd62 · Andrey Filippov · c581440b · 4a49fd62 · 4a49fd62
Commit 4a49fd62 authored Aug 07, 2020 by Andrey Filippov
Show whitespace changes
Inline Side-by-side

Showing with 41 additions and 85 deletions

GPUTileProcessor.java src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java +25 -69

TwoQuadCLT.java ...main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java +16 -16

No files found.
--- a/src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
+++ b/src/main/java/com/elphel/imagej/gpu/GPUTileProcessor.java
@@ -109,8 +109,8 @@ public class GPUTileProcessor {
 	public static int NUM_CAMS =                  4;
 	public static int NUM_PAIRS =                 6; // top hor, bottom hor, left vert, right vert, main diagonal, other diagonal
 	public static int NUM_COLORS =                3;
-	public static int IMG_WIDTH =              2592;
-	public static int IMG_HEIGHT =             1936;
+//	public static int IMG_WIDTH =              2592;
+//	public static int IMG_HEIGHT =             1936;
 	static int        KERNELS_HOR =             164;
 	static int        KERNELS_VERT =            123;
 	static int        KERNELS_LSTEP =             4;
@@ -124,8 +124,6 @@ public class GPUTileProcessor {
 	static int        IMCLT_TILES_PER_BLOCK =     4;
 	static int        TPTASK_SIZE =                 1+ 1+ NUM_CAMS * 2 + 1 + NUM_CAMS * 4 ; // tp_task structure size in floats
 	static int        CLTEXTRA_SIZE =               8;
-	static int        KERN_TILES =                  KERNELS_HOR *  KERNELS_VERT * NUM_COLORS;
-	static int        KERN_SIZE =                   KERN_TILES * 4 * 64;
 	static int        CORR_SIZE =                   (2* DTT_SIZE - 1) * (2* DTT_SIZE - 1); // 15x15
 	public static int CORR_NTILE_SHIFT =          8;  // also for texture tiles list
 	public static int CORR_PAIRS_MASK =        0x3f;  // lower bits used to address correlation pair for the selected tile
@@ -165,61 +163,6 @@ public class GPUTileProcessor {

    CUmodule    module; // to access constants memory
    
-    // CPU arrays of pointers to GPU memory
-    // Moved to GpuQuad class
-/*    
-    // These arrays may go to methods, they are here just to be able to free GPU memory if needed
-    private CUdeviceptr [] gpu_kernels_h =        new CUdeviceptr[NUM_CAMS];
-    private CUdeviceptr [] gpu_kernel_offsets_h = new CUdeviceptr[NUM_CAMS];
-    private CUdeviceptr [] gpu_bayer_h =          new CUdeviceptr[NUM_CAMS];
-    private CUdeviceptr [] gpu_clt_h =            new CUdeviceptr[NUM_CAMS];
-    private CUdeviceptr [] gpu_corr_images_h=     new CUdeviceptr[NUM_CAMS];
-
-
-    // GPU pointers to array of GPU pointers
-    private CUdeviceptr gpu_kernels =             new CUdeviceptr();
-    private CUdeviceptr gpu_kernel_offsets =      new CUdeviceptr();
-    private CUdeviceptr gpu_bayer =               new CUdeviceptr();
-    private CUdeviceptr gpu_tasks =               new CUdeviceptr(); //  allocate tilesX * tilesY * TPTASK_SIZE * Sizeof.FLOAT
-    private CUdeviceptr gpu_corrs =               new CUdeviceptr(); //  allocate tilesX * tilesY * NUM_PAIRS * CORR_SIZE * Sizeof.FLOAT
-    private CUdeviceptr gpu_textures =            new CUdeviceptr(); //  allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
-    private CUdeviceptr gpu_clt =                 new CUdeviceptr();
-    private CUdeviceptr gpu_4_images =            new CUdeviceptr();
-    private CUdeviceptr gpu_corr_indices =        new CUdeviceptr(); //  allocate tilesX * tilesY * 6 * Sizeof.FLOAT
-    private CUdeviceptr gpu_num_corr_tiles =      new CUdeviceptr(); //  allocate tilesX * tilesY * 6 * Sizeof.FLOAT
-    private CUdeviceptr gpu_texture_indices_ovlp =new CUdeviceptr(); //  allocate tilesX * tilesY * 6 * Sizeof.FLOAT
-    private CUdeviceptr gpu_num_texture_ovlp =    new CUdeviceptr(); //  8 ints
-    private CUdeviceptr gpu_texture_indices =     new CUdeviceptr(); //  allocate tilesX * tilesY * 6 * Sizeof.FLOAT
-    private CUdeviceptr gpu_texture_indices_len = new CUdeviceptr(); //  allocate tilesX * tilesY * 6 * Sizeof.FLOAT
-    private CUdeviceptr gpu_diff_rgb_combo =      new CUdeviceptr(); //  1 int
-
-    private CUdeviceptr gpu_color_weights =       new CUdeviceptr(); //  allocate 3 * Sizeof.FLOAT
-    private CUdeviceptr gpu_generate_RBGA_params =new CUdeviceptr(); //  allocate 5 * Sizeof.FLOAT
-
-    private CUdeviceptr gpu_woi =                 new CUdeviceptr(); //  4 integers (x, y, width, height) Rectangle - in tiles
-    private CUdeviceptr gpu_textures_rgba =       new CUdeviceptr(); //  allocate tilesX * tilesY * ? * 256 * Sizeof.FLOAT
-
-    private CUdeviceptr gpu_correction_vector=    new CUdeviceptr();
-    private CUdeviceptr gpu_rot_deriv=            new CUdeviceptr(); //  used internally by device, may be read to CPU for testing
-    private CUdeviceptr gpu_geometry_correction=  new CUdeviceptr();
-    private CUdeviceptr gpu_rByRDist=             new CUdeviceptr(); //  calculated once for the camera distortion model in CPU (move to GPU?)
-
-    private CUdeviceptr gpu_active_tiles =        new CUdeviceptr(); //  TILESX*TILESY*sizeof(int)
-    private CUdeviceptr gpu_num_active_tiles =    new CUdeviceptr(); //  1 int
-
-    CUmodule    module; // to access constants memory
-    private int mclt_stride;
-    private int corr_stride;
-    private int imclt_stride;
-    private int texture_stride;
-    private int texture_stride_rgba;
-    public int num_task_tiles;
-    public int num_corr_tiles;
-    public int num_texture_tiles;
-*/    
-//    public GpuQuad [][] gpuQuad; // array of GpuQuad instances 2x2? ({{rgb, rgb_macro}, {lwir, lwir_macro})
-    // initilize with 4 dimensions each
-    
    public class TpTask {
    	public int   task; // [0](+1) - generate 4 images, [4..9]+16..+512 - correlation pairs, 2 - generate texture tiles
    	public float target_disparity;
@@ -356,10 +299,10 @@ public class GPUTileProcessor {
        				"#define NUM_CAMS " +                 NUM_CAMS+"\n"+
        				"#define NUM_PAIRS " +                NUM_PAIRS+"\n"+
        				"#define NUM_COLORS " +               NUM_COLORS+"\n"+
-        				"#define IMG_WIDTH " +                IMG_WIDTH+"\n"+
-        				"#define IMG_HEIGHT " +               IMG_HEIGHT+"\n"+
-        				"#define KERNELS_HOR " +              KERNELS_HOR+"\n"+
-        				"#define KERNELS_VERT " +             KERNELS_VERT+"\n"+
+//        				"#define IMG_WIDTH " +                IMG_WIDTH+"\n"+
+//        				"#define IMG_HEIGHT " +               IMG_HEIGHT+"\n"+
+//        				"#define KERNELS_HOR " +              KERNELS_HOR+"\n"+
+//        				"#define KERNELS_VERT " +             KERNELS_VERT+"\n"+
        				"#define KERNELS_LSTEP " +            KERNELS_LSTEP+"\n"+
        				"#define THREADS_PER_TILE " +         THREADS_PER_TILE+"\n"+
        				"#define TILES_PER_BLOCK " +          TILES_PER_BLOCK+"\n"+
@@ -449,7 +392,6 @@ public class GPUTileProcessor {
            	}
            }
        }
-
        // Create the kernel functions (first - just test)
        String [] func_names = {
        		GPU_CONVERT_DIRECT_NAME,
@@ -487,6 +429,7 @@ public class GPUTileProcessor {
        // GPU data structures are now initialized through GpuQuad instances
    }
    
+
    public static String [] getCorrTitles() {
    	return new String []{"hor-top","hor-bottom","vert-left","vert-right","diag-main","diag-other"};
    }
@@ -632,6 +575,9 @@ public class GPUTileProcessor {
    	
    	public final int num_cams;
    	public final int num_colors; // maybe should always be 3?
+    	public final int kern_tiles;
+    	public final int kern_size;
+    	
 //    	public final GPUTileProcessor gPUTileProcessor;
        // CPU arrays of pointers to GPU memory
        // These arrays may go to methods, they are here just to be able to free GPU memory if needed
@@ -689,6 +635,9 @@ public class GPUTileProcessor {
        	this.num_colors =   num_colors; // maybe should always be 3?
        	this.kernels_hor =  kernels_hor;
        	this.kernels_vert = kernels_vert;
+        	this.kern_tiles =   kernels_hor *  kernels_vert * num_colors;
+        	this.kern_size =    kern_tiles * 4 * 64;
+        	
        	
            // CPU arrays of pointers to GPU memory
            // These arrays may go to methods, they are here just to be able to free GPU memory if needed
@@ -734,9 +683,9 @@ public class GPUTileProcessor {
            long [] device_stride = new long [1];
            for (int ncam = 0; ncam < num_cams; ncam++) {
            	gpu_kernels_h[ncam] =        new CUdeviceptr();
-            	cuMemAlloc(gpu_kernels_h[ncam],KERN_SIZE * Sizeof.FLOAT ); //     public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
+            	cuMemAlloc(gpu_kernels_h[ncam],kern_size * Sizeof.FLOAT ); //     public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
            	gpu_kernel_offsets_h[ncam] = new CUdeviceptr();
-            	cuMemAlloc(gpu_kernel_offsets_h[ncam],KERN_TILES * CLTEXTRA_SIZE * Sizeof.FLOAT ); //     public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
+            	cuMemAlloc(gpu_kernel_offsets_h[ncam],kern_tiles * CLTEXTRA_SIZE * Sizeof.FLOAT ); //     public static int cuMemAlloc(CUdeviceptr dptr, long bytesize)
            	gpu_bayer_h[ncam] =          new CUdeviceptr();
                cuMemAllocPitch (
                		gpu_bayer_h[ncam],        // CUdeviceptr dptr,
@@ -848,6 +797,11 @@ public class GPUTileProcessor {
            texture_stride_rgba = (int)(device_stride[0] / Sizeof.FLOAT);
    	}
    	
+    	public int getImageWidth()  {return this.img_width;} // per-instance width; callers use it instead of the removed static IMG_WIDTH
+    	public int getImageHeight() {return this.img_height;} // per-instance height; callers use it instead of the removed static IMG_HEIGHT
+        public int getDttSize()     {return DTT_SIZE;} // returns the class-level DTT_SIZE constant (not instance state)
+        public int getNumCams()     {return num_cams;} // camera count of THIS instance (the final num_cams field), not the static NUM_CAMS default
+    	
        public void setGeometryCorrection(GeometryCorrection gc,
        		boolean use_java_rByRDist) { // false - use newer GPU execCalcReverseDistortions
        	float [] fgc = gc.toFloatArray();
@@ -922,8 +876,8 @@ public class GPUTileProcessor {
        		float [] kernel,  // [tileY][tileX][color][..]
        		float [] kernel_offsets,
        		int ncam) {
-            cuMemcpyHtoD(gpu_kernels_h[ncam],        Pointer.to(kernel),         KERN_SIZE * Sizeof.FLOAT);
-            cuMemcpyHtoD(gpu_kernel_offsets_h[ncam], Pointer.to(kernel_offsets), KERN_TILES * CLTEXTRA_SIZE * Sizeof.FLOAT);
+            cuMemcpyHtoD(gpu_kernels_h[ncam],        Pointer.to(kernel),         kern_size * Sizeof.FLOAT);
+            cuMemcpyHtoD(gpu_kernel_offsets_h[ncam], Pointer.to(kernel_offsets), kern_tiles * CLTEXTRA_SIZE * Sizeof.FLOAT);
        }

        public void setConvolutionKernels(
@@ -1304,6 +1258,7 @@ public class GPUTileProcessor {
                return;
            }
            // kernel parameters: pointer to pointers
+        	int tilesX =  img_width / DTT_SIZE;
            int [] GridFullWarps =    {1, 1, 1};
            int [] ThreadsFullWarps = {1, 1, 1};
            Pointer kernelParameters = Pointer.to(
@@ -1321,7 +1276,8 @@ public class GPUTileProcessor {
            		Pointer.to(new int[] { kernels_hor}),        // int                kernels_hor,
            		Pointer.to(new int[] { kernels_vert}),       // int                kernels_vert);
            		Pointer.to(gpu_active_tiles),
-            		Pointer.to(gpu_num_active_tiles)
+            		Pointer.to(gpu_num_active_tiles),
+            		Pointer.to(new int[] { tilesX })
            		);

            cuCtxSynchronize();

--- a/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+++ b/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
@@ -2204,22 +2204,22 @@ public class TwoQuadCLT {
 		System.out.println(" - textures:           "+(runTexturesTime*1.0e-6)+"ms");
 		System.out.println(" - RGBA:               "+(runTexturesRBGATime*1.0e-6)+"ms");
 		// get data back from GPU
-		float [][][] iclt_fimg = new float [GPUTileProcessor.NUM_CAMS][][];
+		float [][][] iclt_fimg = new float [gpuQuad_main.getNumCams()][][];
 		for (int ncam = 0; ncam < iclt_fimg.length; ncam++) {
 			iclt_fimg[ncam] = gpuQuad_main.getRBG(ncam);
 		}
-
-		int out_width =  GPUTileProcessor.IMG_WIDTH +  GPUTileProcessor.DTT_SIZE;
-		int out_height = GPUTileProcessor.IMG_HEIGHT + GPUTileProcessor.DTT_SIZE;
-		int tilesX =  GPUTileProcessor.IMG_WIDTH / GPUTileProcessor.DTT_SIZE;
-		int tilesY =  GPUTileProcessor.IMG_HEIGHT / GPUTileProcessor.DTT_SIZE;
+//		output image size and tile counts are queried from the gpuQuad_main instance
+		int out_width =  gpuQuad_main.getImageWidth()  + gpuQuad_main.getDttSize();
+		int out_height = gpuQuad_main.getImageHeight() + gpuQuad_main.getDttSize();
+		int tilesX =     gpuQuad_main.getImageWidth()  / gpuQuad_main.getDttSize();
+		int tilesY =     gpuQuad_main.getImageHeight() / gpuQuad_main.getDttSize();
 		// show extra
 		/* */
 		String [] extra_group_titles = {"DIFF","Red","Blue","Green"};
-		String [] extra_titles = new String [extra_group_titles.length*GPUTileProcessor.NUM_CAMS];
+		String [] extra_titles = new String [extra_group_titles.length*gpuQuad_main.getNumCams()];
 		for (int g = 0; g < extra_group_titles.length;g++) {
-			for (int ncam=0; ncam < GPUTileProcessor.NUM_CAMS;ncam++) {
-				extra_titles[g * GPUTileProcessor.NUM_CAMS+ncam]= extra_group_titles[g]+"-"+ncam;
+			for (int ncam=0; ncam < gpuQuad_main.getNumCams();ncam++) {
+				extra_titles[g * gpuQuad_main.getNumCams() + ncam]= extra_group_titles[g]+"-"+ncam;
 			}
 		}
 		float [][] extra = gpuQuad_main.getExtra();
@@ -2416,7 +2416,7 @@ public class TwoQuadCLT {
 					texture_indices.length,
 		    		(is_mono?1:3), // int     num_colors,
 		    		clt_parameters.keep_weights); // boolean keep_weights);
-	    	int texture_slice_size = (2 * GPUTileProcessor.DTT_SIZE)* (2 * GPUTileProcessor.DTT_SIZE);
+	    	int texture_slice_size = (2 * gpuQuad_main.getDttSize())* (2 * gpuQuad_main.getDttSize());
 	    	int texture_tile_size = texture_slice_size * num_src_slices ;

 			if (debugLevel > -1) {
@@ -2430,10 +2430,10 @@ public class TwoQuadCLT {

 		    			for (int slice =0; slice < num_src_slices; slice++) {
 		    				System.out.println("=== Slice="+slice+" ===");
-		    				for (int i = 0; i < 2 * GPUTileProcessor.DTT_SIZE; i++) {
-		    					for (int j = 0; j < 2 * GPUTileProcessor.DTT_SIZE; j++) {
+		    				for (int i = 0; i < 2 * gpuQuad_main.getDttSize(); i++) {
+		    					for (int j = 0; j < 2 * gpuQuad_main.getDttSize(); j++) {
 		    						System.out.print(String.format("%10.4f ",
-		    								flat_textures[indx*texture_tile_size + slice* texture_slice_size + 2 * GPUTileProcessor.DTT_SIZE * i + j]));
+		    								flat_textures[indx*texture_tile_size + slice* texture_slice_size + 2 * gpuQuad_main.getDttSize() * i + j]));
 		    					}
 		    					System.out.println();
 		    				}
@@ -2459,10 +2459,10 @@ public class TwoQuadCLT {

    			for (int slice =0; slice < texture_tile.length; slice++) {
    				System.out.println("\n=== Slice="+slice+" ===");
-    				for (int i = 0; i < 2 * GPUTileProcessor.DTT_SIZE; i++) {
-    					for (int j = 0; j < 2 * GPUTileProcessor.DTT_SIZE; j++) {
+    				for (int i = 0; i < 2 * gpuQuad_main.getDttSize(); i++) {
+    					for (int j = 0; j < 2 * gpuQuad_main.getDttSize(); j++) {
    						System.out.print(String.format("%10.4f ",
-    								texture_tile[slice][2 * GPUTileProcessor.DTT_SIZE * i + j]));
+    								texture_tile[slice][2 * gpuQuad_main.getDttSize() * i + j]));
    					}
    					System.out.println();
    				}