package com.elphel.imagej.cuas;

import java.awt.Rectangle;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;

import com.elphel.imagej.cameras.CLTParameters;
import com.elphel.imagej.cameras.ColorProcParameters;
import com.elphel.imagej.common.DoubleGaussianBlur;
import com.elphel.imagej.common.PolynomialApproximation;
import com.elphel.imagej.common.ShowDoubleFloatArrays;
import com.elphel.imagej.gpu.GPUTileProcessor;
import com.elphel.imagej.gpu.GpuQuad;
import com.elphel.imagej.gpu.TpTask;
import com.elphel.imagej.readers.ImagejJp4Tiff;
import com.elphel.imagej.tileprocessor.ErsCorrection;
import com.elphel.imagej.tileprocessor.GeometryCorrection;
import com.elphel.imagej.tileprocessor.ImageDtt;
import com.elphel.imagej.tileprocessor.OpticalFlow;
import com.elphel.imagej.tileprocessor.QuadCLT;
import com.elphel.imagej.tileprocessor.QuadCLTCPU;
import com.elphel.imagej.tileprocessor.SetChannels;
import com.elphel.imagej.tileprocessor.TileProcessor;

import ij.ImagePlus;

public class CorrectionFPN {
	/** Scene this helper operates on: its image data, FPN state and model directory are used by the instance methods. */
	final QuadCLT quadCLT;

	/**
	 * Bind this helper to a scene.
	 * @param quadCLT scene to operate on; it is cast to QuadCLT, so an argument that is not
	 *                a QuadCLT instance fails with a ClassCastException
	 */
	public CorrectionFPN(QuadCLTCPU quadCLT) {
		final QuadCLT scene = (QuadCLT) quadCLT;
		this.quadCLT = scene;
	}

	/**
	 * Read or calculate the per-sensor FPN (fixed pattern noise) image and apply it to the scenes.
	 * <p>
	 * With {@code cuas_subtract_fpn} set, the method restores the combo DSI of {@code center_CLT},
	 * spawns the scenes of {@code scene_range} that are null (all of them when {@code changed} is set)
	 * and reads the saved FPN with readImageFPN(). If nothing was read, or
	 * {@code clt_parameters.imp.cuas_calc_fpn} is set, the FPN is calculated with backPropagate(),
	 * passed through getRowAvgMulti()/getColAvgMulti()/applyRowCol() and saved. Finally the FPN is
	 * handed to setApplyFPN(). With {@code cuas_subtract_fpn} cleared only setApplyFPN() with a null
	 * FPN is called.
	 *
	 * @param clt_parameters      processing parameters, the {@code clt_parameters.imp.cuas_*} fields are used
	 * @param cuas_subtract_fpn   true - read/calculate and apply the FPN, false - reset it
	 * @param changed             re-spawn every scene in the range, not only the null ones
	 * @param colorProcParameters passed to spawnNoModelQuadCLT()
	 * @param center_CLT          center scene; its CorrectionFPN instance reads, saves and applies the FPN
	 * @param quadCLTs            scene sequence, null elements in the range are replaced by spawned scenes
	 * @param set_channels        per-scene data, {@code set_name} is used for spawning
	 * @param scene_range         {first, last} scene indices to spawn, null - all of {@code quadCLTs}
	 * @param debugLevel          debug level
	 * @return the FPN handed to setApplyFPN(), or null when {@code cuas_subtract_fpn} is false
	 */
	public static double [][][] cuasSubtractFpn(
			CLTParameters        clt_parameters,
			boolean              cuas_subtract_fpn,
			boolean              changed,
			ColorProcParameters  colorProcParameters,
			QuadCLT              center_CLT,
			QuadCLT []           quadCLTs,
			SetChannels []       set_channels,
			int []               scene_range,
			int                  debugLevel) {
		boolean early_try_back = false; // just faster debugging, not used in production
		final boolean condition_dsi = false;
		final int [] first_last = (scene_range != null) ? scene_range : new int [] {0, quadCLTs.length - 1};
		final int earliest_scene = first_last[0];
		final int last_index =     first_last[1];
		final int fpn_width =      center_CLT.getTilesX() * center_CLT.getTileSize();
		final boolean cuas_debug = clt_parameters.imp.cuas_debug;      // save debug images
		final boolean show_fpn =   cuas_debug && !clt_parameters.batch_run; // and show them if not in batch mode
		final boolean verbose =    debugLevel > -3;

		// FPN subtraction disabled: reset FPN in the center scene and in all available scenes
		if (!cuas_subtract_fpn) {
			if (verbose) {
				System.out.println("Skipping FPN.");
			}
			center_CLT.getCorrectionFPN().setApplyFPN(quadCLTs, null);
			return null;
		}

		final int     discard_border =   clt_parameters.imp.cuas_discard_border;
		final double  max_fold =         clt_parameters.imp.cuas_max_fold;
		final int     min_in_row_col =   clt_parameters.imp.cuas_min_in_row_col;
		final int     invert_margins =   clt_parameters.imp.cuas_invert_margins;
		final int     invert_iters =     clt_parameters.imp.cuas_invert_iters;
		final double  invert_tolerance = clt_parameters.imp.cuas_invert_tolerance;
		final int     invert_gap2 =      clt_parameters.imp.cuas_invert_gap2;
		final boolean force_calc_fpn =   clt_parameters.imp.cuas_calc_fpn;
		final double  rot_period =       clt_parameters.imp.cuas_rot_period;
		final double  um_sigma =         clt_parameters.imp.cuas_um_sigma;
		final double  max_abs_rowcol =   clt_parameters.imp.cuas_max_abs_rowcol;
		final double  outliers_rowcol =  clt_parameters.imp.cuas_outliers_rowcol;

		System.out.println(changed ?
				"00.re-spawning with updated photogrammetric calibration of reference scene." :
				"00.no update, spawning only over null");

		final double [][] ref_combo_dsi = center_CLT.restoreComboDSI(true);
		final double [] disparity_center = backPrepareCenter(
				clt_parameters,
				center_CLT,
				ref_combo_dsi,
				condition_dsi,
				-1, // all sensors
				debugLevel);

		// Walk the range from the last scene down, the last scene is used as a template for spawning
		for (int scene_index = last_index; scene_index >= earliest_scene; scene_index--) {
			final boolean need_spawn = changed || (quadCLTs[scene_index] == null);
			if (need_spawn) {
				quadCLTs[scene_index] = quadCLTs[last_index].spawnNoModelQuadCLT( // restores image data
						set_channels[scene_index].set_name,
						clt_parameters,
						colorProcParameters,
						ImageDtt.THREADS_MAX,
						debugLevel-2);
			}
			quadCLTs[scene_index].setImageCenter(center_CLT.getImageCenter());
			if (early_try_back) {
				backPropagate(
						clt_parameters,
						discard_border,
						max_fold,
						min_in_row_col,
						invert_margins,
						invert_gap2,
						invert_iters,
						invert_tolerance,
						center_CLT,
						quadCLTs[scene_index],
						disparity_center,
						debugLevel);
			}
		}

		double [][][] fpn = center_CLT.getCorrectionFPN().readImageFPN(-1);
		final boolean reuse_fpn = (fpn != null) && !force_calc_fpn;
		if (reuse_fpn) {
			if (verbose) {
				System.out.println("Reusing FPN.");
			}
		} else {
			if (verbose) {
				System.out.println("Calculating FPN.");
			}
			// Use an integer number of rotation periods that fits into the whole sequence
			final int num_scenes =  quadCLTs.length;
			final int rot_periods = (int) Math.floor(num_scenes/rot_period);
			final int rot_scenes =  (int) Math.floor(rot_periods *rot_period);
			final int [] rot_range = {0, rot_scenes-1};
			final double [][][] fpn_and_weights = backPropagate(
					clt_parameters,
					discard_border,
					max_fold,
					min_in_row_col,
					invert_margins,
					invert_gap2,
					invert_iters,
					invert_tolerance,
					center_CLT,
					quadCLTs,
					rot_range[0],
					rot_range[1],
					disparity_center,
					debugLevel);
			// Re-pack element 0 of the result as [sensor][single color][pixels]
			final double [][] fpn_slices = fpn_and_weights[0];
			fpn = new double [fpn_slices.length][1][];
			for (int nsens = 0; nsens < fpn.length; nsens++) {
				fpn[nsens][0] = fpn_slices[nsens];
			}
			// remove later - here just for a safe save of the unprocessed result
			center_CLT.getCorrectionFPN().saveShowFPN(
					fpn,
					fpn_width,
					true,
					show_fpn,
					QuadCLT.CENTER_FPN_SUFFIX+"-ORIG");

			final double [][] image_row_avg = getRowAvgMulti(
					fpn,
					fpn_width,
					max_abs_rowcol,
					outliers_rowcol);
			final double [][] image_col_avg = getColAvgMulti(
					fpn,
					fpn_width,
					max_abs_rowcol,
					outliers_rowcol);
			fpn = applyRowCol(
					fpn,
					image_row_avg,
					image_col_avg,
					false); // not in place
			center_CLT.getCorrectionFPN().saveShowFPN(
					fpn,
					fpn_width,
					true,
					show_fpn,
					QuadCLT.CENTER_FPN_SUFFIX+"-ROWCOL_RANGE"+rot_range[0]+"-"+rot_range[1]);

			final String fpn_weights_suffix = "-FPN-WEIGHTS";
			saveShowFPNWeights(
					fpn_and_weights,
					center_CLT,
					fpn_weights_suffix,
					true,
					show_fpn);

			final int dbg_sens = -12; // negative disables testing
			if (cuas_debug && (dbg_sens >= 0)) {
				center_CLT.getCorrectionFPN().debugFPN(
						quadCLTs,
						fpn,
						rot_range,
						dbg_sens,
						um_sigma,
						max_abs_rowcol,
						outliers_rowcol,
						show_fpn);
			}
		}
		final boolean created_fpn = !reuse_fpn;

		// Save only a newly created FPN; a reused one is only (optionally) shown
		if (created_fpn || show_fpn) {
			center_CLT.getCorrectionFPN().saveShowFPN(
					fpn,
					fpn_width,
					created_fpn,
					show_fpn);
		}
		center_CLT.getCorrectionFPN().setApplyFPN(quadCLTs, fpn);
		return fpn;
	}
	

	/**
	 * Read or calculate the per-scene row/column noise and hand it to setApplyRowCol(), or reset it.
	 * <p>
	 * With {@code cuas_subtract_rowcol} set, the saved data is read with readImageRowCol() unless
	 * {@code clt_parameters.imp.cuas_calc_rowcol} requests recalculation; if nothing was read it is
	 * calculated with calculateRowCol(). Newly calculated data is saved, and data is displayed when
	 * {@code cuas_debug} is set and not in batch mode. If the data is neither read nor calculated,
	 * the method reports it and returns without calling setApplyRowCol().
	 * With {@code cuas_subtract_rowcol} cleared, setApplyRowCol() is called with null rows/columns.
	 *
	 * @param clt_parameters       processing parameters, the {@code clt_parameters.imp.cuas_*} fields are used
	 * @param cuas_subtract_rowcol true - read/calculate and apply, false - reset
	 * @param center_CLT           center scene; its CorrectionFPN instance reads and saves the data
	 * @param quadCLTs             scene sequence
	 * @param fpn                  FPN passed to calculateRowCol() (if null - already applied)
	 * @param debugLevel           debug level
	 */
	public static void cuasSubtractRowColNoise(
			CLTParameters        clt_parameters,
			boolean              cuas_subtract_rowcol,
			QuadCLT              center_CLT,        // where combo_dsi is. Should have hasCenterClt()   (run makeCenterClt() before)
			QuadCLT []           quadCLTs,
			final double [][][]  fpn, // if null - already applied
			int                  debugLevel) {
		boolean cuas_debug = clt_parameters.imp.cuas_debug;  // save debug images (and show them if not in batch mode)
		boolean show_fpn =   cuas_debug && !clt_parameters.batch_run;
		if (!cuas_subtract_rowcol) { // reset row/col correction
			CorrectionFPN.setApplyRowCol(
					quadCLTs, // final QuadCLT []     scenes,
					null,     // final double [][][]  rows,
					null);    // final double [][][]  cols
			if (debugLevel > -4) {
				System.out.println("cuasSubtractRowColNoise(): Reset row/col noise in all source images.");
			}
			return;
		}
		int fpn_width = center_CLT.getTilesX() * center_CLT.getTileSize();
		boolean cuas_calc_rowcol=    clt_parameters.imp.cuas_calc_rowcol;    // Recalculate+save  row/column noise, if false - try to read saved one first
		double  cuas_um_sigma =      clt_parameters.imp.cuas_um_sigma;       // Apply Unsharp Mask filter sigma when calculating row/column noise
		double  cuas_max_abs_rowcol= clt_parameters.imp.cuas_max_abs_rowcol; // consider pixels with abs(UM difference) does not exceed this value
		double  cuas_outliers_rowcol=clt_parameters.imp.cuas_outliers_rowcol;// scale weight of the outliers with high difference (to prevent undefined values
		boolean is_new = false;
		double [][][][] row_col = null;
		if (!cuas_calc_rowcol) {
			if (debugLevel >-3) {
				System.out.println("Reading row/column noise per-scene data.");
			}
			row_col = center_CLT.getCorrectionFPN().readImageRowCol();
		}
		if (row_col == null) {
			if (debugLevel >-3) {
				System.out.println("Calculating row/column noise per-scene data.");
			}
			row_col= CorrectionFPN.calculateRowCol(
					quadCLTs,              // final QuadCLT []     quadCLTs,
					fpn,                   // final double [][][]  fpn, // if null - already applied
					fpn_width,             // final int            width,
					cuas_um_sigma,         // final double         um_sigma,
					cuas_max_abs_rowcol,   // final double         max_abs,
					cuas_outliers_rowcol); // final double         weight_outlier
			is_new = (row_col != null);
		}
		if (row_col == null) {
			// Neither read nor calculated: report instead of failing on row_col[0] below
			System.out.println("cuasSubtractRowColNoise(): row/column noise data is not available, bailing out");
			return;
		}
		// Single save/show call: save only new data, build the image only if it is saved or shown
		if (is_new || show_fpn) {
			center_CLT.getCorrectionFPN().saveShowRowCol(
					row_col[0], // final double [][][] rows,    // [scene][sensor][width]
					row_col[1], // final double [][][] cols,    // [scene][sensor][height]
					is_new,     // final boolean       save,
					show_fpn);  // final boolean       show
		}
		CorrectionFPN.setApplyRowCol(
				quadCLTs,    // final QuadCLT []     scenes,
				row_col[0],  // final double [][][]  rows,
				row_col[1]); // final double [][][]  cols
		if (debugLevel > -4) {
			System.out.println("cuasSubtractRowColNoise(): Subtracted row/col noise from all source images in memory, marking them as applied.");
		}
	}



	/**
	 * Set the FPN in this helper's scene and in every available scene of the sequence, and apply it there.
	 * This helper's own scene only gets setFPN(); each non-null element of {@code quadCLTs}
	 * gets setFPN() followed by applyFPN().
	 * @param quadCLTs scene sequence, null elements are skipped
	 * @param fpn      FPN data, can be null to reset
	 */
	public void setApplyFPN(
			QuadCLT []    quadCLTs,
			double [][][] fpn) {
		quadCLT.setFPN(fpn);
		for (QuadCLT scene : quadCLTs) {
			if (scene == null) {
				continue;
			}
			scene.setFPN(fpn);
			scene.applyFPN();
		}
	}



	/**
	 * Replace the FPN applied to the image data of this helper's scene.
	 * Nothing is done when {@code fpn} is the same array instance that getAppliedFPN() reports.
	 * Otherwise the previously applied FPN (if not null) is added back and the new one (if not null)
	 * is subtracted from the image data in place, one thread per first-index slice of the image data.
	 * After that the scene gets setFpnApplied(fpn) and setHasNewImageData(true).
	 * @param fpn FPN indexed the same way as the image data, or null to only undo the applied one
	 */
	public void applyFPN(
			double [][][] fpn) {
		final double [][][] previous_fpn = quadCLT.getAppliedFPN();
		if (previous_fpn == fpn) {
			return; // this very instance is already applied
		}
		final double [][][] image_data = quadCLT.getImageData();
		if (image_data == null) {
			System.out.println("applyFPN (): image_data==null");
			return;
		}
		final boolean restore =  previous_fpn != null;
		final boolean subtract = fpn != null;
		final AtomicInteger next_chn = new AtomicInteger(0);
		final Thread[] workers = ImageDtt.newThreadArray(ImageDtt.THREADS_MAX);
		for (int iworker = 0; iworker < workers.length; iworker++) {
			workers[iworker] = new Thread(() -> {
				int nChn;
				while ((nChn = next_chn.getAndIncrement()) < image_data.length) {
					for (int ncol = 0; ncol < image_data[nChn].length; ncol++) {
						final double [] pixels = image_data[nChn][ncol];
						for (int npix = 0; npix < pixels.length; npix++) {
							if (restore) {
								pixels[npix] += previous_fpn[nChn][ncol][npix];
							}
							if (subtract) {
								pixels[npix] -= fpn[nChn][ncol][npix];
							}
						}
					}
				}
			});
		}
		ImageDtt.startAndJoin(workers);
		quadCLT.setFpnApplied(fpn);
		quadCLT.setHasNewImageData(true);
	}

	/**
	 * Convert the FPN to an image, optionally show and save it using the default
	 * suffix QuadCLT.CENTER_FPN_SUFFIX.
	 * @param fpn   FPN data
	 * @param width image width in pixels
	 * @param save  save the image to the model directory
	 * @param show  show the image
	 * @return result of the five-argument saveShowFPN()
	 */
	public ImagePlus saveShowFPN(
			double [][][] fpn,
			int           width,
			boolean       save,
			boolean       show) {
		final String default_suffix = QuadCLT.CENTER_FPN_SUFFIX;
		return saveShowFPN(fpn, width, save, show, default_suffix);
	}

	/**
	 * Convert the FPN to an image with showFPN(), optionally showing it, and optionally save it
	 * to the model directory of this helper's scene.
	 * @param fpn    FPN data
	 * @param width  image width in pixels
	 * @param save   save the image (only when showFPN() returned a non-null image)
	 * @param show   show the image
	 * @param suffix suffix passed to saveImagePlusInModelDirectory()
	 * @return the image returned by showFPN()
	 */
	public ImagePlus saveShowFPN(
			double [][][] fpn,
			int           width,
			boolean       save,
			boolean       show,
			String        suffix) {
		final ImagePlus imp = showFPN(fpn, width, show);
		if (!save || (imp == null)) {
			return imp;
		}
		quadCLT.saveImagePlusInModelDirectory(suffix, imp);
		return imp;
	}



	/**
	 * Build the row/column noise image with the static saveShowRowCol(), optionally showing it,
	 * and optionally save it to the model directory with the QuadCLT.CENTER_ROWCOL_SUFFIX suffix.
	 * @param rows row data, [scene][sensor][width]
	 * @param cols column data, [scene][sensor][height]
	 * @param save save the image (only when a non-null image was built)
	 * @param show show the image
	 * @return the image built by the static saveShowRowCol()
	 */
	public ImagePlus saveShowRowCol(
			final double [][][] rows,
			final double [][][] cols,
			final boolean       save,
			final boolean       show) {
		final ImagePlus imp = saveShowRowCol(rows, cols, show);
		if (!save || (imp == null)) {
			return imp;
		}
		quadCLT.saveImagePlusInModelDirectory(QuadCLT.CENTER_ROWCOL_SUFFIX, imp);
		return imp;
	}


	/**
	 * Pack per-scene row and column data into a hyperstack image and optionally show it.
	 * <p>
	 * The image has two frames ("rows", "columns") and one slice per sensor. Each slice is
	 * max(width, height) pixels wide and has one line per scene: line {@code nscene} starts with the
	 * row (or column) data of that scene, the rest of the line stays NaN. The dimensions are stored
	 * as the image properties "numScenes", "numSensors", "width" and "height" and encoded with
	 * ImagejJp4Tiff.encodeProperiesToInfo().
	 *
	 * @param rows row data, [scene][sensor][width]
	 * @param cols column data, [scene][sensor][height]
	 * @param show show the image
	 * @return the image, or null if {@code rows} or {@code cols} is null or has no scenes or sensors
	 */
	public static ImagePlus saveShowRowCol(
			final double [][][] rows,    // [scene][sensor][width]
			final double [][][] cols,    // [scene][sensor][height]
			final boolean       show) {
		// Nothing to pack: dimensions below are taken from the first scene/sensor
		if ((rows == null) || (cols == null)
				|| (rows.length == 0) || (rows[0].length == 0)
				|| (cols.length == 0) || (cols[0].length == 0)) {
			return null;
		}
		final int num_scenes =   rows.length;
		final int num_sens =     rows[0].length;
		final int width =        rows[0][0].length;
		final int height =       cols[0][0].length;
		final int  img_width =  Math.max(width,  height);
		final int  img_height = num_scenes;
		final String [] titles_top = {"rows","columns"};
		final String [] titles = new String [num_sens];
		final double [][][] data = new double [titles_top.length][num_sens][img_width*img_height];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					// Each sensor is handled by exactly one thread, including its title and NaN pre-fill
					for (int nsens = ai.getAndIncrement(); nsens < num_sens; nsens = ai.getAndIncrement()) {
						titles[nsens]=String.format("sensor-%02d",nsens);
						for (int n = 0; n < data.length; n++) {
							Arrays.fill(data[n][nsens], Double.NaN);
						}
						for (int nscene = 0; nscene < num_scenes; nscene++) {
							System.arraycopy(
									rows[nscene][nsens],
									0,
									data[0][nsens],
									nscene*img_width,
									width);
							System.arraycopy(
									cols[nscene][nsens],
									0,
									data[1][nsens],
									nscene*img_width,
									height);
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		String       title = "IMAGE"+QuadCLT.CENTER_ROWCOL_SUFFIX;
		ImagePlus imp = ShowDoubleFloatArrays.showArraysHyperstack(
				data,        // double[][][] pixels,
				img_width,   // int          width,
				title,       // String       title,
				titles,      // String []    titles, // all slices*frames titles or just slice titles or null
				titles_top,  // String []    frame_titles, // frame titles or null
				show);       // boolean      show)
		// Dimensions are needed to unpack the image, see readImageRowCol()
		imp.setProperty("numScenes",   num_scenes+"");
		imp.setProperty("numSensors",  num_sens+"");
		imp.setProperty("width",       width+"");
		imp.setProperty("height",      height+"");
		ImagejJp4Tiff.encodeProperiesToInfo(imp);
		return imp;
	}




	/**
	 * Read the saved FPN image (suffix QuadCLT.CENTER_FPN_SUFFIX) from the model directory of this
	 * helper's scene and re-pack its slices as [sensor][color][pixels].
	 * The slices are taken in color-major order: slice {@code ncol * used_sensors + isens} belongs
	 * to color {@code ncol} of the {@code isens}-th selected sensor. Sensors not selected by
	 * {@code sens_mask} keep null color slices.
	 * @param sens_mask bitmask of the sensors present in the file (-1 - all)
	 * @return FPN data or null if the image could not be read
	 */
	public double [][][] readImageFPN (
			int sens_mask){
		final int [] wh = new int [2];
		final double [][] slices = quadCLT.readDoubleArrayFromModelDirectory(
				QuadCLT.CENTER_FPN_SUFFIX,
				0,  // all slices
				wh);
		if (slices == null) {
			return null;
		}
		final int num_sensors = quadCLT.getNumSensors();
		// Count the sensors selected by the mask
		int used_sensors = 0;
		for (int nsens = 0; nsens < num_sensors; nsens++) {
			if (((sens_mask >> nsens) & 1) != 0) {
				used_sensors++;
			}
		}
		final int num_colors = slices.length / used_sensors;
		final double [][][] fpn = new double [num_sensors][num_colors][];
		int isens = 0; // index among the selected sensors
		for (int nsens = 0; nsens < num_sensors; nsens++) {
			if (((sens_mask >> nsens) & 1) == 0) {
				continue;
			}
			for (int ncol = 0; ncol < num_colors; ncol++) {
				fpn[nsens][ncol] = slices[ncol * used_sensors + isens];
			}
			isens++;
		}
		return fpn;
	}

	/**
	 * Read the saved row/column noise image (suffix QuadCLT.CENTER_ROWCOL_SUFFIX) from the model
	 * directory of this helper's scene and unpack it.
	 * <p>
	 * The first half of the image slices is used as per-sensor row data and the second half as
	 * per-sensor column data; each image line holds the data of one scene. The array dimensions are
	 * taken from the image properties "numScenes", "width" and "height"; "numSensors" is compared
	 * with the number of slices and a mismatch is reported.
	 *
	 * @return {rows, cols} where rows is [scene][sensor][width] and cols is [scene][sensor][height],
	 *         or null if the image or its pixel data could not be read, or a required property is
	 *         missing or is not an integer
	 */
	public double [][][][] readImageRowCol () {
		int [] wh = new int [2];
		ImagePlus imp = quadCLT.readImagePlusFromModelDirectory(
				QuadCLT.CENTER_ROWCOL_SUFFIX); // String      suffix,
		if (imp == null) {
			return null;
		}
		ImagejJp4Tiff.decodeProperiesFromInfo(imp);
		final double [][] rowcol_data2 = ShowDoubleFloatArrays.readDoubleArray(
				imp,       // ImagePlus   imp,
				0,         // int         num_slices, // (0 - all)
				wh);       // int []      wh)
		if (rowcol_data2 == null) {
			return null;
		}
		// Dimensions come from the image properties; without them the data can not be unpacked,
		// so return null instead of failing while parsing a missing property
		final String [] prop_names = {"numScenes", "numSensors", "width", "height"};
		final int [] prop_vals = new int [prop_names.length];
		for (int i = 0; i < prop_names.length; i++) {
			final Object prop = imp.getProperty(prop_names[i]);
			boolean parsed = false;
			if (prop != null) {
				try {
					prop_vals[i] = Integer.parseInt(prop.toString().trim());
					parsed = true;
				} catch (NumberFormatException ignored) {
					// reported below together with the missing property case
				}
			}
			if (!parsed) {
				System.out.println("readImageRowCol(): missing or malformed property \""+prop_names[i]+
						"\" (value: "+prop+"), ignoring saved row/column data");
				return null;
			}
		}
		final int img_width =  wh[0];
		final int num_sens =   rowcol_data2.length / 2;
		final int num_scenes = prop_vals[0];
		final int num_sens0 =  prop_vals[1];
		final int width =      prop_vals[2];
		final int height =     prop_vals[3];
		if (num_sens0 != num_sens) {
			System.out.println("readImageRowCol(): WARNING: property numSensors ("+num_sens0+
					") differs from the number of sensors derived from the image slices ("+num_sens+")");
		}
		final double [][][] rows = new double[num_scenes][num_sens][width];
		final double [][][] cols = new double[num_scenes][num_sens][height];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int nsens = ai.getAndIncrement(); nsens < num_sens; nsens = ai.getAndIncrement()) {
						for (int nscene = 0; nscene < num_scenes; nscene++) {
							// line nscene of the image slice starts with the data of that scene
							System.arraycopy(
									rowcol_data2[nsens + 0],
									nscene*img_width,
									rows[nscene][nsens],
									0,
									width);
							System.arraycopy(
									rowcol_data2[nsens + num_sens],
									nscene*img_width,
									cols[nscene][nsens],
									0,
									height);
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		return new double [][][][] {rows,cols};
	}



	/**
	 * Build a debug image with the static debugFPN() for one sensor, save it to the model directory
	 * of this helper's scene and optionally show it. The image width is derived from the tile
	 * dimensions of this helper's scene; the image title (also used as the save suffix) combines the
	 * scene image name, sensor number, {@code um_sigma} and {@code max_abs}.
	 * @param quadCLTs       scene sequence
	 * @param fpn            FPN data
	 * @param range          scene range passed to the static debugFPN()
	 * @param nsens          sensor number
	 * @param um_sigma       passed to the static debugFPN()
	 * @param max_abs        passed to the static debugFPN()
	 * @param weight_outlier passed to the static debugFPN()
	 * @param show           show the image if it was created
	 * @return the debug image, may be null
	 */
	public ImagePlus debugFPN(
			QuadCLT []     quadCLTs,
			double [][][]  fpn,
			int []         range,
			int            nsens,
			double         um_sigma,
			double         max_abs,
			double         weight_outlier,
			boolean        show) {
		final int fpn_width = quadCLT.getTilesX()*quadCLT.getTileSize();
		final String dbg_title = quadCLT.getImageName()+"-DEBUG_FPN_ROWCOL_SENS_"+nsens+"-SIGMA"+um_sigma+"-MAXABS"+max_abs;
		final ImagePlus imp = debugFPN(
				quadCLTs,
				fpn,
				range,
				nsens,
				fpn_width,
				um_sigma,
				max_abs,
				weight_outlier,
				dbg_title);
		if (imp == null) {
			return null;
		}
		quadCLT.saveImagePlusInModelDirectory(dbg_title, imp);
		if (show) {
			imp.show();
		}
		return imp;
	}


	/**
	 * Convert FPN data to a hyperstack image titled "FPN_data" (frames - colors, slices - sensors)
	 * and optionally show it.
	 * The number of colors is taken from the first non-null sensor; sensors with null data keep
	 * null slices and null titles.
	 * @param fpn   FPN data, [sensor][color][pixels]
	 * @param width image width in pixels
	 * @param show  show the image
	 * @return the image, or null when {@code fpn} is null or contains no non-null sensors
	 */
	public static ImagePlus showFPN(
			double [][][] fpn,
			int     width,
			boolean show) {
		if (fpn == null) {
			return null; // nothing to show
		}
		int num_sens = fpn.length;
		int num_colors = -1;
		for (int i = 0; i < num_sens; i++) {
			if (fpn[i] != null) {
				num_colors = fpn[i].length;
				break;
			}
		}
		if (num_colors < 0) {
			return null; // no sensor has data, array sizes below would be negative
		}
		String [] titles = new String[num_sens];
		String [] top_titles = new String[num_colors];
		for (int ncol = 0; ncol < num_colors;ncol++) {
			top_titles[ncol] = "Color-"+ncol;
		}

		// Transpose [sensor][color] to [color][sensor] as required for frames/slices
		double [][][] img_data = new double [num_colors][num_sens][];
		for (int nsens = 0; nsens < num_sens; nsens++) if (fpn[nsens] != null) {
			titles[nsens]= "Sens-"+nsens;
			for (int ncol = 0; ncol < num_colors;ncol++) {
				img_data [ncol][nsens] = fpn[nsens][ncol];
			}
		}
		String fpn_title = "FPN_data";
		ImagePlus imp = ShowDoubleFloatArrays.showArraysHyperstack(
				img_data,     // double[][][] pixels,
				width,        // int          width,
				fpn_title,    // String       title,
				titles,       // String []    titles, // all slices*frames titles or just slice titles or null
				top_titles,   // String []    frame_titles, // frame titles or null
				show);        // boolean      show)
		return imp;
	}

	/**
	 * Calculate sensors FPN in a simple way: average each channel/color over a scene sequence. Intended to be
	 * used in CUAS mode, averaging several (normally 2) full periods of rotation.
	 * TODO: make more accurate and universal by back-propagating the corrected image and subtracting per-sensor
	 * versions before averaging.
	 * @param quadCLTs sequence of scenes with conditioned images (getImageData() != null), without FPN applied
	 * @param range    a first_last scene index pair. May be adjusted to include an integer number of rotations.
	 * @param sensor_mask bitmask which sensors to process (normally -1 - all)
	 * @param debugLevel debug level
	 * @return [sensor][color][pixel] average FPN image. May be saved as image_data to a virtual (center) scene
	 * (a QuadCLT instance). 
	 */
	public static double [][][] calculateFPN_old(
			final QuadCLT []     quadCLTs,
			final int []         range, // required
			final int            sensor_mask,
			final int            debugLevel){
		// Dimensions are taken from the first scene of the range
		final QuadCLT ref_scene = quadCLTs[range[0]];
		final int num_sens =   ref_scene.getNumSensors();
		final int num_colors = ref_scene.getNumColors();
		final int width =      ref_scene.getTilesX()*ref_scene.getTileSize();
		final int height =     ref_scene.getTilesY()*ref_scene.getTileSize();
		final int num_pix =    width*height;
		final int num_scenes = range[1]-range[0]+1;
		final double [][][] fpn = new double [num_sens][num_colors][num_pix];
		final AtomicInteger next_chn = new AtomicInteger(0);
		final Thread[] workers = ImageDtt.newThreadArray();
		for (int iworker = 0; iworker < workers.length; iworker++) {
			workers[iworker] = new Thread() {
				public void run() {
					int nChn;
					while ((nChn = next_chn.getAndIncrement()) < num_sens) {
						if (((sensor_mask >> nChn) & 1) == 0) {
							continue; // sensor is not selected, its slices stay zero
						}
						final double [][] fpn_chn = fpn[nChn];
						// accumulate all scenes of the range
						for (int nscene = range[0]; nscene <= range[1]; nscene++) {
							for (int ncol = 0; ncol < num_colors; ncol++) {
								final double [] img_slice = quadCLTs[nscene].getImageData()[nChn][ncol];
								final double [] sum = fpn_chn[ncol];
								for (int npix = 0; npix < num_pix; npix++) {
									sum[npix] += img_slice[npix];
								}
							}
						}
						// convert sums to averages
						final double scale = 1.0/num_scenes;
						for (int ncol = 0; ncol < num_colors; ncol++) {
							final double [] sum = fpn_chn[ncol];
							for (int npix = 0; npix < num_pix; npix++) {
								sum[npix] *= scale;
							}
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(workers);
		return fpn;
	}


	/**
	 * Average the image data of the scenes in {@code range} separately for each selected sensor
	 * and color. Sensors are distributed between threads; slices of the sensors not selected by
	 * {@code sensor_mask} stay zero.
	 * @param quadCLTs    sequence of scenes, getImageData() of each scene in the range is used
	 * @param range       {first, last} scene indices (inclusive); dimensions are taken from the first scene
	 * @param sensor_mask bitmask of the sensors to process
	 * @param debugLevel  debug level (not used in this method)
	 * @return [sensor][color][pixel] averaged image data
	 */
	public static double [][][] calculateFPN(
			final QuadCLT []     quadCLTs,
			final int []         range, // required
			final int            sensor_mask,
			final int            debugLevel){
		final QuadCLT first_scene = quadCLTs[range[0]];
		final int num_sens =   first_scene.getNumSensors();
		final int num_colors = first_scene.getNumColors();
		final int width =      first_scene.getTilesX()*first_scene.getTileSize();
		final int height =     first_scene.getTilesY()*first_scene.getTileSize();
		final int num_pix =    width*height;
		final int num_scenes = range[1]-range[0]+1;
		final double [][][] fpn = new double [num_sens][num_colors][num_pix];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		final Runnable average_sensors = () -> {
			for (int nChn = ai.getAndIncrement(); nChn < num_sens; nChn = ai.getAndIncrement()) {
				final boolean selected = ((sensor_mask >> nChn) & 1) != 0;
				if (!selected) {
					continue;
				}
				// sum over scenes
				for (int nscene = range[0]; nscene <= range[1]; nscene++) {
					for (int ncol = 0; ncol < num_colors; ncol++) {
						final double [] img_slice = quadCLTs[nscene].getImageData()[nChn][ncol];
						for (int npix = 0; npix < num_pix; npix++) {
							fpn[nChn][ncol][npix] += img_slice[npix];
						}
					}
				}
				// normalize by the number of scenes
				final double scale = 1.0/num_scenes;
				for (double [] fpn_slice : fpn[nChn]) {
					for (int npix = 0; npix < num_pix; npix++) {
						fpn_slice[npix] *= scale;
					}
				}
			}
		};
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread(average_sensors);
		}
		ImageDtt.startAndJoin(threads);
		return fpn;
	}

	/**
	 * Obtains the center-view disparity by delegating to {@code Cuas.getDisparityCenter()} with
	 * an all-zero center ATR and without requesting the pXpYD output.
	 *
	 * @param clt_parameters processing parameters, passed through
	 * @param center_CLT     center scene; must report {@code hasCenterClt()} (run makeCenterClt() before)
	 * @param ref_combo_dsi  DSI data for the reference scene (or null to read it from file)
	 * @param condition_dsi  passed through to {@code Cuas.getDisparityCenter()}
	 * @param sensor_mask    not used by this method (-1 - all)
	 * @param debugLevel     debug level, passed through
	 * @return whatever {@code Cuas.getDisparityCenter()} returns, or null (after printing a message)
	 *         when center_CLT has no center CLT data
	 */
	public static double [] backPrepareCenter(
			CLTParameters  clt_parameters,
			QuadCLT        center_CLT, // where combo_dsi is. Should have hasCenterClt()   (run makeCenterClt() before)
			double  [][]   ref_combo_dsi, // DSI data for the reference scene (or null to read it from file)
			boolean        condition_dsi,
			int            sensor_mask, // -1 - all;
			int            debugLevel) {
		if (center_CLT.hasCenterClt()) {
			final double [] zero_atr = new double[3]; // center_atr argument: all zeros
			return Cuas.getDisparityCenter(
					clt_parameters,
					center_CLT,
					zero_atr,
					ref_combo_dsi,
					condition_dsi,
					null,        // pref_pXpYD - not requested
					debugLevel);
		}
		System.out.println(" center_CLT does not have .center_clt data, bailing out");
		return null;
	}

	/**
	 * Runs the single-scene {@code backPropagate()} for every scene in
	 * {@code first_index..last_index} (inclusive) and combines the per-scene differences into a
	 * weighted per-sensor average.
	 *
	 * For each scene the per-scene call returns {diff, weights}; every non-NaN difference is
	 * accumulated as {@code diff * weight} together with the weight itself. After all scenes are
	 * processed the accumulated sums are divided by the accumulated weights; a pixel that never
	 * received any weight ends up as NaN (0/0).
	 *
	 * Scenes for which the per-scene call returns null (it does so when the scene pose is not
	 * available) are reported and skipped. Previously such a scene caused a NullPointerException
	 * in every worker thread.
	 *
	 * @param clt_parameters   processing parameters, passed to the per-scene call
	 * @param discard_border   passed to the per-scene call
	 * @param max_fold         passed to the per-scene call
	 * @param min_in_row_col   minimal number of defined tiles in a row/column, passed to the per-scene call
	 * @param invert_margins   expand image each side when inverting tasks, passed to the per-scene call
	 * @param invert_gap2      maximal dual gap size for inversion, passed to the per-scene call
	 * @param invert_iters     enhance inversion iterations, passed to the per-scene call
	 * @param invert_tolerance finish enhancing when last change was lower than this, passed to the per-scene call
	 * @param center_CLT       center scene; defines sensor count and image size of the result
	 * @param quadCLTs         all scenes
	 * @param first_index      first scene index to process (inclusive)
	 * @param last_index       last scene index to process (inclusive)
	 * @param disparity_center center-view disparity, passed to the per-scene call
	 * @param debugLevel       debug level; per-scene progress is printed when above -3
	 * @return {fpn, fpn_weights}, each [sensor][pixel]: weighted average difference and total weight
	 */
	public static double [][][] backPropagate(
			CLTParameters     clt_parameters,
			final int         discard_border,
			final double      max_fold,
			final int         min_in_row_col,   // Minimal number of defined tiles in a row/column
			final int         invert_margins, // 0     Expand image each side when inverting tasks 	
			final int         invert_gap2, // 10 // Maximal dual gap size for inversion (depends on scanning radius in tiles)  
			final int         invert_iters,   // 4     Enhance inversion iterations 
			final double      invert_tolerance,// 0.001 Finish enhancing when last change was lower than 		
			final QuadCLT     center_CLT,
			final QuadCLT []  quadCLTs,
			final int         first_index,
			final int         last_index,
			final double []   disparity_center,
			final int         debugLevel) {
		final int width =       center_CLT.getTilesX()*center_CLT.getTileSize();
		final int height =      center_CLT.getTilesY()*center_CLT.getTileSize();
		final int num_sens =    center_CLT.getNumSensors();
		final int num_pix =     width*height;
		final double [][] fpn =         new double [num_sens][num_pix];  
		final double [][] fpn_weights = new double [num_sens][num_pix];  
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);

		for (int nscene =  first_index; nscene <= last_index ; nscene++) { // scenes are processed in ascending order
			if (debugLevel > -3) {
				System.out.println ("Processing scene "+nscene+" ("+quadCLTs[nscene].getImageName()+"). Last is "+last_index);
			}
			final double [][][] diff_src_synth_weights = backPropagate(
					clt_parameters,        // CLTParameters     clt_parameters,
					discard_border,        // final int         discard_border,
					max_fold,              // final double      max_fold,
					min_in_row_col,        // final int         min_in_row_col,   // Minimal number of defined tiles in a row/column
					invert_margins,        // final int         invert_margins, // 1     Expand image each side when inverting tasks
					invert_gap2,           // final int         invert_gap2, // 10 // Maximal dual gap size for inversion (depends on scanning radius in tiles)  
					invert_iters,          // final int         invert_iters,   // 4     Enhance inversion iterations 
					invert_tolerance,      // final double      invert_tolerance,// 0.001 Finish enhancing when last change was lower than 		
					center_CLT,            // final QuadCLT     center_CLT,
					quadCLTs[nscene],      // final QuadCLT     scene_CLT,
					disparity_center,      // double []   disparity_center
					debugLevel);           // final int         debugLevel)
			if (diff_src_synth_weights == null) {
				// the per-scene call already reported the reason; this scene can not contribute
				System.out.println ("backPropagate(): no data for scene "+nscene+" ("+quadCLTs[nscene].getImageName()+"), skipping it");
				continue;
			}
			ai.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					@Override
					public void run() {
						// each sensor is accumulated by a single thread, so no synchronization on fpn[] is needed
						for (int nSens = ai.getAndIncrement(); nSens < num_sens; nSens = ai.getAndIncrement()) {
							double [] diffs_sens =   diff_src_synth_weights[0][nSens];
							double [] weights =      diff_src_synth_weights[1][nSens];
							for (int npix = 0; npix < num_pix; npix++) {
								double d = diffs_sens[npix];
								if (!Double.isNaN(d)) { // undefined differences do not contribute, regardless of weight
									double w = weights[npix];
									fpn[nSens][npix] += d * w;
									fpn_weights[nSens][npix] += w;
								}
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		// normalize the weighted sums; zero total weight results in NaN (0/0)
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int nSens = ai.getAndIncrement(); nSens < num_sens; nSens = ai.getAndIncrement()) {
						for (int npix = 0; npix < num_pix; npix++) {
							fpn[nSens][npix] /= fpn_weights[nSens][npix];
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return new double [][][] {fpn, fpn_weights};

	}


	/**
	 * Back-propagates the center view to a single scene and compares the rendered (synthetic)
	 * per-sensor images with the scene's own source images.
	 *
	 * Sequence implemented below:
	 *  1. put scene_CLT into back-propagate mode by giving it the image center of center_CLT;
	 *  2. get the scene pose and ERS rates from the ErsCorrection of center_CLT and transform the
	 *     center disparity to scene pX, pY, disparity (optionally windowed by windowPxPyD());
	 *  3. build direct GPU tasks, invert them with invertTask(), run execConvertDirect() and
	 *     render the result with renderDoubleFromTD();
	 *  4. leave back-propagate mode, fit per-sensor second-order photometric coefficients with
	 *     photometric() and calculate per-pixel weights with getFPNWeights();
	 *  5. return source - (C + B*synth + A*synth^2) for color 0 of each sensor, and the weights.
	 *
	 * The early exit for a missing scene pose now also leaves back-propagate mode; before, it
	 * returned with the image center still set on scene_CLT.
	 *
	 * @param clt_parameters   processing parameters
	 * @param discard_border   border (pixels) removed on each side when building the pXpYD window
	 * @param max_fold         passed to windowPxPyD()
	 * @param min_in_row_col   minimal number of defined tiles in a row/column, passed to windowPxPyD()
	 * @param invert_margins   expand image each side when inverting tasks
	 * @param invert_gap2      maximal dual gap size for inversion (depends on scanning radius in tiles)
	 * @param invert_iters     enhance inversion iterations
	 * @param invert_tolerance finish enhancing when last change was lower than this
	 * @param center_CLT       center (reference) scene
	 * @param scene_CLT        scene to back-propagate to; its ERS rates and image center are modified here
	 * @param disparity_center center-view disparity (invalid tiles - NaN)
	 * @param debugLevel       debug level; photometric coefficients are printed when above -3
	 * @return {diff_src_synth, weights}, each [sensor][pixel], or null if the scene pose is not available
	 */
	public static double [][][] backPropagate(
			CLTParameters     clt_parameters,
			final int         discard_border,
			final double      max_fold,
			final int         min_in_row_col,   // Minimal number of defined tiles in a row/column
			final int         invert_margins, // 0     Expand image each side when inverting tasks 	
			final int         invert_gap2, // 10 // Maximal dual gap size for inversion (depends on scanning radius in tiles)  
			final int         invert_iters,   // 4     Enhance inversion iterations 
			final double      invert_tolerance,// 0.001 Finish enhancing when last change was lower than 		
			final QuadCLT     center_CLT,
			final QuadCLT     scene_CLT,
			final double []   disparity_center,
			final int         debugLevel) {
		boolean en_debug = false; // manual switch: set to true to allow the debug images below
		scene_CLT.setImageCenter(center_CLT.getImageCenter());  // set back-propagate mode
		boolean batch_run =clt_parameters.batch_run; // may be modified for debug
		int width=center_CLT.getTilesX()*center_CLT.getTileSize();
		int height=center_CLT.getTilesY()*center_CLT.getTileSize();
		// window in pixels!
		boolean apply_window_filter = (discard_border > 0) || (max_fold > 0) || (min_in_row_col > 0);
		final Rectangle window = apply_window_filter ? (new Rectangle(discard_border,discard_border,width-2*discard_border,height-2*discard_border)): null; 

		boolean cuas_debug =  en_debug && clt_parameters.imp.cuas_debug && !batch_run;  // save debug images (and show them if not in batch mode)
		String ts = scene_CLT.getImageName();
		ErsCorrection ers_reference = center_CLT.getErsCorrection();

		double []   scene_xyz = ers_reference.getSceneXYZ(ts);  // saved @ reference, relative to reference
		double []   scene_atr = ers_reference.getSceneATR(ts);  // saved @ reference, relative to reference
		if ((scene_atr==null) || (scene_xyz == null)) {
			System.out.println("backPropagate() BUG : ((scene_atr==null) || (scene_xyz == null)");
			scene_CLT.setImageCenter(null); // do not leave the scene in back-propagate mode
			return null;
		}
		double []   scene_ers_xyz_dt = ers_reference.getSceneErsXYZ_dt(ts);
		double []   scene_ers_atr_dt = ers_reference.getSceneErsATR_dt(ts);
		scene_CLT.getErsCorrection().setErsDt(
				scene_ers_xyz_dt, // double []    ers_xyz_dt,
				scene_ers_atr_dt); // double []    ers_atr_dt)(ers_scene_original_xyz_dt);

		double [][] scene_pXpYD=OpticalFlow.transformToScenePxPyD( // now should work with offset ref_scene
				null,   // final Rectangle [] extra_woi,    // show larger than sensor WOI (or null)
				disparity_center,       // final double []   disparity_ref, // invalid tiles - NaN in disparity
				scene_xyz,             // final double []   scene_xyz, // camera center in world coordinates
				scene_atr,             // final double []   scene_atr, // camera orientation relative to world frame
				scene_CLT,             // final QuadCLT     scene_QuadClt,
				center_CLT,            // final QuadCLT     reference_QuadClt, // now - may be null - for testing if scene is rotated ref
				ImageDtt.THREADS_MAX); // int               threadsMax)

		if (window != null) {
			center_CLT.windowPxPyD(	
					scene_pXpYD,           // final double [][] pXpYD,	
					window,          // final Rectangle window) // window in pixels!
					max_fold,        // final double max_fold)
					min_in_row_col); // final int         min_in_row_col,   // Minimal number of defined tiles in a row/column
		}
		int rendered_width = scene_CLT.getErsCorrection().getSensorWH()[0];
		boolean showPxPyD = cuas_debug; //  false;
		if (showPxPyD) {
			int dbg_width = rendered_width/GPUTileProcessor.DTT_SIZE;
			int dbg_height = scene_pXpYD.length/dbg_width;
			String [] dbg_titles = new String[] {"pX","pY","Disparity"};
			double [][] dbg_img = new double [dbg_titles.length][scene_pXpYD.length];
			for (int i = 0; i < dbg_img.length; i++) {
				Arrays.fill(dbg_img[i], Double.NaN);
			}
			for (int nTile = 0; nTile < scene_pXpYD.length; nTile++){
				if (scene_pXpYD[nTile] != null) {
					for (int i = 0; i < scene_pXpYD[nTile].length; i++) {
						dbg_img[i][nTile] = scene_pXpYD[nTile][i];
					}
				}
			}
			ShowDoubleFloatArrays.showArrays( // out of boundary 15
					dbg_img,
					dbg_width,
					dbg_height,
					true,
					scene_CLT.getImageName()+"-back-pXpYD",
					dbg_titles);
		}

		// Skipping MB
		TpTask[][] tp_tasks;
		tp_tasks = new TpTask[1][];
		tp_tasks[0] =  GpuQuad.setInterTasks(      // "true" reference, with stereo actual reference will be offset
				scene_CLT.getNumSensors(),
				rendered_width,                    // should match output size, pXpYD.length
				!scene_CLT.hasGPU(),               // final boolean             calcPortsCoordinatesAndDerivatives, // GPU can calculate them centreXY
				scene_pXpYD,                       // final double [][]         pXpYD, // per-tile array of pX,pY,disparity triplets (or nulls)
				null,                              // final boolean []          selection, // may be null, if not null do not  process unselected tiles
				scene_CLT.getErsCorrection(),      // final GeometryCorrection  geometryCorrection,
//				clt_parameters.imp.disparity_corr, // 04/07/2023 // 0.0,                      // final double              disparity_corr,
				// below is not yet used, added for consistency
				clt_parameters.imp.disparity_corr + scene_CLT.getDispInfinityRef(), // 12/11/2025 - added ref_scene.getDispInfinityRef()
				-1, // 0, // margin,               // final int                 margin,      // do not use tiles if their centers are closer to the edges
				null,                              // final boolean []          valid_tiles,            
				ImageDtt.THREADS_MAX);             // final int                 threadsMax)  // maximal number of threads to launch

		scene_CLT.saveQuadClt(); // to re-load new set of Bayer images to the GPU (do nothing for CPU) and Geometry
		// maybe no need to reload same image?
		ImageDtt image_dtt = new ImageDtt(
				scene_CLT.getNumSensors(),
				clt_parameters.transform_size,
				clt_parameters.img_dtt,
				scene_CLT.isAux(),
				scene_CLT.isMonochrome(),
				scene_CLT.isLwir(),
				clt_parameters.getScaleStrength(scene_CLT.isAux()),
				scene_CLT.getGPU());
		boolean use_reference = false;
		int [] wh = null;
		final boolean    show_nan = true;
		int              erase_clt = show_nan ? 1:0;

		image_dtt.preSetReferenceTD( // do not run execConvertDirect, exit after updating tasks
				clt_parameters.img_dtt,     // ,     // final ImageDttParameters  imgdtt_params,    // Now just extra correlation parameters, later will include, most others
				tp_tasks[0],                // final TpTask[]            tp_tasks,
				false, // 			final boolean             keep_tiles_offsets, // keep per-sensors offsets in tp_tasks
				clt_parameters.gpu_sigma_r, // final double              gpu_sigma_r,     // 0.9, 1.1
				clt_parameters.gpu_sigma_b, // final double              gpu_sigma_b,     // 0.9, 1.1
				clt_parameters.gpu_sigma_g, // final double              gpu_sigma_g,     // 0.6, 0.7
				clt_parameters.gpu_sigma_m, // final double              gpu_sigma_m,     //  =       0.4; // 0.7;
				debugLevel);                // final int                 globalDebugLevel)
		// invert tasks here **************************************************
		String    dbg_title =cuas_debug ? (center_CLT.getImageName()+"-PREINVERT-MARGIN_"+invert_margins): null;

		tp_tasks[0] = invertTask(
				clt_parameters,//CLTParameters     clt_parameters,
				center_CLT,                  // final QuadCLT center_CLT,     
				tp_tasks[0],                 // TpTask [] direct_tasks,
				center_CLT.getNumSensors(),  // final int numSensors,
				center_CLT.getTilesX(),      // final int tilesX,
				center_CLT.getTilesY(),      // final int tilesY,
				center_CLT.getTileSize(),    // final int tileSize,
				invert_margins,              // final int margins, //
				invert_gap2,                 // final int    invert_gap2, // 10 // Maximal dual gap size for inversion (depends on scanning radius in tiles)  
				invert_iters,                // final int num_iter,
				invert_tolerance,            // final double invert_tolerance,
				dbg_title); // String    dbg_title)

		// second pass - this time with the inverted tasks, keeping their per-sensor offsets
		image_dtt.preSetReferenceTD( // do not run execConvertDirect, exit after updating tasks
				clt_parameters.img_dtt,     // ,     // final ImageDttParameters  imgdtt_params,    // Now just extra correlation parameters, later will include, most others
				tp_tasks[0],                // final TpTask[]            tp_tasks,
				true,                       // 			final boolean             keep_tiles_offsets, // keep per-sensors offsets in tp_tasks
				clt_parameters.gpu_sigma_r, // final double              gpu_sigma_r,     // 0.9, 1.1
				clt_parameters.gpu_sigma_b, // final double              gpu_sigma_b,     // 0.9, 1.1
				clt_parameters.gpu_sigma_g, // final double              gpu_sigma_g,     // 0.6, 0.7
				clt_parameters.gpu_sigma_m, // final double              gpu_sigma_m,     //  =       0.4; // 0.7;
				debugLevel);                // final int                 globalDebugLevel)

		image_dtt.execConvertDirect(
				use_reference,  // boolean  use_reference_buffer,
				wh,             // int []   wh,
				erase_clt,      // int      erase_clt) {
				true,           // boolean  no_kernels)
				true);          // boolean  use_center_image)
		double [][][] result = scene_CLT.renderDoubleFromTD (
				wh,              // null, // int []  wh,
				use_reference);  // boolean use_reference
		int order = 2;
		// must be reset before getImageData() is called below
		scene_CLT.setImageCenter(null); // disable back-propagate mode 
		double [][] coeffs = photometric(
				scene_CLT.getImageData(), // final double [][][] raw_img,
				result, // final double [][][] synth_img,
				width, // final int           width,
				order, // final int           order,
				debugLevel); // final int           debugLevel){

		final double   um_sigma =     clt_parameters.imp.cuas_w_um_sigma;    //  1.5;   // run UM to approximate LoG
		final int      good_margins = clt_parameters.imp.cuas_w_good_margins;// 12;     // consider defined values near image margins always good (do not have anything better anyway). Does not apply to NaN
		//         // areas that will be filled from different poses
		final double   threshold=     clt_parameters.imp.cuas_w_threshold;   // 20;	   // threshold to cut off high variations (it will be squared to compare to squared difference values)
		final double   scale_thresh = clt_parameters.imp.cuas_w_scale_thresh;// 1.5;    // Allow over-threshold values before GB, to block them completely after GB thresholding
		final boolean  expand_max =   clt_parameters.imp.cuas_w_expand_max;  // true;   // replace diff values by max of neighbors (fill zero-square gaps) 
		final double   outliers_w =   clt_parameters.imp.cuas_w_outliers;    // 0.001;  // Assign weights to outliers to avoid NaNs in bad areas
		final double   w_blur =       clt_parameters.imp.cuas_w_blur;        // 1.0;    // blur thresholded squared values to smooth transitions. 
		String         weights_debug_title = cuas_debug?scene_CLT.getImageName()+"-DBG-WEIGHTS":null;

		double [][] weights =  getFPNWeights( // add avg?
				result,       // final double [][][] synth_img,
				um_sigma,     // final double        um_sigma,
				good_margins, // final int           good_margins,
				threshold,    // final double        threshold,
				scale_thresh, // final double        scale_thresh,
				expand_max,   // final boolean       expand_max,
				outliers_w,   // final double        outliers_w,
				w_blur,       // final double        w_blur,
				width,        // final int           width
				weights_debug_title); //String              debug_title) {


		if (debugLevel > -3) {
			for (int nsens = 0; nsens < coeffs.length; nsens++) {
				System.out.println (String.format("%2d: C=%8f B=%8f A=%8f", nsens, coeffs[nsens][0], coeffs[nsens][1], coeffs[nsens][2]));
			}
		}

		double [][][] source_img =   scene_CLT.getImageData();
		// rows are already allocated (and zeroed) here, no per-sensor reallocation is needed
		double [][] diff_src_synth = new double [source_img.length][source_img[0][0].length];

		for (int nsens = 0; nsens < coeffs.length; nsens++) {
			for (int i = 0; i < diff_src_synth[nsens].length; i++) {
				double d = result[nsens][0][i];
				// source minus photometrically matched synthetic image (color 0 only)
				diff_src_synth[nsens][i] = source_img[nsens][0][i] - (coeffs[nsens][0] + coeffs[nsens][1] * d + coeffs[nsens][2]*d*d);
			}
		}

		if (cuas_debug) {
			String [] titles_top = {"src_images","back", "diff", "weights"};
			double [][][] dbg_data = new double [titles_top.length][result.length][result[0][0].length];
			String title = scene_CLT.getImageName()+"-BACKPROPAGATE-DIFF";
			String [] titles = new String[result.length];
			for (int i = 0; i < result.length; i++) {
				titles[i] = "SENS-"+i;
				dbg_data[0][i] = source_img[i][0];
				dbg_data[1][i] = result[i][0];
				dbg_data[2][i] = diff_src_synth[i];
				dbg_data[3][i] =  weights[i];
			}
			ImagePlus imp= ShowDoubleFloatArrays.showArraysHyperstack(
					dbg_data,          // double[][][] pixels, 
					width, // int          width, 
					title,         // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
					titles,       // String []    titles, // all slices*frames titles or just slice titles or null
					titles_top, // String []    frame_titles, // frame titles or null
					!batch_run);        // boolean      show)
			if ((imp != null) && !batch_run) {
				//refCLT
				String suffix ="-BACKPROPAGATE-DIFF-WEIGHTS";
				center_CLT.saveImagePlusInModelDirectory(
						suffix,          // String      suffix, // null - use title from the imp
						imp); // ImagePlus   imp)

			}
		}
		return new double [][][] {diff_src_synth, weights}; // result;
	}


	/**
	 * Converts the FPN and FPN weights arrays into a hyperstack image ("FPN" and "FPN-WEIGHTS"
	 * frames, one slice per sensor), optionally shows it and optionally saves it in the model
	 * directory of center_CLT.
	 *
	 * Saving is skipped (with a message) when no image was created, instead of passing a null
	 * image to saveImagePlusInModelDirectory(). This matches the {@code imp != null} check used
	 * for the other hyperstack saved in this class.
	 *
	 * @param fpn_weights {fpn, fpn_weights}, each [sensor][pixel]
	 * @param center_CLT  center scene: provides image width, number of sensors, image name and the model directory
	 * @param suffix      appended to the image name to form the title, and used as the file suffix when saving
	 * @param save        save the image in the model directory
	 * @param show        show the image
	 * @return the created image as returned by showArraysHyperstack() (not checked here, may be null)
	 */
	public static ImagePlus saveShowFPNWeights(
			double [][][] fpn_weights,
			QuadCLT       center_CLT,
			String        suffix,
			boolean       save,
			boolean       show) {
		final int width =       center_CLT.getTilesX()*center_CLT.getTileSize();
		final int num_sens =    center_CLT.getNumSensors();
		String [] titles_top = {"FPN","FPN-WEIGHTS"};
		String [] titles = new String [num_sens];
		for (int nsens = 0; nsens < num_sens; nsens++) {
			titles[nsens] = "SENSOR-"+nsens;
		}
		String title = center_CLT.getImageName()+suffix;
		ImagePlus imp = ShowDoubleFloatArrays.showArraysHyperstack(
				fpn_weights, // double[][][] pixels, 
				width,       // int          width, 
				title,       // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
				titles,      // String []    titles, // all slices*frames titles or just slice titles or null
				titles_top,  // String []    frame_titles, // frame titles or null
				show);       // boolean      show)
		if (save) {
			if (imp != null) {
				center_CLT.saveImagePlusInModelDirectory(
						suffix, // String      suffix, // null - use title from the imp
						imp); // ImagePlus   imp)
			} else {
				System.out.println("saveShowFPNWeights(): image \""+title+"\" was not created, nothing to save");
			}
		}
		return imp;
	}







	/**
	 * Builds inverse tile tasks from direct ones.
	 *
	 * Each direct task provides, for its tile, the per-sensor coordinates and the common center
	 * coordinates. These are handled as numSensors+1 independent 2D maps (the last one is the
	 * center). For every map the method:
	 *  1. fits a linear approximation (quadraticApproximation() with forceLinear) of the
	 *     coordinates as a function of the tile center (x, y);
	 *  2. stores the residuals (coordinates minus approximation) on a grid expanded by
	 *     {@code margin} tiles on each side and fills the undefined (NaN) cells with
	 *     TileProcessor.fillNaNs();
	 *  3. adds the approximation back over the whole expanded grid;
	 *  4. inverts the result with invertXYPairs().
	 * Finally only the tiles of the original grid for which all maps are defined (not NaN)
	 * are passed to setInverseTasks().
	 *
	 * A per-sensor coordinate pair may be null in a direct task. Such a sensor is skipped for
	 * that tile (its cell stays NaN and is filled from neighbors); before, it caused a
	 * NullPointerException in a worker thread.
	 *
	 * @param clt_parameters   not used by this method
	 * @param center_CLT       not used by this method
	 * @param direct_tasks     direct tasks; tiles without center or per-sensor coordinates are ignored
	 * @param numSensors       number of sensors (the center coordinates are handled as an extra one)
	 * @param tilesX           number of tiles in a row of the original grid
	 * @param tilesY           number of tile rows of the original grid
	 * @param tileSize         tile size in pixels
	 * @param margin           number of tiles added on each side of the grid for the inversion
	 * @param invert_gap2      maximal dual gap size for inversion (depends on scanning radius in tiles);
	 *                         negative - use twice the larger dimension of the expanded grid
	 * @param num_iter         number of iterations, passed to invertXYPairs()
	 * @param invert_tolerance tolerance, passed to invertXYPairs()
	 * @param dbg_title        if not null - show debug images using this title
	 * @return inverse tasks as created by setInverseTasks()
	 */
	public static TpTask [] invertTask(
			CLTParameters     clt_parameters,
			final QuadCLT center_CLT,     
			TpTask []     direct_tasks,
			final int     numSensors,
			final int     tilesX,
			final int     tilesY,
			final int     tileSize,
			final int     margin, // 4
			final int     invert_gap2, // 10 // Maximal dual gap size for inversion (depends on scanning radius in tiles)  
			final int     num_iter,
			final double  invert_tolerance, 
			String        dbg_title) {
		final int num_tiles = tilesX * tilesY;
		final double [][][]   directXY = new double [num_tiles][][];
		final double [][][][] mdata17 = new double [numSensors+1][direct_tasks.length][][]; // [3][];
		final double normal_damping = 0.001; // pull to horizontal if not enough data 
		final double [] damping = new double [] {normal_damping, normal_damping};

		final int tilesX_marg = tilesX + 2 * margin;
		final int tilesY_marg = tilesY + 2 * margin;
		final int num_tiles_marg = tilesX_marg * tilesY_marg; 

		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);

		// collect per-tile coordinates and prepare the data for the linear approximation
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int nTile = ai.getAndIncrement(); nTile < direct_tasks.length; nTile = ai.getAndIncrement()){
						TpTask tile = direct_tasks[nTile];
						int tx = tile.getTileX();
						int ty = tile.getTileY();
						double x = tx * tileSize + tileSize/2;  // tile center, pixels
						double y = ty * tileSize + tileSize/2;  
						int indx = tx+tilesX*ty;
						double [] centerXY =  tile.getDoubleCenterXY();
						if (centerXY == null) {
							continue;
						}
						double [][] xy = tile.getDoubleXY();
						if (xy == null) {
							continue;
						}
						double [][] xy17 = new double[numSensors+1][]; 
						System.arraycopy(xy, 0, xy17, 0, numSensors);
						xy17[numSensors] = centerXY; // center coordinates are handled as an extra sensor
						directXY[indx] = xy17; 
						double w = 1.0;
						for (int nsens = 0; nsens < xy17.length; nsens++) {
							if (xy17[nsens] == null) {
								continue; // no coordinates for this sensor in this tile
							}
							// {{x, y}, {fx, fy}, {weight}} - for each sensor separately
							mdata17[nsens][nTile] = new double [][] {
								{x, y},
								{xy17[nsens][0], xy17[nsens][1]},
								{w}};
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		// run approximation for each sensor+common separately
		ai.set(0);
		final double [][][]  approx2d17 = new double [mdata17.length][][];
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					PolynomialApproximation pa = new PolynomialApproximation();
					for (int nSens = ai.getAndIncrement(); nSens < mdata17.length; nSens = ai.getAndIncrement()){
						approx2d17[nSens] = pa.quadraticApproximation(
								mdata17[nSens],
								true,       // boolean forceLinear,  // use linear approximation
								damping,    // double [] damping, null OK
								-1);        // debug level
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);

		// residuals on the margin-expanded grid, NaN where undefined
		double [][][] filled_data = new double[mdata17.length][2][num_tiles_marg];
		for (int n = 0; n < filled_data.length; n++) {
			for (int dimens = 0; dimens < filled_data[n].length; dimens++) {
				Arrays.fill(filled_data[n][dimens], Double.NaN);
			}
		}
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					// one work item per (sensor, X/Y) pair
					for (int n = ai.getAndIncrement(); n < 2 * filled_data.length; n = ai.getAndIncrement()){
						int nsens = n/2;
						int dimens = n % 2;
						if (approx2d17[nsens] != null) {
							for (int tileY = 0; tileY < tilesY; tileY++)  {
								double y = tileY * tileSize + tileSize/2;  
								for (int tileX = 0; tileX < tilesX; tileX++) {
									int ntile = tileX + tilesX * tileY;
									if ((directXY[ntile] == null) || (directXY[ntile][nsens] == null)) {
										continue; // stays NaN, will be filled from neighbors
									}
									int ntile_marg = tileX + tilesX_marg * tileY + margin*(tilesX_marg + 1);
									double x = tileX * tileSize + tileSize/2;  // from (margin,margin)
									double pxy = 	approx2d17[nsens][dimens][0]*x + approx2d17[nsens][dimens][1]*y + approx2d17[nsens][dimens][2];
									filled_data[nsens][dimens][ntile_marg] = directXY[ntile][nsens][dimens] - pxy;
								}
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		int grow_width = (invert_gap2 >=0) ? invert_gap2 : 2* Math.max(tilesX_marg, tilesY_marg);
		int       num_passes = 10; // 20;
		for (int n = 0; n < filled_data.length; n++) {
			for (int dimens = 0; dimens < filled_data[n].length; dimens++) {
				filled_data[n][dimens] = TileProcessor.fillNaNs(
						filled_data[n][dimens],  // final double [] data,
						null,            // final boolean [] prohibit,
						tilesX_marg,     // int       width,
						grow_width,  // 100, // 2*width, // 16,           // final int grow,
						0.7,             // double    diagonal_weight, // relative to ortho
						num_passes,      // int       num_passes,
						0.03);           // final double     max_rchange, //  = 0.01 - does not need to be accurate
			}
		}
		// add the linear approximation back, now over the whole expanded grid
		double [][][] direct_filled = new double[filled_data.length][2][num_tiles_marg];
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int n = ai.getAndIncrement(); n < 2 * filled_data.length; n = ai.getAndIncrement()){
						int nsens = n/2;
						int dimens = n % 2;
						if (approx2d17[nsens] != null) {
							for (int tileY = 0; tileY < tilesY_marg; tileY++)  {
								double y = (tileY - margin) * tileSize + tileSize/2;  
								for (int tileX = 0; tileX < tilesX_marg; tileX++) {
									int ntile = tileX + tilesX_marg * tileY;
									double x = (tileX - margin) * tileSize + tileSize/2;  
									double pxy = 	approx2d17[nsens][dimens][0]*x + approx2d17[nsens][dimens][1]*y + approx2d17[nsens][dimens][2]; 	
									direct_filled[nsens][dimens][ntile] = filled_data[nsens][dimens][ntile] + pxy;
								}
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		// NOTE(review): 17 is hard-coded here (16 sensors + center), while the rest of the method uses numSensors+1 - confirm against invertXYPairs()
		double [][][] dbg_inv = (dbg_title != null) ? (new double [17][][]) : null;
		double [][][] inverse_data =  invertXYPairs(
				direct_filled,    // final double [][][] xy_direct,  // [sensor][2][ntile]
				approx2d17,       // final double [][][] approx, // [sensor][2][3]
				tilesX_marg,      // final int           tilesX,
				tilesY_marg,      // final int           tilesY,
				tileSize,         // final int           tileSize,
				margin,           // final int           margin,
				num_iter,         // final int           num_iter)
				invert_tolerance, // final double        invert_tolerance, 
				dbg_inv);         // final double [][][] dbg_inv){ // null or [17][][]
		// reformat, keep only tiles that have all sensors
		double [][][] tile_inv_offsets = new double [num_tiles][][]; // will have nulls
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					int num_sens = inverse_data.length;
					for (int nTile = ai.getAndIncrement(); nTile < num_tiles; nTile = ai.getAndIncrement()){
						int tileX = nTile % tilesX;
						int tileY = nTile / tilesX;
						int ntile_marg = (tileX + margin) + (tileY + margin) * tilesX_marg;
						boolean noNaN = true;
						for (int nsens = 0; nsens < num_sens; nsens++) {
							if (Double.isNaN(inverse_data[nsens][0][ntile_marg]) || Double.isNaN(inverse_data[nsens][1][ntile_marg])) {
								noNaN = false;
								break;
							}
						}
						if (noNaN) {
							tile_inv_offsets[nTile] = new double [num_sens][2];
							for (int nsens = 0; nsens < num_sens; nsens++) {
								tile_inv_offsets[nTile][nsens][0] = inverse_data[nsens][0][ntile_marg];
								tile_inv_offsets[nTile][nsens][1] = inverse_data[nsens][1][ntile_marg];
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);

		TpTask[]  tasks_inverse = setInverseTasks(
				tile_inv_offsets, // final double [][][] xy,         // new double [tiles][sensors][2]. Last [16] sensor (if xy.length == 17) - centerXY. use disparity=0 - it will not be used
				tilesX,     //final int           tilesX,     //
				numSensors); // final int           num_sensors)

		if (dbg_title != null) {
			String [] titles_top= {"src-X", "filled-X", "initial-inv-X", "inverse-X", "approx_X", "diff-X", "diff-X-filled", "src-Y", "filled-Y", "initial-inv-Y", "inverse-Y", "approx_Y", "diff-Y", "diff-Y-filled"};
			int indx_src =         0;
			int indx_filled =      1;
			int indx_initial_inv = 2;
			int indx_inverse =     3;
			int indx_approx =      4;
			int indx_diff =        5;
			int indx_diff_filled = 6;
			int indx_X =           0; // offset of the X frames in titles_top
			int indx_Y =           7; // offset of the Y frames in titles_top
			String [] titles = new String[approx2d17.length];
			double [][][] dbg_data = new double [titles_top.length][approx2d17.length][num_tiles_marg];
			for (int nsens=0;nsens < titles.length; nsens++) {
				if (nsens < ( titles.length - 1)) {
					titles[nsens] = "SENS-"+nsens;
				} else {
					titles[nsens] = "SENS-CENT";
				}
			}
			for (int i = 0; i < titles_top.length; i++) {
				for (int nsens=0;nsens < titles.length; nsens++) {
					Arrays.fill(dbg_data[i][nsens], Double.NaN);
				}
			}
			for (int ntile_marg = 0; ntile_marg < num_tiles_marg; ntile_marg++) {
				int tx = ntile_marg % tilesX_marg;
				int ty = ntile_marg / tilesX_marg;
				int tileX = tx - margin;
				int tileY = ty - margin;
				int ntile = tileX + tileY*tilesX; // valid only when inner is true
				boolean inner = (tileX >= 0) && (tileY >= 0) && (tileX < tilesX)  && (tileY < tilesY); 
				double x = tileX * tileSize + tileSize/2;  
				double y = tileY * tileSize + tileSize/2;  
				for (int nsens = 0; nsens < titles.length; nsens++) {
					if (inner && (directXY[ntile] != null) && (directXY[ntile][nsens] != null)){
						dbg_data[indx_src + indx_X][nsens][ntile_marg] = directXY[ntile][nsens][0]; // src-x
						dbg_data[indx_src + indx_Y][nsens][ntile_marg] = directXY[ntile][nsens][1]; // src-y
					}
					dbg_data[indx_filled + indx_X][nsens][ntile_marg] = direct_filled[nsens][0][ntile_marg];
					dbg_data[indx_filled + indx_Y][nsens][ntile_marg] = direct_filled[nsens][1][ntile_marg];
					if (approx2d17[nsens] != null) {
						double px = 	approx2d17[nsens][0][0]*x + approx2d17[nsens][0][1]*y + approx2d17[nsens][0][2]; 	
						double py = 	approx2d17[nsens][1][0]*x + approx2d17[nsens][1][1]*y + approx2d17[nsens][1][2]; 	
						dbg_data[indx_approx + indx_X][nsens][ntile_marg] = px;
						dbg_data[indx_approx + indx_Y][nsens][ntile_marg] = py;
						dbg_data[indx_diff +   indx_X][nsens][ntile_marg] = dbg_data[indx_src + indx_X][nsens][ntile_marg] - px;
						dbg_data[indx_diff +   indx_Y][nsens][ntile_marg] = dbg_data[indx_src + indx_Y][nsens][ntile_marg] - py;
					} else {
						System.out.println("invertTask(), nsens="+nsens);
					}
					dbg_data[indx_diff_filled + indx_X][nsens][ntile_marg] = filled_data [nsens][0][ntile_marg];
					dbg_data[indx_diff_filled + indx_Y][nsens][ntile_marg] = filled_data [nsens][1][ntile_marg];
					dbg_data[indx_initial_inv + indx_X][nsens][ntile_marg] = dbg_inv     [nsens][0][ntile_marg];
					dbg_data[indx_initial_inv + indx_Y][nsens][ntile_marg] = dbg_inv     [nsens][1][ntile_marg];
					dbg_data[indx_inverse +     indx_X][nsens][ntile_marg] = inverse_data[nsens][0][ntile_marg];
					dbg_data[indx_inverse +     indx_Y][nsens][ntile_marg] = inverse_data[nsens][1][ntile_marg];
				}
			}
			ShowDoubleFloatArrays.showArraysHyperstack(
					dbg_data,    // double[][][] pixels, 
					tilesX_marg, // int          width, 
					dbg_title,   // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
					titles,      // String []    titles, // all slices*frames titles or just slice titles or null
					titles_top,  // String []    frame_titles, // frame titles or null
					true);       // boolean      show)
		}
		return tasks_inverse;
	}


	/**
	 * Numerically inverts per-sensor coordinate maps that are sampled on a tile grid.
	 * <p>
	 * {@code xy_direct[sensor][0..1][ntile]} holds the forward map value for each grid tile; tile
	 * centers are at {@code (tile - margin) * tileSize + tileSize/2}. For every grid tile the method
	 * looks for the position {@code (xf, yf)} at which the bilinearly interpolated forward map equals
	 * that tile's own center. The initial guess is the inverse of the affine approximation
	 * {@code approx}; it is then refined by up to {@code num_iter} corrections, each one mapping the
	 * current forward error back through the linear part of the inverted affine transform.
	 * <p>
	 * A tile is set to NaN if, on any iteration, its current estimate is NaN, falls outside the grid
	 * cells that have all four corners, or interpolates to NaN. Finally, results outside
	 * {@code [0, tilesX*tileSize) x [0, tilesY*tileSize)} are also replaced with NaN.
	 *
	 * @param xy_direct        forward map, [sensor][2][ntile], may contain NaN
	 * @param approx           per-sensor affine approximation of the forward map, [sensor][2][3]
	 *                         ({@code {ax, ay, offset}} for X and for Y); assumed non-singular
	 * @param tilesX           grid width in tiles (including margins)
	 * @param tilesY           grid height in tiles (including margins)
	 * @param tileSize         tile size in pixels
	 * @param margin           number of margin tiles before tile 0 of the inner grid
	 * @param num_iter         maximal number of refinement iterations per tile
	 * @param invert_tolerance stop iterating when the last correction is shorter than this
	 * @param dbg_inv          null or an array that receives a copy of the initial (affine-only)
	 *                         inversion for its first {@code dbg_inv.length} sensors
	 * @return inverted map, [sensor][2][ntile], NaN where inversion failed
	 */
	public static double [][][] invertXYPairs(
			final double [][][] xy_direct,  // [sensor][2][ntile]
			final double [][][] approx, // [sensor][2][3]
			final int           tilesX, // marg, 82
			final int           tilesY, // marg, 66
			final int           tileSize,
			final int           margin,
			final int           num_iter,
			final double        invert_tolerance, 
			final double [][][] dbg_inv){ // null or [17][][]
		final int num_sensors = xy_direct.length;
		final int num_tiles = tilesX * tilesY;
		final int width =  tilesX * tileSize;
		final int height = tilesY * tileSize;
		final int tile_marg_dbg =  -5027; // tile index to print debug for; negative never matches
		final double        invert_tolerance2 = invert_tolerance * invert_tolerance; 
		// Invert the 2x3 affine transform of each sensor: linear part by the 2x2 inverse, then the offset.
		final double [][][] iapprox = new double [num_sensors][2][3];
		for (int nsens = 0; nsens < num_sensors; nsens++) {
			double det = approx[nsens][0][0]*approx[nsens][1][1]- approx[nsens][0][1]*approx[nsens][1][0];
			iapprox[nsens][0][0] =  approx[nsens][1][1]/det;
			iapprox[nsens][0][1] = -approx[nsens][0][1]/det;
			iapprox[nsens][1][0] = -approx[nsens][1][0]/det;
			iapprox[nsens][1][1] =  approx[nsens][0][0]/det;
			iapprox[nsens][0][2] = (-approx[nsens][1][1] * approx[nsens][0][2] + approx[nsens][0][1] * approx[nsens][1][2])/det;
			iapprox[nsens][1][2] =  (approx[nsens][1][0] * approx[nsens][0][2] - approx[nsens][0][0] * approx[nsens][1][2])/det;
		}
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);

		// initial approximation - invert x,y offsets
		final double [][][] xy = new double [num_sensors][2][num_tiles];
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int nSens = ai.getAndIncrement(); nSens < num_sensors; nSens = ai.getAndIncrement()){
						Arrays.fill(xy[nSens][0], Double.NaN);
						Arrays.fill(xy[nSens][1], Double.NaN);
						for (int tileY = 0; tileY < tilesY; tileY++) {
							double y = (tileY-margin) * tileSize + tileSize/2;
							for (int tileX = 0; tileX < tilesX; tileX++) {
								double x = (tileX-margin) * tileSize + tileSize/2;
								int ntile = tileX + tilesX * tileY;
								xy[nSens][0][ntile] = (iapprox[nSens][0][0] * x + iapprox[nSens][0][1] * y + iapprox[nSens][0][2]);
								xy[nSens][1][ntile] = (iapprox[nSens][1][0] * x + iapprox[nSens][1][1] * y + iapprox[nSens][1][2]);
								if (ntile == tile_marg_dbg) {
									System.out.println("invertXYPairs(): tileX_marg="+tileX+", tileY_marg="+tileY);
									System.out.println("invertXYPairs(): ntile="+ntile);
								}
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		if (dbg_inv != null) {
			for (int n = 0; n < dbg_inv.length; n++) {
				dbg_inv[n] = new double [2][];
				dbg_inv[n][0] = xy[n][0].clone();
				dbg_inv[n][1] = xy[n][1].clone();
			}
		}
		// iterative refinement of the initial approximation
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int nSens = ai.getAndIncrement(); nSens < num_sensors; nSens = ai.getAndIncrement()){
						final double [] dir_x = xy_direct[nSens][0];
						final double [] dir_y = xy_direct[nSens][1];
						for (int tileY = 0; tileY < tilesY; tileY++) {
							double y = (tileY-margin) * tileSize + tileSize/2;
							for (int tileX = 0; tileX < tilesX; tileX++) {
								double x = (tileX-margin) * tileSize + tileSize/2;
								int ntile = tileX + tilesX * tileY;
								// Must be reset for every tile: a success of an earlier tile in this row
								// should not hide a failure of this one.
								boolean OK = false;
								process_tile: {
									for (int niter = 0; niter <num_iter; niter++) {
										if (ntile == tile_marg_dbg) {
											System.out.println("invertXYPairs(): tileX_marg="+tileX+", tileY_marg="+tileY);
											System.out.println("invertXYPairs(): ntile="+ntile+", niter="+niter);
										}
										double xf = xy[nSens][0][ntile]; // current estimate, same coordinates as tile centers
										if (Double.isNaN(xf)) {
											break process_tile;
										}
										double yf = xy[nSens][1][ntile];
										if (Double.isNaN(yf)) {
											break process_tile;
										}
										// fractional grid position, inverse of x = (tileX-margin) * tileSize + tileSize/2
										double xtile = xf/tileSize - 0.5 + margin;
										int x0 = (int) Math.floor(xtile);
										if ((x0 < 0) || (x0 >= (tilesX - 1))) {
											break process_tile; // no right neighbor to interpolate with
										}
										double ytile=yf/tileSize - 0.5 + margin; // y = (tileY-margin) * tileSize + tileSize/2;
										int y0 = (int) Math.floor(ytile);
										if ((y0 < 0) || (y0 >= (tilesY - 1))) {
											break process_tile; // no bottom neighbor to interpolate with
										}
										int indx00 = x0 + y0 * tilesX;
										int indx01 = indx00 + 1;
										int indx10 = indx00 + tilesX;
										int indx11 = indx10 + 1;
										double fx = xtile - x0; 
										double fy = ytile - y0;
										// bi-linear interpolation of the forward map between 4 surrounding tile centers
										double bilin_x =
												(1-fx)*(1-fy)*dir_x[indx00]+
												(  fx)*(1-fy)*dir_x[indx01]+
												(1-fx)*(  fy)*dir_x[indx10]+
												(  fx)*(  fy)*dir_x[indx11];
										double bilin_y =
												(1-fx)*(1-fy)*dir_y[indx00]+
												(  fx)*(1-fy)*dir_y[indx01]+
												(1-fx)*(  fy)*dir_y[indx10]+
												(  fx)*(  fy)*dir_y[indx11];
										// both components have to be defined
										if (Double.isNaN(bilin_x) || Double.isNaN(bilin_y)) {
											break process_tile;
										}
										double err_x = bilin_x - x;
										double err_y = bilin_y - y;

										// map forward error back using linear part of the inverted affine transform
										double dx = (iapprox[nSens][0][0] *  err_x + iapprox[nSens][0][1] *  err_y);
										double dy = (iapprox[nSens][1][0] *  err_x + iapprox[nSens][1][1] *  err_y);
										xy[nSens][0][ntile] -= dx;
										xy[nSens][1][ntile] -= dy;
										double corr2 = dx * dx + dy*dy;
										if (corr2 < invert_tolerance2) {
											break; // converged
										}
									}
									OK = true; // converged or used all iterations without failing
								}
								if (!OK) {
									xy[nSens][0][ntile] = Double.NaN;
									xy[nSens][1][ntile] = Double.NaN;
								}
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		boolean skip_filter = false;
		if (!skip_filter) {
			// invalidate results that are outside of the [0,width) x [0,height) area
			ai.set(0);
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					@Override
					public void run() {
						for (int nSens = ai.getAndIncrement(); nSens < num_sensors; nSens = ai.getAndIncrement()){
							for (int ntile = 0; ntile < num_tiles; ntile++) {
								double xf = xy[nSens][0][ntile]; 
								double yf = xy[nSens][1][ntile];
								if (Double.isNaN(xf)|| Double.isNaN(yf) || (xf < 0) || (yf < 0) || (xf >= width) || (yf >= height)) {
									xy[nSens][0][ntile] = Double.NaN;
									xy[nSens][1][ntile] = Double.NaN;
								}
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		return xy;
	}

	/**
	 * Creates one {@link TpTask} per tile that has coordinates defined ({@code xy[nTile] != null}).
	 * Each task has the correlation and inter-scene enable bits set, zero target disparity and
	 * per-sensor coordinates copied (as floats) from {@code xy}. The task center is taken from the
	 * extra entry {@code xy[nTile][num_sensors]} when the tile has more than {@code num_sensors}
	 * entries, otherwise from sensor 0.
	 * <p>
	 * Tasks are generated by several threads, so their order in the result is not defined.
	 *
	 * @param xy          [tile][sensor (+ optional center)][2] coordinates, null for skipped tiles
	 * @param tilesX      number of tiles in a row, used to split tile index into tileX, tileY
	 * @param num_sensors number of sensors in each task
	 * @return array of tasks for non-null tiles only
	 */
	public static TpTask[]  setInverseTasks(
			final double [][][] xy,         // new double [tiles][sensors][2]. Last [16] sensor (if xy.length == 17) - centerXY. use disparity=0 - it will not be used
			final int           tilesX,     //
			final int           num_sensors)//
	{
		final int enable_bits = (1 << GPUTileProcessor.TASK_CORR_EN) |  (1 << GPUTileProcessor.TASK_INTER_EN);
		final int num_tiles = xy.length;
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger next_tile = new AtomicInteger(0);
		final AtomicInteger num_tasks = new AtomicInteger(0);
		final TpTask[] all_tasks = new TpTask[num_tiles]; // large enough for all tiles, trimmed at the end
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					int nTile;
					while ((nTile = next_tile.getAndIncrement()) < num_tiles) {
						final double [][] tile_xy = xy[nTile];
						if (tile_xy == null) {
							continue;
						}
						final TpTask task = new TpTask(num_sensors, nTile % tilesX, nTile / tilesX);
						task.task = enable_bits;
						task.target_disparity = 0.0f; // will it be used?
						// separate center entry follows sensors' ones if present, otherwise use sensor 0
						final int center_index;
						if (tile_xy.length > num_sensors) {
							center_index = num_sensors;
						} else {
							center_index = 0;
						}
						task.setCenterXY(tile_xy[center_index]); // this pair of coordinates will be used by GPU to set tp_task.xy and task.disp_dist!
						final float [][] sens_xy = new float [num_sensors][2];
						task.xy = sens_xy;
						for (int nsens = 0; nsens < num_sensors; nsens++) {
							final double [] src = tile_xy[nsens];
							sens_xy[nsens][0] = (float) src[0];
							sens_xy[nsens][1] = (float) src[1];
						}
						all_tasks[num_tasks.getAndIncrement()] = task;
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		// keep only the filled part
		return Arrays.copyOf(all_tasks, num_tasks.get());
	}


	/**
	 * Finds image height from the pixel count of the first non-null channel.
	 *
	 * @param image_data [channel][color][pixel] image data, channels may be null
	 * @param width      image width in pixels
	 * @return length of color 0 of the first non-null channel divided by {@code width},
	 *         or 0 if all channels are null
	 */
	public static int getImageHeight(
			double [][][] image_data,
			int width) {
		for (double [][] channel : image_data) {
			if (channel == null) {
				continue; // look for the first available channel
			}
			final int num_pix = channel[0].length;
			return num_pix / width;
		}
		return 0;

	}

	/**
	 * Calculates weighted "average row" for each channel (color 0 only), processing channels in
	 * parallel threads. For each x the values of all image lines are averaged; values within
	 * +/- {@code max_abs} have weight 1.0, all other ones - {@code weight_outlier}.
	 *
	 * @param image_data     [channel][color][pixel] image data, null channels are skipped
	 * @param width          image width in pixels
	 * @param max_abs        only values with absolute value not exceeding this get full weight
	 * @param weight_outlier weight of all other values
	 * @return [channel][width] average rows, all zeros for null channels
	 */
	public static double [][]   getRowAvgMulti(
			final double [][][] image_data,
			final int           width,
			final double        max_abs,         // only average within +/- max_abs
			final double        weight_outlier){ // small weight of outliers to avoid instability
		final int num_sens = image_data.length;
		final int height = getImageHeight(image_data, width);
		final double [][] image_row_avg =  new double [num_sens][width];
		final double [][] weight_row_avg = new double [num_sens][width];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					int nChn;
					while ((nChn = ai.getAndIncrement()) < num_sens) {
						if (image_data[nChn] == null) {
							continue;
						}
						final double [] pix =   image_data[nChn][0];
						final double [] sum =   image_row_avg[nChn];
						final double [] sum_w = weight_row_avg[nChn];
						for (int line = 0; line < height; line++) {
							final int line_start = line * width;
							for (int x = 0; x < width; x++) {
								final double d = pix[line_start + x];
								final double w;
								if (Math.abs(d) <= max_abs) {
									w = 1.0;
								} else {
									w = weight_outlier; // also used for NaN
								}
								sum[x] +=   w*d;
								sum_w[x] += w;
							}
						}
						// normalize accumulated sums
						for (int x = 0; x < width; x++) {
							sum[x] /= sum_w[x];
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return image_row_avg;
	}

	/**
	 * Single-threaded calculation of the weighted "average row" for each channel (color 0 only).
	 * For each x the values of all image lines are averaged; values within +/- {@code max_abs}
	 * have weight 1.0, all other ones - {@code weight_outlier}.
	 *
	 * @param image_data     [channel][color][pixel] image data, null channels are skipped
	 * @param width          image width in pixels
	 * @param max_abs        only values with absolute value not exceeding this get full weight
	 * @param weight_outlier weight of all other values
	 * @return [channel][width] average rows, all zeros for null channels
	 */
	public static double [][]   getRowAvg(
			final double [][][] image_data,
			final int           width,
			final double        max_abs,         // only average within +/- max_abs
			final double        weight_outlier){ // small weight of outliers to avoid instability
		final int num_sens = image_data.length;
		final int height = getImageHeight(image_data, width);
		final double [][] image_row_avg =  new double [num_sens][width];
		final double [][] weight_row_avg = new double [num_sens][width];
		for (int nChn = 0; nChn < num_sens; nChn++) {
			if (image_data[nChn] == null) {
				continue;
			}
			final double [] pix =   image_data[nChn][0];
			final double [] sum =   image_row_avg[nChn];
			final double [] sum_w = weight_row_avg[nChn];
			int indx = 0;
			for (int line = 0; line < height; line++) {
				for (int x = 0; x < width; x++, indx++) {
					final double d = pix[indx];
					double w = weight_outlier; // default for outliers (and NaN)
					if (Math.abs(d) <= max_abs) {
						w = 1.0;
					}
					sum[x] +=   w*d;
					sum_w[x] += w;
				}
			}
			for (int x = 0; x < width; x++) {
				sum[x] /= sum_w[x];
			}
		}
		return image_row_avg;
	}

	/**
	 * Calculates weighted "average column" for each channel (color 0 only), processing channels in
	 * parallel threads. For each y the values of the whole image line are averaged; values within
	 * +/- {@code max_abs} have weight 1.0, all other ones - {@code weight_outlier}.
	 *
	 * @param image_data     [channel][color][pixel] image data, null channels are skipped
	 * @param width          image width in pixels
	 * @param max_abs        only values with absolute value not exceeding this get full weight
	 * @param weight_outlier weight of all other values
	 * @return [channel][height] average columns, all zeros for null channels
	 */
	public static double [][] getColAvgMulti(
			final double [][][] image_data,
			final int           width,
			final double        max_abs,         // only average within +/- max_abs
			final double        weight_outlier){ // small weight of outliers to avoid instability
		final int num_sens = image_data.length;
		final int height = getImageHeight(image_data, width);
		final double [][] image_col_avg =  new double [num_sens][height];
		final double [][] weight_col_avg = new double [num_sens][height];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					int nChn;
					while ((nChn = ai.getAndIncrement()) < num_sens) {
						if (image_data[nChn] == null) {
							continue;
						}
						final double [] pix =   image_data[nChn][0];
						final double [] sum =   image_col_avg[nChn];
						final double [] sum_w = weight_col_avg[nChn];
						for (int y = 0; y < height; y++) {
							final int line_start = y * width;
							for (int x = 0; x < width; x++) {
								final double d = pix[line_start + x];
								final double w;
								if (Math.abs(d) <= max_abs) {
									w = 1.0;
								} else {
									w = weight_outlier; // also used for NaN
								}
								sum[y] +=   w*d;
								sum_w[y] += w;
							}
						}
						// normalize accumulated sums
						for (int y = 0; y < height; y++) {
							sum[y] /= sum_w[y];
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return image_col_avg;
	}

	/**
	 * Single-threaded calculation of the weighted "average column" for each channel (color 0 only).
	 * For each y the values of the whole image line are averaged; values within +/- {@code max_abs}
	 * have weight 1.0, all other ones - {@code weight_outlier}.
	 *
	 * @param image_data     [channel][color][pixel] image data, null channels are skipped
	 * @param width          image width in pixels
	 * @param max_abs        only values with absolute value not exceeding this get full weight
	 * @param weight_outlier weight of all other values
	 * @return [channel][height] average columns, all zeros for null channels
	 */
	public static double [][] getColAvg(
			final double [][][] image_data,
			final int           width,
			final double        max_abs,         // only average within +/- max_abs
			final double        weight_outlier){ // small weight of outliers to avoid instability
		final int num_sens = image_data.length;
		final int height = getImageHeight(image_data, width);
		final double [][] image_col_avg =  new double [num_sens][height];
		final double [][] weight_col_avg = new double [num_sens][height];
		for (int nChn = 0; nChn < num_sens; nChn++) {
			if (image_data[nChn] == null) {
				continue;
			}
			final double [] pix =   image_data[nChn][0];
			final double [] sum =   image_col_avg[nChn];
			final double [] sum_w = weight_col_avg[nChn];
			int indx = 0;
			for (int y = 0; y < height; y++) {
				for (int x = 0; x < width; x++, indx++) {
					final double d = pix[indx];
					double w = weight_outlier; // default for outliers (and NaN)
					if (Math.abs(d) <= max_abs) {
						w = 1.0;
					}
					sum[y] +=   w*d;
					sum_w[y] += w;
				}
			}
			for (int y = 0; y < height; y++) {
				sum[y] /= sum_w[y];
			}
		}
		return image_col_avg;
	}




	/**
	 * Subtracts per-channel average row and average column from the image data (color 0 only):
	 * {@code pixel[y*width + x] -= image_row_avg[chn][x] + image_col_avg[chn][y]}.
	 * Image dimensions are the lengths of the row/column arrays of the last non-null channel.
	 *
	 * @param image_data    [channel][color][pixel] source data, null channels are skipped
	 * @param image_row_avg [channel][width] values to subtract from each line
	 * @param image_col_avg [channel][height] values to subtract from each column
	 * @param inplace       true - modify {@code image_data} pixel arrays, false - work on copies
	 * @return [channel][1][pixel] corrected data, {@code [channel][0]} is null for null input channels
	 */
	public static double [][][] applyRowCol(
			final double [][][] image_data,
			final double [][]   image_row_avg,
			final double [][]   image_col_avg,
			final boolean       inplace ){
		final int num_sens = image_data.length;
		final double [][][] out_data = new double [num_sens][1][];
		int w=0,h=0;
		for (int nchn = 0; nchn < num_sens; nchn ++) {
			if (image_data[nchn] == null) {
				continue;
			}
			if (inplace) {
				out_data[nchn][0] = image_data[nchn][0];
			} else {
				out_data[nchn][0] = image_data[nchn][0].clone();
			}
			w = image_row_avg[nchn].length;
			h = image_col_avg[nchn].length;
		}
		final int width =  w;
		final int height = h;
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					int nChn;
					while ((nChn = ai.getAndIncrement()) < num_sens) {
						if (image_data[nChn] == null) {
							continue;
						}
						final double [] pix =      out_data[nChn][0];
						final double [] row_corr = image_row_avg[nChn];
						final double [] col_corr = image_col_avg[nChn];
						for (int y = 0; y < height; y++) {
							final int line_start = y * width;
							for (int x = 0; x < width; x++) {
								pix[line_start + x] -= row_corr[x] + col_corr[y];
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return out_data;
	}

	/**
	 * Applies per-scene row/column corrections to each scene of the array by calling
	 * {@code applyRowCol(scene, row, col)}.
	 *
	 * @param scenes scenes to correct
	 * @param rows   [scene][channel][width] row corrections or null (then null is passed for each scene)
	 * @param cols   [scene][channel][height] column corrections or null (then null is passed for each scene)
	 */
	public static void applyRowCol( // dangerous - leaves image_row_avg,image_col_avg not set
			final QuadCLT []     scenes,
			final double [][][]  rows,
			final double [][][]  cols ){
		final boolean has_rows = (rows != null);
		final boolean has_cols = (cols != null);
		int nscene = 0;
		for (QuadCLT scene : scenes) {
			applyRowCol(
					scene,                          // final QuadCLT       scene,
					has_rows ? rows[nscene] : null, // final double [][]   row,
					has_cols ? cols[nscene] : null);// final double [][]   col )
			nscene++;
		}
	}

	/**
	 * For each scene stores its row/column corrections with {@code setRowCol()} and then calls
	 * the scene's own {@code applyRowCol()}.
	 *
	 * @param scenes scenes to process
	 * @param rows   [scene][channel][width] row corrections or null (then null is set for each scene)
	 * @param cols   [scene][channel][height] column corrections or null (then null is set for each scene)
	 */
	public static void setApplyRowCol(
			final QuadCLT []     scenes,
			final double [][][]  rows,
			final double [][][]  cols ){
		final boolean has_rows = (rows != null);
		final boolean has_cols = (cols != null);
		int nscene = 0;
		for (QuadCLT scene : scenes) {
			final double [][] scene_row = has_rows ? rows[nscene] : null;
			final double [][] scene_col = has_cols ? cols[nscene] : null;
			scene.setRowCol(scene_row, scene_col);
			scene.applyRowCol();
			nscene++;
		}
	}



	/**
	 * Applies row/column correction to the image data of a single scene (color 0 only), replacing
	 * the correction that was applied earlier. Does nothing if {@code scene.rowColDiffers(row, col)}
	 * returns false.
	 * <p>
	 * For each pixel the previously applied corrections ({@code scene.getRowApplied()},
	 * {@code scene.getColApplied()}) are added back if they are not null, then the new
	 * {@code row[chn][x]} and/or {@code col[chn][y]} are subtracted. After that the scene is
	 * notified with {@code setRowColApplied(row, col)} and {@code setHasNewImageData(true)}.
	 * <p>
	 * Image dimensions are taken from the new corrections when available, otherwise from the
	 * previously applied ones; a dimension that has no correction array at all is derived from
	 * the pixel count and the other dimension. This allows {@code row} and/or {@code col} to be null.
	 *
	 * @param scene scene to modify (its image data is updated in place)
	 * @param row   [channel][width] new row correction or null
	 * @param col   [channel][height] new column correction or null
	 */
	public static void applyRowCol(
			final QuadCLT       scene,
			final double [][]   row,
			final double [][]   col ){
		if (!scene.rowColDiffers(
				row,   // double [][] row,
				col)) { // double [][] col)) {
			return; // requested correction is already applied
		}
		final double [][] row_applied = scene.getRowApplied();
		final double [][] col_applied = scene.getColApplied();
		final double [][][] image_data = scene.getImageData();
		if (image_data == null) {
			System.out.println("applyRowCol (): image_data==null");
			return;
		}
		final int num_sens = image_data.length;
		// any available row/column array defines image width/height
		final double [][] width_src =  (row != null) ? row : row_applied;
		final double [][] height_src = (col != null) ? col : col_applied;
		int w=0,h=0;
		for (int nchn = 0; nchn < num_sens; nchn ++) if (image_data[nchn] != null) {
			if (width_src != null) {
				w = width_src[nchn].length;
			}
			if (height_src != null) {
				h = height_src[nchn].length;
			}
			// only one dimension known - get the other one from the number of pixels
			if ((width_src == null) && (h > 0)) {
				w = image_data[nchn][0].length / h;
			} else if ((height_src == null) && (w > 0)) {
				h = image_data[nchn][0].length / w;
			}
			break;
		}
		final int width =  w;
		final int height = h;
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int nChn = ai.getAndIncrement(); nChn < num_sens; nChn = ai.getAndIncrement()) if (image_data[nChn] != null){
						final double [] pix =     image_data[nChn][0];
						final double [] row_old = (row_applied != null) ? row_applied[nChn] : null; // unlikely
						final double [] col_old = (col_applied != null) ? col_applied[nChn] : null; // unlikely
						final double [] row_new = (row != null) ? row[nChn] : null;
						final double [] col_new = (col != null) ? col[nChn] : null;
						int indx = 0;
						for (int y = 0; y < height; y++) {
							for (int x = 0; x < width; x++) {
								double d = pix[indx];
								// undo what was applied before
								if (row_old != null) {
									d += row_old[x];
								}
								if (col_old != null) {
									d += col_old[y];
								}
								// apply new correction
								if ((row_new != null) && (col_new != null)) { // most likely
									d -= row_new[x] + col_new[y];
								} else if (row_new != null) { // only one of row, col is not null (not used so far)
									d -= row_new[x];
								} else if (col_new != null) {
									d -= col_new[y];
								}
								pix[indx++] = d;
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		//			  quadCLT.setFpnApplied(fpn);
		scene.setRowColApplied(row, col);
		scene.setHasNewImageData(true);
	}






	/**
	 * Calculates per-scene, per-sensor average row and average column (color 0 only), one scene
	 * per thread step.
	 * <p>
	 * For each scene and sensor a copy of the image is made, {@code fpn} is subtracted from it
	 * unless {@code fpn} is null or is the same instance as the scene's {@code getAppliedFPN()},
	 * {@code OpticalFlow.applyUMDouble()} is applied with weight 1.0, and then the results of
	 * {@link #getRowAvg} and {@link #getColAvg} are stored. Scene image data is not modified.
	 *
	 * @param quadCLTs       scenes, all are used (none may be null)
	 * @param fpn            [sensor][color][pixel] FPN to subtract, or null if it is already applied;
	 *                       when null, the number of sensors and pixels is taken from the first scene
	 * @param width          image width in pixels
	 * @param um_sigma       sigma passed to {@code OpticalFlow.applyUMDouble()}
	 * @param max_abs        only values within +/- max_abs get full weight when averaging
	 * @param weight_outlier weight of the other values
	 * @return {rows, cols}: rows[scene][sensor][width], cols[scene][sensor][height]
	 */
	public static double [][][][]calculateRowCol(
			final QuadCLT []     quadCLTs,
			final double [][][]  fpn, // if null - already applied
			final int            width,
			final double         um_sigma,
			final double         max_abs, //  =        100.0;
			final double         weight_outlier // = 0.001;
			){
		final int ncolor = 0;
		final int num_scenes =  quadCLTs.length; // will use all scenes
		if ((fpn == null) && (num_scenes == 0)) {
			// nothing to process and no way to find dimensions
			return new double [][][][] {new double [0][][], new double [0][][]};
		}
		// fpn may be null (already applied) - then use the first scene to get dimensions
		final double [][][] dims_src = (fpn != null) ? fpn : quadCLTs[0].getImageData();
		final int num_sens =     dims_src.length;
		final int num_pix = dims_src[0][ncolor].length;
		final int height = num_pix / width;
		final double [][][] rows = new double[num_scenes][num_sens][width];
		final double [][][] cols = new double[num_scenes][num_sens][height];
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int nScene = ai.getAndIncrement(); nScene < num_scenes; nScene = ai.getAndIncrement()) {
						double [][][] data_chn = new double [num_sens][][];
						// same FPN instance is already applied to this scene - do not subtract it again
						final boolean subtract_fpn = (fpn != null) && (quadCLTs[nScene].getAppliedFPN() != fpn);
						for (int nsens = 0; nsens < num_sens; nsens++) {
							// work on a copy, scene data is preserved
							double [] img =      quadCLTs[nScene].getImageData()[nsens][ncolor].clone();
							if (subtract_fpn) {
								for (int i = 0; i < num_pix; i++) {
									img[i] -= fpn[nsens][ncolor][i];
								}
							}
							OpticalFlow.applyUMDouble(
									img, // final double [] data,
									width,         // final int       width,
									um_sigma,      // final double um_sigma,
									1.0);    // final double um_weight)
							data_chn[nsens] =  new double [][] {img};
						}
						rows[nScene] = getRowAvg(
								data_chn, // final double [][][] image_data,
								width,             // final int           width,
								max_abs,           // final double        max_abs,         // only average within +/- max_abs
								weight_outlier);   // final double        weight_outlier)
						cols[nScene] = getColAvg(
								data_chn, // final double [][][] image_data,
								width,             // final int           width,
								max_abs,           // final double        max_abs,         // only average within +/- max_abs
								weight_outlier);   // final double        weight_outlier)
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return new double [][][][] {rows,cols};
	}


	/**
	 * Builds (without showing) a debug hyperstack for one sensor with the stages of FPN and
	 * row/column processing of each scene in the range. Top-level frames are: source image,
	 * source minus FPN, result of {@code OpticalFlow.applyUMDouble()} with weight 1.0, and the
	 * same after subtracting the average row and the average column. Only color 0 is used.
	 *
	 * @param quadCLTs       scenes
	 * @param fpn            [sensor][color][pixel] FPN to subtract
	 * @param range          {first, last} scene indices (inclusive), required
	 * @param nsens          sensor to show
	 * @param width          image width in pixels
	 * @param um_sigma       sigma passed to {@code OpticalFlow.applyUMDouble()}
	 * @param max_abs        only values within +/- max_abs get full weight when averaging
	 * @param weight_outlier weight of the other values
	 * @param title          image title
	 * @return value returned by {@code ShowDoubleFloatArrays.showArraysHyperstack()} called with show=false
	 */
	public static ImagePlus debugFPN(
			final QuadCLT []     quadCLTs,
			final double [][][]  fpn,
			final int []         range, // required
			final int            nsens,
			final int            width,
			final double         um_sigma,
			final double         max_abs, //  =        100.0;
			final double         weight_outlier, // = 0.001;
			final String         title){

		final int    color = 0;
		final double unsharp_weight = 1.0;
		final int    first_scene = range[0];
		final int    scene_count = range[1] - first_scene + 1;
		final int    num_sens = quadCLTs[first_scene].getNumSensors();
		final int    num_pix =  quadCLTs[first_scene].getImageData()[nsens][color].length;
		final String [] titles_top = {"src","src-fpn","UM"+um_sigma, "row_col"};
		final double [][][] data = new double [titles_top.length][scene_count][num_pix];
		final String [] titles = new String [scene_count];
		for (int iscene = 0; iscene < scene_count; iscene++) {
			final QuadCLT scene = quadCLTs[first_scene + iscene];
			titles [iscene] = scene.getImageName();
			// stage 0: source copy
			final double [] src = scene.getImageData()[nsens][color].clone();
			data[0][iscene] = src;
			// stage 1: FPN subtracted
			final double [] no_fpn = src.clone();
			data[1][iscene] = no_fpn;
			final double [] sens_fpn = fpn[nsens][color];
			for (int i = 0; i < num_pix; i++) {
				no_fpn[i] -= sens_fpn[i];
			}
			// stage 2: unsharp mask
			final double [] unsharp = no_fpn.clone();
			data[2][iscene] = unsharp;
			OpticalFlow.applyUMDouble(
					unsharp,        // final double [] data,
					width,          // final int       width,
					um_sigma,       // final double um_sigma,
					unsharp_weight);// final double um_weight)

			// stage 3: rows/columns subtracted; only selected sensor is non-null
			final double [][][] single_sens = new double [num_sens][][];
			single_sens[nsens] = new double [][] {unsharp};
			final double [][] avg_row = getRowAvgMulti(
					single_sens,     // final double [][][] image_data,
					width,           // final int           width,
					max_abs,         // final double        max_abs,         // only average within +/- max_abs
					weight_outlier); // final double        weight_outlier)
			final double [][] avg_col = getColAvgMulti(
					single_sens,     // final double [][][] image_data,
					width,           // final int           width,
					max_abs,         // final double        max_abs,         // only average within +/- max_abs
					weight_outlier); // final double        weight_outlier)
			final double [][][] corrected = applyRowCol(
					single_sens,     // final double [][][] image_data,
					avg_row,         // final double [][]   image_row_avg,
					avg_col,         // final double [][]   image_col_avg,
					false);          // final boolean       inplace )
			data[3][iscene] = corrected[nsens][0];
		}

		return ShowDoubleFloatArrays.showArraysHyperstack(
				data,        // double[][][] pixels, 
				width,       // int          width, 
				title,       // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
				titles,      // String []    titles, // all slices*frames titles or just slice titles or null
				titles_top,  // String []    frame_titles, // frame titles or null
				false);      // boolean      show)
	}	    

	/**
	 * Calculates per-sensor photometric correction by comparing raw and synthetic images.
	 * Only uses color[0]. Pixels where either raw or synthetic value is NaN are ignored.
	 * <p>
	 * If {@code order > 0}, pairs {@code {synthetic, raw}} of all valid pixels are passed to
	 * {@code PolynomialApproximation.polynomialApproximation1d()} and the first coefficients it
	 * returns are stored unchanged as {@code result[sensor][0..2]} (the third one is 0.0 if the
	 * polynomial has only two coefficients). NOTE(review): presumably these are constant, linear
	 * and quadratic coefficients of raw as a function of synthetic - confirm against
	 * PolynomialApproximation.
	 * <p>
	 * If {@code order <= 0}, only {@code result[sensor][0]} is set - to the average of
	 * (raw - synthetic) over valid pixels (NaN if there are no valid pixels).
	 *
	 * @param raw_img    [sensor][color][pixel] raw image data
	 * @param synth_img  [sensor][color][pixel] synthetic image data
	 * @param width      image width (not used)
	 * @param order      polynomial order, 0 or negative - offset only
	 * @param debugLevel debug level (not used)
	 * @return [sensor][3] correction coefficients
	 */
	public static double [][] photometric(
			final double [][][] raw_img,
			final double [][][] synth_img,
			final int           width,
			final int           order,
			final int           debugLevel){

		final int         num_sensors = raw_img.length;
		final double [][] lwir_corr = new double [num_sensors][3];
		final int         num_pix = raw_img[0][0].length;
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		final int poly_debug = 0;
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					// per-thread buffer of {synthetic, raw} pairs, reused for each sensor
					double [][] pa_data = new double [num_pix][];
					for (int nSens = ai.getAndIncrement(); nSens < num_sensors; nSens = ai.getAndIncrement()) {
						Arrays.fill(pa_data, null);
						int num_good = 0;
						double avg = 0;
						for (int npix = 0; npix < num_pix; npix++) {
							double raw_d = raw_img[nSens][0][npix];
							double synth_d = synth_img[nSens][0][npix];
							if (!Double.isNaN(raw_d) && !Double.isNaN(synth_d)) {
								pa_data[npix] = new double [] {synth_d,raw_d};
								num_good++;
								avg += (raw_d - synth_d);
							}
						}
						avg /= num_good;
						if (order  > 0) {
							double [] pa_coeff =(new PolynomialApproximation(poly_debug)).polynomialApproximation1d(pa_data, order);
							lwir_corr[nSens][0] = pa_coeff[0];
							lwir_corr[nSens][1] = pa_coeff[1];
							lwir_corr[nSens][2] = (pa_coeff.length > 2) ? pa_coeff[2] : 0.0; // absent for linear fit
						} else {
							lwir_corr[nSens][0] = avg;
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return lwir_corr;
	}

	/**
	 * Calculates per-sensor, per-pixel weights from the synthetic images (color 0 only), one
	 * sensor per thread step. Processing of each sensor:
	 * <ol>
	 * <li>blur the image (NaN replaced with 0) with {@code um_sigma};</li>
	 * <li>set weight to {@code max(scale_thresh - d*d, bad_val)}, where
	 *     {@code d = (pixel - blurred)/threshold} and {@code bad_val = 1.0 - scale_thresh};
	 *     NaN pixels get {@code bad_val}, other pixels outside of the rectangle inset by
	 *     {@code good_margins} get {@code scale_thresh};</li>
	 * <li>if {@code expand_max} - replace each non-border pixel with the minimum of itself and
	 *     its 8 neighbors;</li>
	 * <li>if {@code w_blur > 0} - blur the weights with {@code w_blur};</li>
	 * <li>limit values above 1.0 to 1.0, other values below {@code outliers_w} to {@code outliers_w}.</li>
	 * </ol>
	 * When {@code debug_title} is not null, intermediate results are shown as a hyperstack.
	 *
	 * @param synth_img    [sensor][color][pixel] synthetic images
	 * @param um_sigma     blur sigma for step 1
	 * @param good_margins width of the border where non-NaN pixels are considered good
	 * @param threshold    scale of the difference between the image and its blurred version
	 * @param scale_thresh maximal weight value before blurring/limiting
	 * @param expand_max   enable step 3
	 * @param outliers_w   minimal output weight
	 * @param w_blur       blur sigma for step 4, 0 or negative - skip
	 * @param width        image width in pixels
	 * @param debug_title  title of the debug image or null for no debug
	 * @return [sensor][pixel] weights
	 */
	public static double [][] getFPNWeights(
			final double [][][] synth_img,
			final double        um_sigma,
			final int           good_margins,
			final double        threshold,
			final double        scale_thresh,
			final boolean       expand_max,
			final double        outliers_w,
			final double        w_blur,
			final int           width,
			String              debug_title) {
		final int         num_sensors = synth_img.length;
		final int         num_pix = synth_img[0][0].length;
		final int         height = num_pix/width;
		final boolean     debug = (debug_title != null);
		final double      blur_accuracy = 0.01;
		final double      bad_val = 1.0 - scale_thresh; // -0.5;
		final double [][] weights = new double [num_sensors][num_pix];
		final Rectangle rborder = new Rectangle(good_margins,good_margins,width - 2*good_margins, height - 2 * good_margins);
		// 8 neighbors, clockwise from the top one
		final int [] neib_offsets = {-width,-width+1, 1, width+1, width, width-1, -1, -width-1};
		final String [] titles_top = debug ? (new String [] {"synthetic","synth_GB","UM-squared","expanded","expanded-GB","weights"}): null;
		final double [][][] dbg_data = debug ? new double [titles_top.length][num_sensors][] : null;
		final Thread[] threads = ImageDtt.newThreadArray();
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					final DoubleGaussianBlur gb = new DoubleGaussianBlur();
					final double [] before_expand = new double [num_pix]; // per-thread buffer
					int nSens;
					while ((nSens = ai.getAndIncrement()) < num_sensors) {
						final double [] src = synth_img[nSens][0];
						final double [] wgt = weights[nSens];
						if (debug) {
							dbg_data[0][nSens] = src.clone();
						}
						// copy source replacing NaN with 0 and blur it
						for (int i = 0; i < num_pix; i++) {
							wgt[i] = Double.isNaN(src[i]) ? 0 : src[i];
						}
						gb.blurDouble(
								wgt,            //
								width,          // terrain woi
								height,
								um_sigma,       // double sigmaX,
								um_sigma,       // double sigmaY,
								blur_accuracy); // double accuracy)
						if (debug) {
							dbg_data[1][nSens] = wgt.clone();
						}
						// difference from the blurred version -> weight
						for (int y = 0, npix = 0; y < height; y++) {
							for (int x = 0; x < width; x++, npix++) {
								final double v;
								if (Double.isNaN(src[npix])) {
									v = bad_val; // bad
								} else if (!rborder.contains(x, y)) {
									v = scale_thresh; // good
								} else {
									final double d = (src[npix] - wgt[npix])/threshold;
									v = Math.max(scale_thresh - d * d, bad_val);
								}
								wgt[npix] = v;
							}
						}
						if (debug) {
							dbg_data[2][nSens] = wgt.clone();
						}
						if (expand_max) {
							// each inner pixel - minimum of itself and 8 neighbors of the unmodified copy
							System.arraycopy(wgt, 0, before_expand, 0, num_pix);
							for (int py = 1; py < (height-1); py++) {
								final int line_start = py * width;
								for (int px = 1; px < (width-1); px++) {
									final int center = line_start + px;
									double mn = before_expand[center]; 
									for (int i = 0; i < neib_offsets.length; i++) {
										mn = Math.min(mn, before_expand[center + neib_offsets[i]]);
									}
									wgt[center] = mn;
								}
							}
						}
						if (debug) {
							dbg_data[3][nSens] = wgt.clone();
						}
						if (w_blur > 0) {
							gb.blurDouble(
									wgt,            //
									width,          // terrain woi
									height,
									w_blur,         // double sigmaX,
									w_blur,         // double sigmaY,
									blur_accuracy); // double accuracy)
						}
						if (debug) {
							dbg_data[4][nSens] = wgt.clone();
						}
						// limit to outliers_w ... 1.0 range
						for (int i = 0; i < num_pix; i++) {
							final double v = wgt[i];
							if (v > 1.0) {
								wgt[i] = 1.0;
							} else if (v < outliers_w) {
								wgt[i] = outliers_w;
							}
						}
						if (debug) {
							dbg_data[5][nSens] = wgt.clone();
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		if (debug) {
			final String [] titles = new String [num_sensors];
			for (int nsens = 0; nsens < num_sensors; nsens++) {
				titles[nsens] = "SENSOR-"+nsens;
			}
			ShowDoubleFloatArrays.showArraysHyperstack(
					dbg_data,    // double[][][] pixels, 
					width,       // int          width, 
					debug_title, // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
					titles,      // String []    titles, // all slices*frames titles or just slice titles or null
					titles_top,  // String []    frame_titles, // frame titles or null
					true);       // boolean      show)
		}
		return weights;
	}


}
