/**
 **
 ** IntersceneLma - Adjust camera egomotion with LMA
 **
 ** Copyright (C) 2020-2023 Elphel, Inc.
 **
 ** -----------------------------------------------------------------------------**
 **
 **  IntersceneLma.java is free software: you can redistribute it and/or modify
 **  it under the terms of the GNU General Public License as published by
 **  the Free Software Foundation, either version 3 of the License, or
 **  (at your option) any later version.
 **
 **  This program is distributed in the hope that it will be useful,
 **  but WITHOUT ANY WARRANTY; without even the implied warranty of
 **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 **  GNU General Public License for more details.
 **
 **  You should have received a copy of the GNU General Public License
 **  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ** -----------------------------------------------------------------------------**
 **
 */

package com.elphel.imagej.tileprocessor;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.DoubleAdder;

import javax.xml.bind.DatatypeConverter;

import com.elphel.imagej.common.ShowDoubleFloatArrays;

import Jama.Matrix;
import ij.ImagePlus;

public class IntersceneLma {
	QuadCLT [] scenesCLT =    null; // now will use just 2 - 0 -reference scene, 1 - scene.  
	private double []         last_rms =        null; // {rms, rms_pure}, matching this.vector
	private double []         good_or_bad_rms = null; // just for diagnostics, to read last (failed) rms
	private double []         initial_rms =     null; // {rms, rms_pure}, first-calcualted rms
	private double []         y_vector =        null; // sum of fx(initial parameters) and correlation offsets
	private double []         s_vector =        null; // strength component - just for debug images
	private double []         last_ymfx =       null;
	private double [][]       last_jt =         null;
	private double []         weights; // normalized so sum is 1.0 for all - samples and extra regularization
	private double            sum_weights =     0;
	private int               num_defined =     0;    
	private double            pure_weight; // weight of samples only
	private boolean []        par_mask =        null;
	private int []            par_indices =     null;
	private double  []        parameters_full = null; // full parameters vector for the currenl LMA run
	private double  []        backup_parameters_full = null; // full parameters vector before first LMA run
	private double  []        parameters_vector = null; 
	private double  []        parameters_pull = null; // for regularization - error is proportional to difference between
	//                                                   current vector and parameters_pull
	private double  [][]      macrotile_centers = null;  // (will be used to pull for regularization)
	private double            infinity_disparity = 0.1;  // treat lower as infinity
	private int               num_samples = 0;
	///////////////////////////////////////////////////////////
	// thread_invariant is needed for LMA, otherwise even with the same parameter vector RMS may be slightly different
	// keeping thread-variant where it is done once per LMA (during setup) - in normalizeWeights() and setSamplesWeights()
	// See original code (with all switching between variant/invariant) in pre 12/17/2023
	///////////////////////////////////////////////////////////
	
	private boolean           thread_invariant = true; // Do not use DoubleAdder, provide results not dependent on threads
	private int               num_components =   2;    // 2 for 2D matching only,3 - include disparity
	private double            disparity_weight = 1.0;  // relative weight of disparity errors
	
	private double [][][]     eig_trans = null;
	private int               tilesX = -1;
	private String            dbg_prefix = null;
	
	public IntersceneLma(
			boolean thread_invariant,
			double disparity_weight
			) {
		this.num_components =   (disparity_weight > 0) ? 3 : 2;
		this.disparity_weight = disparity_weight;
		this.thread_invariant=  thread_invariant;
	}
	
	public int getNumComponents() {
		return num_components;
	}
	public double [][]       getLastJT(){
		return last_jt;
	}
	
	public double getSumWeights() {
		return sum_weights;
	}
	
	public int getNumDefined() {
		return num_defined;
	}

	
	public double[] getLastRms() {
		return last_rms;
	}
	public double [] getSceneXYZ(boolean initial) {
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[] {
				full_vector[ErsCorrection.DP_DSX],full_vector[ErsCorrection.DP_DSY],full_vector[ErsCorrection.DP_DSZ]};
	}
	public double [] getSceneATR(boolean initial) {
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[] {
				full_vector[ErsCorrection.DP_DSAZ],full_vector[ErsCorrection.DP_DSTL],full_vector[ErsCorrection.DP_DSRL]};
	}
	public double [][] getSceneXYZATR(boolean initial) {
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[][] {
				{full_vector[ErsCorrection.DP_DSX],full_vector[ErsCorrection.DP_DSY],full_vector[ErsCorrection.DP_DSZ]},
				{full_vector[ErsCorrection.DP_DSAZ],full_vector[ErsCorrection.DP_DSTL],full_vector[ErsCorrection.DP_DSRL]}};
	}
	
	public double [] getReferenceXYZ(boolean initial) {
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[] {
				full_vector[ErsCorrection.DP_DX],full_vector[ErsCorrection.DP_DY],full_vector[ErsCorrection.DP_DZ]};
	}
	public double [] getReferenceATR(boolean initial) {
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[] {
				full_vector[ErsCorrection.DP_DAZ],full_vector[ErsCorrection.DP_DTL],full_vector[ErsCorrection.DP_DRL]};
	}
	public double [][] getReferenceXYZATR(boolean initial) {
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[][] {
				{full_vector[ErsCorrection.DP_DX],full_vector[ErsCorrection.DP_DY],full_vector[ErsCorrection.DP_DZ]},
				{full_vector[ErsCorrection.DP_DAZ],full_vector[ErsCorrection.DP_DTL],full_vector[ErsCorrection.DP_DRL]}};
	}

	public double [] getSceneERSXYZ(boolean initial) { // never used
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[] {
				full_vector[ErsCorrection.DP_DSVX],full_vector[ErsCorrection.DP_DSVY],full_vector[ErsCorrection.DP_DSVZ]};
	}
	public double [] getSceneERSATR(boolean initial) { // never used
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[] {
				full_vector[ErsCorrection.DP_DSVAZ],full_vector[ErsCorrection.DP_DSVTL],full_vector[ErsCorrection.DP_DSVRL]};
	}
	public double [] getReferenceERSXYZ(boolean initial) { // never used
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[] {
				full_vector[ErsCorrection.DP_DVX],full_vector[ErsCorrection.DP_DVY],full_vector[ErsCorrection.DP_DVZ]};
	}
	public double [] getReferenceERSATR(boolean initial) { // never used
		double [] full_vector = initial? backup_parameters_full: getFullVector(parameters_vector);
		return new double[] {
				full_vector[ErsCorrection.DP_DVAZ],full_vector[ErsCorrection.DP_DVTL],full_vector[ErsCorrection.DP_DVRL]};
	}

	public String [] printOldNew(boolean allvectors) {
		return printOldNew(allvectors, 0);
	}

	public String [] printOldNew(boolean allvectors, int mode) {
		return printOldNew(allvectors, mode, 9, 6);
	}
	
	public String [] printOldNew(boolean allvectors, int w, int d) {
		return printOldNew(allvectors, 0, w, d);
	}

	public String [] printOldNew(boolean allvectors, int mode, int w, int d) {
		// mode: 0 - auto, 1 - force "was", 2 force "pull"
		
		String fmt1 = String.format("%%%d.%df", w+2,d+2); // more for the differences
		ArrayList<String> lines = new ArrayList<String>();
		for (int n = ErsCorrection.DP_DVAZ; n < ErsCorrection.DP_NUM_PARS; n+=3) {
			boolean adj = false;
			for (int i = 0; i <3; i++) adj |= par_mask[n+i];
			if (allvectors || adj) {
				String line = printNameV3(n, false, mode, w,d)+"  (" + getCompareType(mode)+
						" "+printNameV3(n, true, mode, w,d)+")";
				line += ", diff_last="+String.format(fmt1, getV3Diff(n)[0]);
				line += ", diff_first="+String.format(fmt1, getV3Diff(n)[1]);
				lines.add(line);
			}
		}
		return lines.toArray(new String[lines.size()]);
	}
	
	/**
	 * Calculate L2 for 3-vector difference between the adjusted values, this LMA start values
	 * and "backup" parameters - snapshot before the first adjustment (first calculation of 
	 * the motion vectors+LMA iteration  
	 * @param indx offset to the first element of the 3-vector in the full parameters array
	 * @return a pair of L2 differences {diff_with_this_LMA_start, diff_with_first_lma_start}
	 */
	public double [] getV3Diff(int indx) {
		double [] v_new = new double[3], v_backup = new double[3], v_start = new double[3];
		System.arraycopy(getFullVector(parameters_vector), indx, v_new, 0, 3);
		System.arraycopy(backup_parameters_full, indx, v_backup, 0, 3);
		System.arraycopy(parameters_full, indx, v_start, 0, 3);
		double l2_backup = 0;
		double l2_start = 0;
		for (int i = 0; i < 3; i++) {
			l2_backup += (v_new[i]-v_backup[i]) * (v_new[i]-v_backup[i]); 
			l2_start +=  (v_new[i]-v_start[i]) *  (v_new[i]-v_start[i]); 
		}
		return new double [] {Math.sqrt(l2_start), Math.sqrt(l2_backup)};
	}
	
	public String getCompareType() {
		return getCompareType(0);
//		return (parameters_pull != null)? "pull": "was";
	}

	public String getCompareType(int mode) {
		boolean is_pull = (mode == 0) ? (parameters_pull != null) : (mode > 1);  
		return is_pull? "pull": "was ";
	}
	
	public String printNameV3(int indx, boolean initial, int w, int d) {
		return printNameV3(indx, initial, 0, w, d);
	}

	public String printNameV3(int indx, boolean initial, int mode, int w, int d) {
		// mode: 0 - auto, 1 - was, 2 - pull 
		boolean use_pull = (mode == 0) ? (parameters_pull != null) : (mode > 1);  		
		double [] full_vector = initial? 
				(use_pull? getFullVector(parameters_pull) : backup_parameters_full):
			getFullVector(parameters_vector);
		double [] vector = new double[3];
		for (int i = 0; i <3; i++) {
			vector[i] = full_vector[indx + i];
		}
		String name = ErsCorrection.DP_VECTORS_NAMES[indx];
		return printNameV3(name, vector, w, d);
	}
	
	
	public static String printNameV3(String name, double[] vector) {
		return printNameV3(name, vector, 10, 6);
	}
	
	public static String printNameV3(String name, double[] vector, int w, int d) {
		return String.format("%14s: %s", name, printV3(vector, w, d));
	}

	
	public static String printV3(double[] vector) {
		return printV3(vector, 8, 5);
	}
	public static String printV3(double[] vector, int w, int d) {
		String fmt = String.format("[%%%d.%df, %%%d.%df, %%%d.%df]", w,d,w,d,w,d);
		return String.format(fmt, vector[0], vector[1], vector[2]);
	}

	public void prepareLMA(
			final double [][] scene_xyzatr0,     // camera center in world coordinates (or null to use instance)
			final double [][] scene_xyzatr_pull, // if both are not null, specify target values to pull to 
			final double [][] ref_xyzatr,        // 
			// reference atr, xyz are considered 0.0 - not anymore?
			final QuadCLT     scene_QuadClt,
			final QuadCLT     reference_QuadClt,
			final boolean[]   param_select,
			final double []   param_regweights,
			final double      eig_max_sqrt, //  6;    // for sqrt(lambda) - consider infinity (infinite linear feature, a line)
			final double      eig_min_sqrt, //  1;    // for sqrt(lambda) - consider infinity (infinite linear feature, a line)
			//if (eigen != null) normalize by eigenvalues, recalc derivatives to eigenvectors directions
			final double [][] eigen, // [tilesX*tilesY]{lamb0_x,lamb0_y, lamb0, lamb1} eigenvector0[x,y],lam0,lam1
			// now includes optional Disparity as the last element (for num_components==3)
			final double [][] vector_XYSDS,// optical flow X,Y, confidence obtained from the correlate2DIterate()
			final double [][] centers,     // macrotile centers (in pixels and average disparities
			final boolean     same_weights,
			boolean           first_run,
			String            dbg_prefix, // null or image name prefix
			final int         debug_level) {
		// before getFxDerivs
		eig_trans = setEigenTransform(
				eig_max_sqrt, // final double   eig_max_sqrt,
				eig_min_sqrt, // final double   eig_min_sqrt,
				eigen); // final double [][] eigen); // [tilesX*tilesY]{lamb0_x,lamb0_y, lamb0, lamb1} eigenvector0[x,y],lam0,lam1
		tilesX = reference_QuadClt.getTileProcessor().getTilesX(); // just for debug images
		scenesCLT = new QuadCLT [] {reference_QuadClt, scene_QuadClt};
		par_mask = param_select;
		macrotile_centers = centers;
		num_samples = num_components * centers.length;
		this.dbg_prefix = dbg_prefix;
		s_vector = (this.dbg_prefix != null) ? (new double[centers.length]): null; // original strength
		ErsCorrection ers_ref =   reference_QuadClt.getErsCorrection();
		ErsCorrection ers_scene = scene_QuadClt.getErsCorrection();
		final double []   scene_xyz = (scene_xyzatr0 != null) ? scene_xyzatr0[0] : ers_scene.camera_xyz;
		final double []   scene_atr = (scene_xyzatr0 != null) ? scene_xyzatr0[1] : ers_scene.camera_atr;
		final double []   reference_xyz = (ref_xyzatr != null)? ref_xyzatr[0]:     ers_ref.camera_xyz; // new double[3];
		final double []   reference_atr = (ref_xyzatr != null)? ref_xyzatr[1]:     ers_ref.camera_xyz; // new double[3];
		double [] full_parameters_vector = new double [] {
				0.0,                             0.0,                             0.0,
				ers_ref.ers_watr_center_dt[0],   ers_ref.ers_watr_center_dt[1],   ers_ref.ers_watr_center_dt[2],
				ers_ref.ers_wxyz_center_dt[0],   ers_ref.ers_wxyz_center_dt[1],   ers_ref.ers_wxyz_center_dt[2],
				reference_atr[0],                reference_atr[1],                reference_atr[2],
				reference_xyz[0],                reference_xyz[1],                reference_xyz[2],
				ers_scene.ers_watr_center_dt[0], ers_scene.ers_watr_center_dt[1], ers_scene.ers_watr_center_dt[2],
				ers_scene.ers_wxyz_center_dt[0], ers_scene.ers_wxyz_center_dt[1], ers_scene.ers_wxyz_center_dt[2],
				scene_atr[0],                    scene_atr[1],                    scene_atr[2],
				scene_xyz[0],                    scene_xyz[1],                    scene_xyz[2]};
		parameters_full = full_parameters_vector.clone();
		if ((vector_XYSDS != null) && (first_run || (backup_parameters_full == null))) {
			backup_parameters_full = full_parameters_vector.clone();
		}
		int num_pars = 0;
		for (int i = 0; i < par_mask.length; i++) if (par_mask[i]) num_pars++;
		par_indices = new int [num_pars];
		num_pars = 0;
		for (int i = 0; i < par_mask.length; i++) {
			if (par_mask[i]) par_indices[num_pars++] = i;
		}
		parameters_vector = new double [par_indices.length];
		for (int i = 0; i < par_indices.length; i++) {
			parameters_vector[i] = full_parameters_vector[par_indices[i]];
		}

		if ((scene_xyzatr_pull != null) && (scene_xyzatr_pull[0] != null) && (scene_xyzatr_pull[1] != null)) {
			double [] parameters_pull_full =  parameters_full.clone();
			for (int i = 0; i < 3; i++)	{
				parameters_pull_full[ErsCorrection.DP_DSX +  i] =  scene_xyzatr_pull[0][i]; 
				parameters_pull_full[ErsCorrection.DP_DSAZ +  i] = scene_xyzatr_pull[1][i]; 
				// 			parameters_pull = 
			}
			parameters_pull = new double [par_indices.length];
			for (int i = 0; i < par_indices.length; i++) {
				parameters_pull[i] = parameters_pull_full[par_indices[i]];
			}
		}
		
		if (vector_XYSDS != null) {// skip when used for the motion blur vectors, not LMA
			setSamplesWeights(vector_XYSDS,  // not regularized yet ! // 3d updated
				same_weights); // final boolean same_weights) // same weight if > 0

		} else {
			weights = null; // new double[2 * centers.length];
		}

		last_jt = new double [parameters_vector.length][];
		if (debug_level > 1) {
			System.out.println("prepareLMA() 1");
		}
		double [] fx = getFxDerivs(
				parameters_vector, // double []         vector,
				last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
				scenesCLT[1],      // final QuadCLT     scene_QuadClt,
				scenesCLT[0],      // final QuadCLT     reference_QuadClt,
				debug_level);      // final int         debug_level)
		if (vector_XYSDS == null) {
			return; // for MB vectors (noLMA)
		}
		
		double [][] wjtj = getWJtJlambda( // USED in lwir all NAN
				0.0,               // final double      lambda,
				last_jt);               // final double [][] jt) all 0???
		for (int i = 0; i < parameters_vector.length; i++) {
			int indx = num_samples + i;
			if (wjtj[i][i] == 0) {  // why is it zero (leading to NaN)
				weights[indx] = 0;
			} else {
				weights[indx] = param_regweights[par_indices[i]]/Math.sqrt(wjtj[i][i]);
			}
		}
		normalizeWeights(); // make full weight == 1.0; pure_weight <= 1.0;
		// remeasure fx - now with regularization terms.
		if (debug_level > 1) {
			System.out.println("prepareLMA() 2");
		}
		fx = getFxDerivs(
				parameters_vector, // double []         vector,
				last_jt,                // final double [][] jt, // should be null or initialized with [vector.length][]
				scene_QuadClt,     // final QuadCLT     scene_QuadClt,
				reference_QuadClt, // final QuadCLT     reference_QuadClt,
				debug_level);      // final int         debug_level)
		// Why y_vector starts with initial value of fx??? 
		y_vector = fx.clone();
		for (int i = 0; i < vector_XYSDS.length; i++) {
			if (vector_XYSDS[i] != null){
				y_vector[num_components * i + 0] += vector_XYSDS[i][0];
				y_vector[num_components * i + 1] += vector_XYSDS[i][1];
				if (num_components > 2) { // ****************************** [3] - disparity? Was BUG: [2] !
					y_vector[num_components * i + 2] += vector_XYSDS[i][3]; // vector_XYSDS[i][2]; 
				}
				if (s_vector != null) {
					s_vector[i] = vector_XYSDS[i][2];
				}
			}
		}
			
		if (parameters_pull != null){
			for (int i = 0; i < par_indices.length; i++) {
				y_vector [i + num_components * macrotile_centers.length] = parameters_pull[i]; // - parameters_initial[i]; // scale will be combined with weights
			}			
		}
		
		last_rms = new double [2];
		last_ymfx = getYminusFxWeighted(
				fx, // final double []   fx,
				last_rms); // final double []   rms_fp // null or [2]
		initial_rms = last_rms.clone();
		good_or_bad_rms = this.last_rms.clone();
			
	}
	
	
	
	public void prepareLMA(
			final double []   scene_xyz0,     // camera center in world coordinates (or null to use instance)
			final double []   scene_atr0,     // camera orientation relative to world frame (or null to use instance)
			final double []   scene_xyz_pull, // if both are not null, specify target values to pull to 
			final double []   scene_atr_pull, // 
			// reference atr, xyz are considered 0.0 - not anymore?
			final QuadCLT     scene_QuadClt,
			final QuadCLT     reference_QuadClt,
			final boolean[]   param_select,
			final double []   param_regweights,
//			final double [][] vector_XYS,  // optical flow X,Y, confidence obtained from the correlate2DIterate()
			// now includes optional Disparity as the last element (for num_components==3) 
			final double [][] vector_XYSDS,// optical flow X,Y, confidence obtained from the correlate2DIterate()
			final double [][] centers,     // macrotile centers (in pixels and average disparities
			final boolean     same_weights,
			boolean           first_run,
			final int         debug_level)
	{
		scenesCLT = new QuadCLT [] {reference_QuadClt, scene_QuadClt};
		par_mask = param_select;
		macrotile_centers = centers;
		num_samples = num_components * centers.length;
		ErsCorrection ers_ref =   reference_QuadClt.getErsCorrection();
		ErsCorrection ers_scene = scene_QuadClt.getErsCorrection();
		final double []   scene_xyz = (scene_xyz0 != null) ? scene_xyz0 : ers_scene.camera_xyz;
		final double []   scene_atr = (scene_atr0 != null) ? scene_atr0 : ers_scene.camera_atr;
		final double []   reference_xyz = new double[3];
		final double []   reference_atr = new double[3];
		double [] full_parameters_vector = new double [] {
				0.0,                             0.0,                             0.0,
				ers_ref.ers_watr_center_dt[0],   ers_ref.ers_watr_center_dt[1],   ers_ref.ers_watr_center_dt[2],
				ers_ref.ers_wxyz_center_dt[0],   ers_ref.ers_wxyz_center_dt[1],   ers_ref.ers_wxyz_center_dt[2],
				reference_atr[0],                reference_atr[1],                reference_atr[2],
				reference_xyz[0],                reference_xyz[1],                reference_xyz[2],
				ers_scene.ers_watr_center_dt[0], ers_scene.ers_watr_center_dt[1], ers_scene.ers_watr_center_dt[2],
				ers_scene.ers_wxyz_center_dt[0], ers_scene.ers_wxyz_center_dt[1], ers_scene.ers_wxyz_center_dt[2],
				scene_atr[0],                    scene_atr[1],                    scene_atr[2],
				scene_xyz[0],                    scene_xyz[1],                    scene_xyz[2]};
		parameters_full = full_parameters_vector.clone();
		if ((vector_XYSDS != null) && (first_run || (backup_parameters_full == null))) {
			backup_parameters_full = full_parameters_vector.clone();
		}
		int num_pars = 0;
		for (int i = 0; i < par_mask.length; i++) if (par_mask[i]) num_pars++;
		par_indices = new int [num_pars];
		num_pars = 0;
		for (int i = 0; i < par_mask.length; i++) {
			if (par_mask[i]) par_indices[num_pars++] = i;
		}
		parameters_vector = new double [par_indices.length];
		for (int i = 0; i < par_indices.length; i++) {
			parameters_vector[i] = full_parameters_vector[par_indices[i]];
		}

		if ((scene_xyz_pull != null) && (scene_atr_pull != null)) {
			double [] parameters_pull_full =  parameters_full.clone();
			for (int i = 0; i < 3; i++)	{
				parameters_pull_full[ErsCorrection.DP_DSX +  i] =  scene_xyz_pull[i]; 
				parameters_pull_full[ErsCorrection.DP_DSAZ +  i] = scene_atr_pull[i]; 
				// 			parameters_pull = 
			}
			parameters_pull = new double [par_indices.length];
			for (int i = 0; i < par_indices.length; i++) {
				parameters_pull[i] = parameters_pull_full[par_indices[i]];
			}
		}
		
		if (vector_XYSDS != null) {// skip when used for the motion blur vectors, not LMA
			setSamplesWeights(vector_XYSDS,  // not regularized yet ! // 3d updated
					same_weights); // final boolean same_weights) // same weight if > 0
		} else {
			weights = null; // new double[2 * centers.length];
		}

		last_jt = new double [parameters_vector.length][];
		if (debug_level > 1) {
			System.out.println("prepareLMA() 1");
		}
		double [] fx = getFxDerivs(
				parameters_vector, // double []         vector,
				last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
				scenesCLT[1],      // final QuadCLT     scene_QuadClt,
				scenesCLT[0],      // final QuadCLT     reference_QuadClt,
				debug_level);      // final int         debug_level)
		if (vector_XYSDS == null) {
			return; // for MB vectors (noLMA)
		}
		
		double [][] wjtj = getWJtJlambda( // USED in lwir all NAN
				0.0,               // final double      lambda,
				last_jt);               // final double [][] jt) all 0???
		for (int i = 0; i < parameters_vector.length; i++) {
			int indx = num_samples + i;
			if (wjtj[i][i] == 0) {  // why is it zero (leading to NaN)
				weights[indx] = 0;
			} else {
				weights[indx] = param_regweights[par_indices[i]]/Math.sqrt(wjtj[i][i]);
			}
		}
		normalizeWeights(); // make full weight == 1.0; pure_weight <= 1.0;
		// remeasure fx - now with regularization terms.

		if (debug_level > 1) {
			System.out.println("prepareLMA() 2");
		}
		fx = getFxDerivs(
				parameters_vector, // double []         vector,
				last_jt,                // final double [][] jt, // should be null or initialized with [vector.length][]
				scene_QuadClt,     // final QuadCLT     scene_QuadClt,
				reference_QuadClt, // final QuadCLT     reference_QuadClt,
				debug_level);      // final int         debug_level)
		// Why y_vector starts with initial value of fx??? 
		y_vector = fx.clone();
		for (int i = 0; i < vector_XYSDS.length; i++) {
			if (vector_XYSDS[i] != null){
				y_vector[num_components * i + 0] += vector_XYSDS[i][0];
				y_vector[num_components * i + 1] += vector_XYSDS[i][1];
				if (num_components > 2) {
					y_vector[num_components * i + 2] += vector_XYSDS[i][2];
				}
				
			}
		}
		if (parameters_pull != null){
			for (int i = 0; i < par_indices.length; i++) {
//				y_vector [i + num_components * macrotile_centers.length] += parameters_pull[i]; // - parameters_initial[i]; // scale will be combined with weights
				y_vector [i + num_components * macrotile_centers.length] = parameters_pull[i]; // - parameters_initial[i]; // scale will be combined with weights
			}			
		}
		
		last_rms = new double [2];
		last_ymfx = getYminusFxWeighted(
				fx, // final double []   fx,
				last_rms); // final double []   rms_fp // null or [2]
		initial_rms = last_rms.clone();
		good_or_bad_rms = this.last_rms.clone();
	}
	
	public int runLma( // <0 - failed, >=0 iteration number (1 - immediately)
			double lambda,           // 0.1
			double lambda_scale_good,// 0.5
			double lambda_scale_bad, // 8.0
			double lambda_max,       // 100
			double rms_diff,         // 0.001
			int    num_iter,         // 20
			boolean last_run,
			int    debug_level)
	{
		boolean [] rslt = {false,false};
		this.last_rms = null; // remove?
		int iter = 0;
		for (iter = 0; iter < num_iter; iter++) {
			rslt =  lmaStep(
					lambda,
					rms_diff,
					debug_level);
			if (rslt == null) {
				return -1; // false; // need to check
			}
			if (debug_level > 1) {
				System.out.println("LMA step "+iter+": {"+rslt[0]+","+rslt[1]+"} full RMS= "+good_or_bad_rms[0]+
						" ("+initial_rms[0]+"), pure RMS="+good_or_bad_rms[1]+" ("+initial_rms[1]+") + lambda="+lambda);
			}
			if (rslt[1]) {
				break;
			}
			if (rslt[0]) { // good
				lambda *= lambda_scale_good;
			} else {
				lambda *= lambda_scale_bad;
				if (lambda > lambda_max) {
					break; // not used in lwir
				}
			}
			if (dbg_prefix != null) {
				showDebugImage(dbg_prefix+"-"+iter+(rslt[0]?"-GOOD":"-BAD"));
			}

		}
		if (rslt[0]) { // better
			if (iter >= num_iter) { // better, but num tries exceeded
				if (debug_level > 1) System.out.println("Step "+iter+": Improved, but number of steps exceeded maximal");
			} else {
				if (debug_level > 1) System.out.println("Step "+iter+": LMA: Success");
			}

		} else { // improved over initial ?
			if (last_rms[0] < initial_rms[0]) { // NaN
				rslt[0] = true;
				if (debug_level > 1) System.out.println("Step "+iter+": Failed to converge, but result improved over initial");
			} else {
				if (debug_level > 1) System.out.println("Step "+iter+": Failed to converge");
			}
		}
		if (dbg_prefix != null) {
			showDebugImage(dbg_prefix+"-FINAL");
		}
		boolean show_intermediate = true;
		if (show_intermediate && (debug_level > 0)) {
			System.out.println("LMA: full RMS="+last_rms[0]+" ("+initial_rms[0]+"), pure RMS="+last_rms[1]+" ("+initial_rms[1]+") + lambda="+lambda);
		}
		if (debug_level > 2){ 
			String [] lines1 = printOldNew(false); // boolean allvectors)
			System.out.println("iteration="+iter);
			for (String line : lines1) {
				System.out.println(line);
			}
		}
		if (debug_level > 0) {
			if ((debug_level > 1) ||  last_run) { // (iter == 1) || last_run) {
				if (!show_intermediate) {
					System.out.println("LMA: iter="+iter+",   full RMS="+last_rms[0]+" ("+initial_rms[0]+"), pure RMS="+last_rms[1]+" ("+initial_rms[1]+") + lambda="+lambda);
				}
				String [] lines = printOldNew(false); // boolean allvectors)
				for (String line : lines) {
					System.out.println(line);
				}
			}
		}
		if ((debug_level > -2) && !rslt[0]) { // failed
			if ((debug_level > 1) || (iter == 1) || last_run) {
				System.out.println("LMA failed on iteration = "+iter);
				String [] lines = printOldNew(true); // boolean allvectors)
				for (String line : lines) {
					System.out.println(line);
				}
			}
			System.out.println();
		}

		return rslt[0]? iter : -1;
	}
	
	private boolean [] lmaStep(
			double lambda,
			double rms_diff,
			int debug_level) {
		boolean [] rslt = {false,false};
		// maybe the following if() branch is not needed - already done in prepareLMA !
		if (this.last_rms == null) { //first time, need to calculate all (vector is valid)
			last_rms = new double[2];
			if (debug_level > 1) {
				System.out.println("lmaStep(): first step");
			}
			double [] fx = getFxDerivs(
					parameters_vector, // double []         vector,
					last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
					scenesCLT[1],      // final QuadCLT     scene_QuadClt,
					scenesCLT[0],      // final QuadCLT     reference_QuadClt,
					debug_level);      // final int         debug_level)
			last_ymfx = getYminusFxWeighted(
					fx, // final double []   fx,
					last_rms); // final double []   rms_fp // null or [2]
			this.initial_rms = this.last_rms.clone();
			this.good_or_bad_rms = this.last_rms.clone();
			if (dbg_prefix != null) { // -1) { // temporary
				String      dbg_title= scenesCLT[0].getImageName()+"-"+scenesCLT[1].getImageName()+"-FxDerivs";
				int         dbg_num_rows = 3;
				boolean     dbg_show_reg = true;
				boolean     dbg_show =     true;
				showFxDerivs(
						fx,           // double []   fX,
						last_jt,      // double [][] jt,
						0,            // int         num_rows, // <=0 - stack 3?
						dbg_show_reg, // boolean     show_reg,  // extra row for regularization parameters
						dbg_show,     // boolean     show,
						dbg_title);   // String      title);
				showFxDerivs(
						fx,           // double []   fX,
						last_jt,      // double [][] jt,
						dbg_num_rows, // int         num_rows, // <=0 - stack 3?
						dbg_show_reg, // boolean     show_reg,  // extra row for regularization parameters
						dbg_show, // boolean     show,
						dbg_title); // String      title);
				
				double      debug_delta = 1e-4;
				boolean     show_img = true;
/*				debugDerivs(
						parameters_vector, // double []         vector,
						scenesCLT[1],      // final QuadCLT     scene_QuadClt,
						scenesCLT[0],      // final QuadCLT     reference_QuadClt,
						debug_delta,       // final double      delta,
						show_img,          // final boolean     show_img,
						debug_level);      // final int         debugLevel) */
				/*
				dbgYminusFxWeight(
						this.last_ymfx,
						this.weights,
						"Initial_y-fX_after_moving_objects");
                */
			}
			if (last_ymfx == null) {
				return null; // need to re-init/restart LMA
			}
			// TODO: Restore/implement
			if (debug_level > 3) {
				/*
				 dbgJacobians(
							corr_vector, // GeometryCorrection.CorrVector corr_vector,
							1E-5, // double delta,
							true); //boolean graphic)
				*/
			}
			if (dbg_prefix != null) {
				showDebugImage(dbg_prefix+"-INIT");
			}
		}
		Matrix y_minus_fx_weighted = new Matrix(this.last_ymfx, this.last_ymfx.length);

		Matrix wjtjlambda = new Matrix(getWJtJlambda(
				lambda, // *10, // temporary
				this.last_jt)); // double [][] jt)
		
		if (debug_level>2) {
			System.out.println("JtJ + lambda*diag(JtJ");
			wjtjlambda.print(18, 6);
		}
		Matrix jtjl_inv = null;
		try {
			jtjl_inv = wjtjlambda.inverse(); // check for errors
		} catch (RuntimeException e) {
			rslt[1] = true;
			if (debug_level > 0) {
				System.out.println("Singular Matrix!");
			}

			return rslt;
		}
		if (debug_level>2) {
			System.out.println("(JtJ + lambda*diag(JtJ).inv()");
			jtjl_inv.print(18, 6);
		}
//last_jt has NaNs
		Matrix jty = (new Matrix(this.last_jt)).times(y_minus_fx_weighted);
		if (debug_level>2) {
			System.out.println("Jt * (y-fx)");
			jty.print(18, 6);
		}
		
		
		Matrix mdelta = jtjl_inv.times(jty);
		if (debug_level>2) {
			System.out.println("mdelta");
			mdelta.print(18, 10);
		}

		double scale = 1.0;
		double []  delta =      mdelta.getColumnPackedCopy();
		double []  new_vector = parameters_vector.clone();
		for (int i = 0; i < parameters_vector.length; i++) {
			new_vector[i] += scale * delta[i];
		}
		
		double [] fx = getFxDerivs(
				new_vector, // double []         vector,
				last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
				scenesCLT[1],      // final QuadCLT     scene_QuadClt,
				scenesCLT[0],      // final QuadCLT     reference_QuadClt,
				debug_level);      // final int         debug_level)
		double [] rms = new double[2];
		last_ymfx = getYminusFxWeighted(
//				vector_XYS, // final double [][] vector_XYS,
				fx, // final double []   fx,
				rms); // final double []   rms_fp // null or [2]
		if (debug_level > 2) {
			/*
			dbgYminusFx(this.last_ymfx, "next y-fX");
			dbgXY(new_vector, "XY-correction");
			*/
		}

		if (last_ymfx == null) {
			return null; // need to re-init/restart LMA
		}

		this.good_or_bad_rms = rms.clone();
		if (rms[0] < this.last_rms[0]) { // improved
			rslt[0] = true;
			rslt[1] = rms[0] >=(this.last_rms[0] * (1.0 - rms_diff));
			this.last_rms = rms.clone();

			this.parameters_vector = new_vector.clone();
			if (debug_level > 2) {
				// print vectors in some format
				/*
				System.out.print("delta: "+corr_delta.toString()+"\n");
				System.out.print("New vector: "+new_vector.toString()+"\n");
				System.out.println();
				*/
			}
		} else { // worsened
			rslt[0] = false;
			rslt[1] = false; // do not know, caller will decide
			// restore state
			fx = getFxDerivs(
					parameters_vector, // double []         vector,
					last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
					scenesCLT[1],      // final QuadCLT     scene_QuadClt,
					scenesCLT[0],      // final QuadCLT     reference_QuadClt,
					debug_level);      // final int         debug_level)
			last_ymfx = getYminusFxWeighted(
					fx, // final double []   fx,
					this.last_rms); // final double []   rms_fp // null or [2]
			if (last_ymfx == null) {
				return null; // need to re-init/restart LMA
			}
			if (debug_level > 2) {
				/*
				 dbgJacobians(
							corr_vector, // GeometryCorrection.CorrVector corr_vector,
							1E-5, // double delta,
							true); //boolean graphic)
							*/
			}
		}
		return rslt;
	}
	
	private static double [][][] setEigenTransform(
			final double   eig_max_sqrt,
			final double   eig_min_sqrt,
			final double [][] eigen){ // [tilesX*tilesY]{lamb0_x,lamb0_y, lamb0, lamb1} eigenvector0[x,y],lam0,lam1
		if (eigen == null) {
			return null;
		}
		final double [][][] transform = new double[eigen.length][][];
		final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		final double k0 = 1.0/eig_max_sqrt;
		
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int iTile = ai.getAndIncrement(); iTile < eigen.length; iTile = ai.getAndIncrement()) if (eigen[iTile] != null){
						double [][] am = {{eigen[iTile][0],eigen[iTile][1]},{eigen[iTile][1],-eigen[iTile][0]}};
						if (!Double.isNaN(am[0][0]) && !Double.isNaN(am[0][1])) {
							transform[iTile] = new double [2][2];
							for (int i = 0; i < 2; i++) {
								double k = Math.max(0, 1/Math.max(eig_min_sqrt, Math.sqrt(eigen[iTile][2+i]))-k0);
								for (int j = 0; j < 2; j++) {
									transform[iTile][i][j] = k* am[i][j];  
								}		
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return transform;
	}
	
	private void setSamplesWeights(
			final double [][] vector_XYSDS,  // not regularized yet
			final boolean same_weights) // same weight if > 0
	{
		//num_components 2 - old, 3 - with disparity
		this.weights = new double [num_samples + parameters_vector.length];
		
		final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		final AtomicInteger ati = new AtomicInteger(0);
		final AtomicInteger anum_defined = new AtomicInteger(0);
		final double [] sw_arr = new double [threads.length];
		double sum_weights;
		final int num_types = (num_components > 2) ? 2 : 1;
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					int thread_num = ati.getAndIncrement();
					for (int iMTile = ai.getAndIncrement(); iMTile < vector_XYSDS.length; iMTile = ai.getAndIncrement()) if (vector_XYSDS[iMTile] != null){
						double w = vector_XYSDS[iMTile][2];
						if ((eig_trans != null) && (eig_trans[iMTile] == null)) {
							w = 0;
							continue;
						}
						if (Double.isNaN(w)) {
							w = 0;
							continue;
						}
						if (w > 0) {
							if (same_weights) {
								w = 1.0;
							}
							anum_defined.getAndIncrement();
							weights[num_components  * iMTile] = w;
							sw_arr[thread_num] += 2*w;
							//disparity_weight
							if (num_types > 1) {
								w = vector_XYSDS[iMTile][4] * disparity_weight;
								if (Double.isNaN(w) || Double.isNaN(vector_XYSDS[iMTile][3])) {
									w = 0;
									vector_XYSDS[iMTile][3] = 0.0; // disparity
								}
								weights[num_components * iMTile + 2] = w;
								sw_arr[thread_num] += w;
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		sum_weights = 0.0;
		for (double w:sw_arr) {
			sum_weights += w;
		}
		num_defined = anum_defined.get();
		this.sum_weights = sum_weights;
		
		if (sum_weights <= 1E-8) {
			System.out.println("!!!!!! setSamplesWeights(): sum_weights=="+sum_weights+" <= 1E-8");
		}
		ai.set(0);
		final double s = 1.0/sum_weights; // Was 0.5 - already taken care of
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int iMTile = ai.getAndIncrement(); iMTile < vector_XYSDS.length; iMTile = ai.getAndIncrement()) if (vector_XYSDS[iMTile] != null){
						weights[num_components * iMTile] *= s;
						weights[num_components * iMTile + 1] = weights[num_components * iMTile];
						if (num_components > 2) {
							weights[num_components * iMTile + 2] *= s;
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		pure_weight = 1.0;
	}

	private void normalizeWeights()
	{
		final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
		final AtomicInteger ai =    new AtomicInteger(0);
		final AtomicInteger ati =    new AtomicInteger(0);
		final double [] sum_weight = new double [threads.length];
		ati.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					int thread_num = ati.getAndIncrement();
					for (int i = ai.getAndIncrement(); i < num_samples; i = ai.getAndIncrement()){
						if (Double.isNaN(weights[i])) {
							System.out.println("normalizeWeights(): weights["+i+"== NaN: 2");
							weights[i] = 0;
						}
						sum_weight[thread_num] += weights[i];
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		double sum_weight_pure = 0;
		for (double sw:sum_weight)	sum_weight_pure += sw;
		double full_weight = sum_weight_pure;
		for (int i = 0; i < par_indices.length; i++) {
			int indx =  num_samples + i;
			if (Double.isNaN(weights[indx])) {
				System.out.println("normalizeWeights(): weights["+indx+"] == NaN: 1.5, i="+i);
				weights[indx] = 0;
			}
			full_weight += weights[indx];
		}
		pure_weight = sum_weight_pure/full_weight;
		final double s = 1.0/full_weight;
		if (Double.isNaN(s)) {
			System.out.println("normalizeWeights(): s == NaN  : 2");
		}
		ai.set(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int i = ai.getAndIncrement(); i < weights.length; i = ai.getAndIncrement()){
						weights[i] *= s;
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
	}
	
	
	/**
	 * Combine unmodified parameters with these ones, using parameters mask
	 * @param vector variable-length adjustable vector, corresponding to a parameter mask
	 * @return full parameters vector combining adjusted and fixed parameters
	 */
	private double [] getFullVector(double [] vector) {
		double [] full_vector = parameters_full.clone();
		for (int i = 0; i < par_indices.length; i++) {
			full_vector[par_indices[i]] = vector[i];
		}
		return full_vector;
	}
	
	
	// if eig_trans != null, transform derivatives, keep Fx
	
	public ImagePlus showFxDerivs(
			double []   fX,
			double [][] jt,
			int         num_rows, // <=0 - stack
			boolean     show_reg,  // extra row for regularization parameters
			boolean     show,
			String      title) {
		int gap =    1;
		double [][][] scales = {
				{{320,320}, {256,256} ,{0,1}},	//y                      *
				{{320,320}, {256,256} ,{0,1}},	//fX                     *
				{{ 0, 2.5},  { 0, 2.5} , {0,1}},	//y-fX                   *
				{{0,   75},  { 0, 75},  {0,1}},  //hor *
				{{0,   75},  { 0, 75},  {0,1}},  //vert *
				{{ 0,100},  { 0,100},  {0,1}},  //"pX",      // (pix)     0
				{{ 0,100},  { 0,100},  {0,1}},  //"pY",            // (pix)     1
				{{0,100},   { 0,100},  {0,1}},  //"disp",          // (pix)     2
				{{0,100},   { 0,100},  {0,1}},  //"ers_vaz_ref",   // (rad/sec) 3
				{{0,100},   { 0,100},  {0,1}},  //"ers_vtl_ref",   // (rad/sec) 4
				{{0,100},   { 0,100},  {0,1}},	//"ers_vrl_ref",   // (rad/sec) 5
				{{0,100},   { 0,100},  {0,1}}, //"ers_dvx_ref",   // (m/s)     6
				{{0,100},   { 0,100},  {0,1}}, //"ers_dvy_ref",   // (m/s)     7
				{{0,100},   { 0,100},  {0,1}}, //"ers_dvz_ref",   // (m/s)     8
				{{-1120,20},{ 70,50},  {0,1}}, //"azimuth_ref",   // (rad)     9 *
				{{-70,  50},{-1120,20},{0,1}}, //"tilt_ref",      // (rad)    10 *
				{{0,500},   { 0,500},  {0,1}}, //"roll_ref",      // (rad)    11 *
				{{ 25,2.5}, {-1.5,0.5},{0,1}}, //"X_ref",         // (m)      12 *
				{{-1.5,0.5},{-25,2.5}, {0,1}}, //"Y_ref",         // (m)	  13 *
				{{ 0, 7.5}, { 0, 7.5}, {0,1}}, //"Z_ref",         // (m)      14 *
				{{0,100},   { 0,100},  {0,1}}, //"ers_vaz_scene", // (rad/sec)15
				{{0,100},   { 0,100},  {0,1}}, //"ers_vtl_scene", // (rad/sec)16
				{{0,100},   { 0,100},  {0,1}}, //"ers_vrl_scene", // (rad/sec)17
				{{0,100},   { 0,100},  {0,1}}, //"ers_dvx_scene", // (m/s)    18
				{{0,100},   { 0,100},  {0,1}}, //"ers_dvy_scene", // (m/s)    19
				{{0,100},   { 0,100},  {0,1}}, //"ers_dvz_scene", // (m/s)    20
				{{1120,20}, { -70,50}, {0,1}}, //"azimuth_scene", // (rad)    21 *
				{{70,  50}, {1120,20}, {0,1}}, //"tilt_scene",   // (rad)    22 *
				{{0,500},   { 0,500},  {0,1}}, //"Roll_scene",    // (rad)    23 *
				{{-25, 2.5},{ 1.5,0.5},{0,1}}, //"X_scene",       // (m)      24 *
				{{ 1.5,0.5},{25, 2.5}, {0,1}}, //"Y_scene",       // (m)	  25 *
				{{ 0, 7.5}, { 0, 7.5},  {0,1}}  //"Z_scene"        // (m)      26 *
		};

		String [] titles_pre = {"y", "fx", "y-fx", "hor", "vert"};
		int num_pre = titles_pre.length;
		int length = num_samples/num_components;
		int width =  tilesX;
		int height0 = length/width;
		int height = height0 + (show_reg? 1 : 0);
		int num_pars0 =  jt.length;
		
		int num_pars1 =  jt.length+num_pre;
		double [][] jt_ext = new double [num_pars1][];
		jt_ext[0] = y_vector;
		jt_ext[1] = fX;
		jt_ext[2] = y_vector.clone();
		for (int i = 0; i < jt_ext[2].length; i++) {
			jt_ext[2][i] -= jt_ext[1][i];
		}
		double [][][] img_stack = new double [num_components][num_pars1][width*height];
		String [] titles = new String [num_pars1];
		for (int i = 0; i < num_pre; i++) {
			titles[i] = titles_pre[i];
		}
		
		for (int i = 0; i < par_indices.length; i++) {
			titles[i+num_pre] = ErsCorrection.DP_DERIV_NAMES[par_indices[i]];
			jt_ext[i+num_pre] = jt[i];
		}
		// combine rotate and translate 
		// See if both azimuth_scene and X_scene are present
		int [][] hor_vert_indices = {{-1,-1},{-1,-1}}; 
		for (int i = 0; i < par_indices.length; i++) {
			if (par_indices[i] ==ErsCorrection.DP_DSAZ) hor_vert_indices[0][0] = i;
			if (par_indices[i] ==ErsCorrection.DP_DSX)  hor_vert_indices[0][1] = i;
			if (par_indices[i] ==ErsCorrection.DP_DSTL) hor_vert_indices[1][0] = i;
			if (par_indices[i] ==ErsCorrection.DP_DSY)  hor_vert_indices[1][1] = i;
		}
		for (int vnh = 0; vnh < hor_vert_indices.length; vnh++) if (( hor_vert_indices[vnh][0] >=0 ) && (hor_vert_indices[vnh][1] >= 0)){
			double [] sw = new double[2],swd = new double[2];
			for (int n = 0; n < hor_vert_indices[vnh].length; n++) {
				for (int i = 0; i < length; i++) {
					int indx = num_components * i + vnh;
					double w = weights[indx];
					double d = jt[hor_vert_indices[vnh][n]][indx];
					sw [n]+=w;
					swd[n]+=w*d;
				}
				swd[n] /= sw[n];
			}
			double mult_second = swd[0] / swd[1];
			int jindex = 3 + vnh; 
			jt_ext[jindex] = new double [weights.length];
			for (int i = 0; i < weights.length; i++) {
				jt_ext[jindex][i] = jt[hor_vert_indices[vnh][0]][i] - mult_second * jt[hor_vert_indices[vnh][1]][i];
			}
		}
		String [] titles_top = new String [num_components];
		titles_top[0] = "pX";
		titles_top[1] = "pY";
		if (num_components > 2) {
			titles_top[2] = "Disp";
		}
		for (int np = 0; np < num_pars1; np++){
			for (int nc = 0; nc < num_components; nc++) {
				Arrays.fill(img_stack[nc][np], Double.NaN);
				if (jt_ext[np] != null) {
					for (int npix = 0; npix < length; npix++) {
						int sindx = nc+num_components*npix;
						if (weights[sindx] > 0) {
							if ((img_stack[nc]== null) || (img_stack[nc][np]== null) ||
									(jt_ext[np]== null)) {
								System.out.println("showFxDerivs(): Null pointer");

							}
							img_stack[nc][np][npix] = jt_ext[np][sindx]; // null pointer
						}
					}
					if (show_reg) {
						for (int i = 0; i < num_pars0; i++) {
							img_stack[nc][np][length+i] = jt_ext[np][num_samples+i]; 
						}
					}
				}
			}
		}
		if (num_rows <= 0) {
			ImagePlus imp_stack = ShowDoubleFloatArrays.showArraysHyperstack(
					img_stack,  // double[][][] pixels, 
					width,      // int          width, 
					title,      // "terrain_vegetation_render.tiff", // String       title, "time_derivs-rt"+diff_time_rt+"-rxy"+diff_time_rxy,
					titles,     // String []    titles, // all slices*frames titles or just slice titles or null
					titles_top, // String []    frame_titles, // frame titles or null
					show);      // boolean      show)
			return imp_stack;

		}
		// TODO: add translate+rotate combo and scale factors to see all tiles simultaneously (as static members?)
		int num_cols = (int) Math.ceil(1.0*num_pars1/num_rows); 
		int full_width =  num_cols * (width +  gap) - gap;
		int full_height = num_rows * (height + gap) - gap;
		double [][] tiled_stack = new double [num_components][full_width*full_height];
		for (int nc = 0; nc < tiled_stack.length; nc++) {
			Arrays.fill (tiled_stack[nc], Double.NaN);
		}
		for (int ntile = 0; ntile < num_pars1; ntile++) {
			int tile_row = ntile / num_cols; 
			int tile_col = ntile % num_cols;
			int tl_index = tile_row * (height + gap) * full_width + tile_col * (width + gap); 
			for (int nc = 0; nc < tiled_stack.length; nc++) {
				int scale_index = (ntile<num_pre)? ntile: (par_indices[ntile-num_pre] + num_pre);
				double offs = scales[scale_index][nc][0];
				double div =  scales[scale_index][nc][1];
				for (int nrow = 0; nrow < height; nrow++) {
					System.arraycopy(
							img_stack[nc][ntile],
							nrow * width,
							tiled_stack[nc],
							tl_index + nrow * full_width,
							width);
					if (nrow < height0) {
						for (int i = 0; i < width; i++) {
							int indx = tl_index + nrow * full_width + i;
							tiled_stack[nc][indx]= (tiled_stack[nc][indx]- offs)/div;
						}
					}
				}
			}			
		}
		ImagePlus imp_stack = ShowDoubleFloatArrays.makeArrays(
				tiled_stack, // double[][] pixels,
				full_width,  //int width,
				full_height, // int height,
				title+"_tiled",       // String title,
				titles_top); // String [] titles)
    	imp_stack.getProcessor().resetMinAndMax();
		if (show) {
			imp_stack.show();
		}
		return imp_stack;
	}
	
	public double [][] getFxDerivsDelta(
			double []         vector,
			final double[]    deltas, // same length as vector, or [1] - then use for all
			final QuadCLT     scene_QuadClt,
			final QuadCLT     reference_QuadClt,
			final int         debug_level) {
		int dbg_n = -2256; // 1698; // -2486;
		final boolean mb_mode = (weights == null); // mb_mode should have num_components==2 !
		final int weights_length = mb_mode ?  (2 * macrotile_centers.length) : weights.length; // OK to keep 2
		double [][] jt =  new double [vector.length][weights_length];
		for (int nv = 0; nv < vector.length; nv++) {
			double delta = (nv < deltas.length) ? deltas[nv] : deltas[deltas.length -1];
			if (nv == dbg_n) {
				System.out.println("getFxDerivsDelta(): nv="+nv);
			}
			double [] vpm = vector.clone();
			vpm[nv]+= 0.5*delta;
			double [] fx_p =  getFxDerivs(
					vpm,
					null,              // final double [][] jt, // should be null or initialized with [vector.length][]
					scene_QuadClt,     // final QuadCLT     scene_QuadClt,
					reference_QuadClt, // final QuadCLT     reference_QuadClt,
					debug_level);
			vpm[nv]-= delta;
			double [] fx_m =  getFxDerivs(
					vpm,
					null, // final double [][] jt, // should be null or initialized with [vector.length][]
					scene_QuadClt,     // final QuadCLT     scene_QuadClt,
					reference_QuadClt, // final QuadCLT     reference_QuadClt,
					debug_level);
			for (int i = 0; i <weights_length; i++) if ((weights==null) ||  (weights[i] > 0)) {
				jt[nv][i] = (fx_p[i]-fx_m[i])/delta;
			}
		}
		return jt;
	}
	
	public void debugDerivs(
			double []         vector,
			final QuadCLT     scene_QuadClt,
			final QuadCLT     reference_QuadClt,
			final double      delta,
			final boolean     show_img,
			final int         debugLevel) {
        double [] delta_scale ={
				1.0,  //"pX",            // (pix)     0
				1.0,  //"pY",            // (pix)     1
				1.0,  //"disp",          // (pix)     2
				1.0,  //"ers_vaz_ref",   // (rad/sec) 3
				1.0,  //"ers_vtl_ref",   // (rad/sec) 4
				1.0,  //"ers_vrl_ref",   // (rad/sec) 5
				1.0,  //"ers_dvx_ref",   // (m/s)     6
				1.0,  //"ers_dvy_ref",   // (m/s)     7
				1.0,  //"ers_dvz_ref",   // (m/s)     8
				1.0,  //"azimuth_ref",   // (rad)     9 *
				1.0,  //"tilt_ref",      // (rad)    10 *
				1.0,  //"roll_ref",      // (rad)    11 *
				1.0,  //"X_ref",         // (m)      12 *
				1.0,  //"Y_ref",         // (m)	  13 *
				1.0,  //"Z_ref",         // (m)      14 *
				1.0,  //"ers_vaz_scene", // (rad/sec)15
				1.0,  //"ers_vtl_scene", // (rad/sec)16
				1.0,  //"ers_vrl_scene", // (rad/sec)17
				1.0,  //"ers_dvx_scene", // (m/s)    18
				1.0,  //"ers_dvy_scene", // (m/s)    19
				1.0,  //"ers_dvz_scene", // (m/s)    20
				1.0,  //"azimuth_scene", // (rad)    21 *
				1.0,  //"tilt_scene",   // (rad)    22 *
				1.0,  //"Roll_scene",    // (rad)    23 *
				1.0,  //"X_scene",       // (m)      24 *
				1.0,  //"Y_scene",       // (m)	  25 *
				1.0   //"Z_scene"        // (m)      26 *
		};
//		int length = num_samples/num_components;
//		int width =  tilesX;
//		int height0 = length/width;
		double [] deltas = new double [vector.length];
		String [] par_names = new String[vector.length];
		for (int i = 0; i < deltas.length; i++) {
			deltas[i] = delta * delta_scale[par_indices[i]];
			par_names[i] =  ErsCorrection.DP_DERIV_NAMES[par_indices[i]];
		}
		String [] comp_names = {"dx", "dy","ddisp"};
		double [][] jt = new double [vector.length][];
		double [] fX = getFxDerivs(
				vector,            // final double []   vector,
				jt,                // final double [][] jt, // should be null or initialized with [vector.length][]
				scene_QuadClt,     // final QuadCLT     scene_QuadClt,
				reference_QuadClt, // final QuadCLT     reference_QuadClt,
				debugLevel);       // final int         debug_level)
		double  [][] jt_delta =  getFxDerivsDelta (
				vector,            // final double []   vector,
				deltas,            // final double[]    deltas, // same length as vector, or [1] - then use for all
				scene_QuadClt,     // final QuadCLT     scene_QuadClt,
				reference_QuadClt, // final QuadCLT     reference_QuadClt,
				debugLevel-10);    // final int         debug_level)
		double [][] jt_diff = new double [jt_delta.length][jt_delta[0].length];
		double max_err=0;
		double min_bad = 0.01;
		for (int n = 0; n < jt_diff.length; n++) {
			for (int w = 0; w < jt_diff[0].length; w++) if (weights[w] > 0){ // skip zero weights
				jt_diff[n][w] = jt[n][w] - jt_delta[n][w];
//				int [] ti = par_rindex[n];
				int ntile = w/num_components;
				int ncomp = w % num_components;
				if (Math.abs(jt_diff[n][w]) > max_err) {
					System.out.println("debugDerivs(): n="+n+" ("+par_names[n]+"), ntile = "+ntile +", comp="+comp_names[ncomp] +", diff="+jt_diff[n][w]+" jt="+jt[n][w]+" jt_delta="+jt_delta[n][w]);
				} else if (Math.abs(jt_diff[n][w]) > min_bad) {
					System.out.println("debugDerivs(): n="+n+" ("+par_names[n]+"), ntile = "+ntile +", comp="+comp_names[ncomp] +", diff="+jt_diff[n][w]+" jt="+jt[n][w]+" jt_delta="+jt_delta[n][w]);
				}
				max_err = Math.max(max_err, Math.abs(jt_diff[n][w]));
			}
		}
		System.out.println("delta = "+delta+", max_err = "+max_err);
		if (show_img) {
			String      dbg_title= scenesCLT[0].getImageName()+"-"+scenesCLT[1].getImageName();
			int         dbg_num_rows = 3;
			boolean     dbg_show_reg = true;
			boolean     dbg_show =     true;
			showFxDerivs(
					fX,           // double []   fX,
					jt,           // double [][] jt,
					0,            // int         num_rows, // <=0 - stack 3?
					dbg_show_reg, // boolean     show_reg,  // extra row for regularization parameters
					dbg_show,     // boolean     show,
					dbg_title+"-jt");   // String      title);
			showFxDerivs(
					fX,           // double []   fX,
					jt,           // double [][] jt,
					dbg_num_rows, // int         num_rows, // <=0 - stack 3?
					dbg_show_reg, // boolean     show_reg,  // extra row for regularization parameters
					dbg_show, // boolean     show,
					dbg_title+"-jt"); // String      title);
			
			showFxDerivs(
					fX,           // double []   fX,
					jt_delta,     // double [][] jt,
					0,            // int         num_rows, // <=0 - stack 3?
					dbg_show_reg, // boolean     show_reg,  // extra row for regularization parameters
					dbg_show,     // boolean     show,
					dbg_title+"-jt_delta_"+delta);   // String      title);
			showFxDerivs(
					fX,           // double []   fX,
					jt_delta,     // double [][] jt,
					dbg_num_rows, // int         num_rows, // <=0 - stack 3?
					dbg_show_reg, // boolean     show_reg,  // extra row for regularization parameters
					dbg_show, // boolean     show,
					dbg_title+"-jt_delta_"+delta); // String      title);
			
			showFxDerivs(
					fX,           // double []   fX,
					jt_diff,      // double [][] jt,
					0,            // int         num_rows, // <=0 - stack 3?
					dbg_show_reg, // boolean     show_reg,  // extra row for regularization parameters
					dbg_show,     // boolean     show,
					dbg_title+"-jt_diff_"+delta);   // String      title);
			showFxDerivs(
					fX,           // double []   fX,
					jt_diff,      // double [][] jt,
					dbg_num_rows, // int         num_rows, // <=0 - stack 3?
					dbg_show_reg, // boolean     show_reg,  // extra row for regularization parameters
					dbg_show, // boolean     show,
					dbg_title+"-jt_diff_"+delta); // String      title);
		}
		return;
	}
	
	
	private double [] getFxDerivs(
			double []         vector,
			final double [][] jt, // should be null or initialized with [vector.length][]
			final QuadCLT     scene_QuadClt,
			final QuadCLT     reference_QuadClt,
			final int         debug_level)
	{
		final double [] full_vector = getFullVector(vector);
		final ErsCorrection ers_ref =   reference_QuadClt.getErsCorrection();
		final ErsCorrection ers_scene = scene_QuadClt.getErsCorrection();
		final double [] scene_xyz =     new double[3];
		final double [] scene_atr  =    new double[3];
		final double [] reference_xyz = new double[3]; // will stay 0
		final double [] reference_atr = new double[3]; // will stay 0
		final boolean mb_mode = (weights == null); // mb_mode should have num_components==2 !
		final int weights_length = mb_mode ?  (2 * macrotile_centers.length) : weights.length; // OK to keep 2
		final double [] fx =       mb_mode ? null : (new double [weights_length]); // weights.length]; : weights.length :
		if (jt != null) {
			for (int i = 0; i < jt.length; i++) {
				jt[i] = new double [weights_length]; // weights.length];
			}
		}
		
		for (int i = 0; i <3; i++) {
			ers_ref.ers_watr_center_dt[i] =   full_vector[ErsCorrection.DP_DVAZ +  i];
			ers_ref.ers_wxyz_center_dt[i] =   full_vector[ErsCorrection.DP_DVX +   i];
			ers_scene.ers_watr_center_dt[i] = full_vector[ErsCorrection.DP_DSVAZ + i];
			ers_scene.ers_wxyz_center_dt[i] = full_vector[ErsCorrection.DP_DSVX +  i];
			reference_atr[i] = full_vector[ErsCorrection.DP_DAZ +  i];
			reference_xyz[i] = full_vector[ErsCorrection.DP_DX +  i];
			scene_atr[i] =     full_vector[ErsCorrection.DP_DSAZ +  i];
			scene_xyz[i] =     full_vector[ErsCorrection.DP_DSX +  i];
		}
		if (Double.isNaN(scene_xyz[0])) {
			System.out.println("getFxDerivs(): scene_xyz[0]=NaN");
			System.out.println();
		}
		ers_scene.setupERS();
		ers_ref.setupERS();
		final Matrix [] reference_matrices_inverse = ErsCorrection.getInterRotDeriveMatrices(
				reference_atr, // double [] atr);
				true);         // boolean invert));

		final Matrix [] scene_matrices_inverse = ErsCorrection.getInterRotDeriveMatrices(
				scene_atr, // double [] atr);
				true);         // boolean invert));

		final Matrix  scene_rot_matrix = ErsCorrection.getInterRotDeriveMatrices(
				scene_atr, // double [] atr);
				false)[0]; // boolean invert));
		
		final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int iMTile = ai.getAndIncrement(); iMTile < macrotile_centers.length; iMTile = ai.getAndIncrement()) {
						if ((macrotile_centers[iMTile]!=null) && (mb_mode || (weights[2*iMTile] > 0.0))){  // was: weights[iMTile]?
							//infinity_disparity
							boolean is_infinity = macrotile_centers[iMTile][2] < infinity_disparity;
							double [][] deriv_params = ers_ref.getDPxSceneDParameters(
									ers_scene,                   // ErsCorrection    ers_scene,
									true,                        // boolean correctDistortions,
									is_infinity,                 // boolean is_infinity,
									macrotile_centers[iMTile],   // double [] pXpYD_reference,
									reference_xyz,               // double [] reference_xyz, // reference (this) camera center in world coordinates
									scene_xyz,                   // double [] scene_xyz,    // (other, ers_scene) camera center in world coordinates
									reference_matrices_inverse,  // Matrix [] reference_matrices_inverse,
									scene_matrices_inverse,      // Matrix [] scene_matrices_inverse,
									scene_rot_matrix,            // Matrix    scene_rot_matrix,        // single rotation (direct) matrix for the scene
									debug_level);                // int debug_level);
							if (deriv_params!= null) {
								boolean bad_tile = false;
								if (!bad_tile) {
									if (!mb_mode) {
										fx[num_components * iMTile + 0] = deriv_params[0][0]; // pX
										fx[num_components * iMTile + 1] = deriv_params[0][1]; // pY
										if (num_components > 2) {
											fx[num_components * iMTile + 2] = deriv_params[0][2]; // D
										}
									}
									if (jt != null) {
										for (int i = 0; i < par_indices.length; i++) {
											int indx = par_indices[i] + 1;
											if (eig_trans != null) {
												//********************************
												// apply eig_trans here:
												for (int j = 0; j < 2; j++) {
													jt[i][num_components * iMTile + j] =
															eig_trans[iMTile][j][0] * deriv_params[indx][0] +
															eig_trans[iMTile][j][1] * deriv_params[indx][1];	
												}
											} else {
												jt[i][num_components * iMTile + 0] = deriv_params[indx][0]; // pX
												jt[i][num_components * iMTile + 1] = deriv_params[indx][1]; // pY (disparity is not used)
											}
											if (num_components > 2) {
												jt[i][num_components * iMTile + 2] = deriv_params[indx][2]; // pY (disparity is used)
											}
										}
										if (Double.isNaN(jt[0][num_components * iMTile + 0])) {
											System.out.println("getFxDerivs(): NaN, iMTile="+iMTile);
										}
									}
								}
							} else if (mb_mode) {
								if (jt != null) {
									for (int i = 0; i < par_indices.length; i++) {
										jt[i][2 * iMTile + 0] = Double.NaN; // pX
										jt[i][2 * iMTile + 1] = Double.NaN; // ; // pY (disparity is not used)
									}
								}
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		if (mb_mode) {
			return null;
		}
		// pull to the initial parameter values
		for (int i = 0; i < par_indices.length; i++) {
			fx [i + num_samples] = vector[i]; // - parameters_initial[i]; // scale will be combined with weights
			if (jt != null) { 
				jt[i][i + num_samples] = 1.0; // scale will be combined with weights
			}
		}
///		if (parameters_pull != null){
///			for (int i = 0; i < par_indices.length; i++) {
///				fx [i + num_samples] -= parameters_pull[i]; // - parameters_initial[i]; // scale will be combined with weights
///			}			
///		}
		return fx;
	}
	
	private double [][] getWJtJlambda( // USED in lwir
			final double      lambda,
			final double [][] jt)
	{
		final int num_pars = jt.length;
		final int num_pars2 = num_pars * num_pars;
		final int nup_points = jt[0].length;
		final double [][] wjtjl = new double [num_pars][num_pars];
		final Thread[] threads = ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int indx = ai.getAndIncrement(); indx < num_pars2; indx = ai.getAndIncrement()) {
						int i = indx / num_pars;
						int j = indx % num_pars;
						if (j >= i) {
							double d = 0.0;
							for (int k = 0; k < nup_points; k++) {
								if (jt[i][k] != 0) {
									d+=0; // ???
								}
								d += weights[k]*jt[i][k]*jt[j][k];
								if (Double.isNaN(d)) {
									System.out.println("getWJtJlambda(): NAN i="+i+", j="+j+", k="+k);
								}
								
							}
							wjtjl[i][j] = d;
							if (i == j) {
								wjtjl[i][j] += d * lambda;
							} else {
								wjtjl[j][i] = d;
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return wjtjl;
	}
    	
	public void showDebugImage(	String dbg_title ) { // includes "_iteration_number" or "_final"
		if (s_vector == null) {
			return;
		}
		int tilesY =  s_vector.length / tilesX;
		String [] titles = {"dx","dy","dr", "str", "e0", "e1", "e"};
		double [][] dbg_img = new double [titles.length][tilesX*tilesY];
		for (int l = 0; l < dbg_img.length; l++) {
			Arrays.fill(dbg_img[l], Double.NaN);
		}
		double [] fx = getFxDerivs(
				parameters_vector, // double []         vector,
				null,              // final double [][] jt, // should be null or initialized with [vector.length][]
				scenesCLT[1],      // final QuadCLT     scene_QuadClt,
				scenesCLT[0],      // final QuadCLT     reference_QuadClt,
				-1);               // debug_level);      // final int         debug_level)
		double [] ymfxw_m = getYminusFxWeighted(
				fx,     // final double []   fx,
				null,   // final double []   rms_fp // null or [2]
				true);//final boolean     force_metric // when true, ignore transform with eig_trans and use linear dx, dy in pixels
        double [] ymfxw_e = null;
        if (eig_trans != null) {
        	ymfxw_e = getYminusFxWeighted(
    				fx,     // final double []   fx,
    				null,   // final double []   rms_fp // null or [2]
    				false);//final boolean     force_metric // when true, ignore transform with eig_trans and use linear dx, dy in pixels
        }
        for (int nTile = 0; nTile < s_vector.length; nTile++) {
        	int indx = num_components * nTile;
        	double w = weights[num_components * nTile];
        	if ((weights[indx] > 0) && (weights[indx+1] > 0)) {
        		for (int i = 0; i < 2; i++) {
        			ymfxw_m[indx + i] /= weights[indx + i];
        			if (ymfxw_e != null) {
            			ymfxw_e[indx + i] /= weights[indx + i];
        			}
        		}
        		double dx = ymfxw_m[indx + 0]; 
        		double dy = ymfxw_m[indx + 1]; 
        		dbg_img[0][nTile] = dx; 
        		dbg_img[1][nTile] = dy;
        		dbg_img[2][nTile] = Math.sqrt(dx*dx+dy*dy);
        		dbg_img[3][nTile] = s_vector[nTile];
    			if (ymfxw_e != null) {
            		double d0 = ymfxw_e[indx + 0]; 
            		double d1 = ymfxw_e[indx + 1]; 
            		dbg_img[4][nTile] = d0; 
            		dbg_img[5][nTile] = d1;
            		dbg_img[6][nTile] = Math.sqrt(d0*d0+d1*d1);
    			}        	
        	}
        }
		if (ymfxw_e == null) {
			dbg_img[4]=null;
			dbg_img[5]=null;
			dbg_img[6]=null;
		}
		ShowDoubleFloatArrays.showArrays( // out of boundary 15
				dbg_img,
				tilesX,
				tilesY,
				true,
				dbg_title,
				titles);
		
	}
	
	
	public boolean isEigenNormalized() {
		return (eig_trans != null);
	}
	public double [] calcRMS (boolean metric) {
		double [] rms_fp = new double [2];
		double [] fx = getFxDerivs(
				parameters_vector, // double []         vector,
				null,              // final double [][] jt, // should be null or initialized with [vector.length][]
				scenesCLT[1],      // final QuadCLT     scene_QuadClt,
				scenesCLT[0],      // final QuadCLT     reference_QuadClt,
				-1);               // debug_level);      // final int         debug_level)
		last_ymfx = getYminusFxWeighted(
				fx,     // final double []   fx,
				rms_fp, // final double []   rms_fp // null or [2]
				metric);//final boolean     force_metric // when true, ignore transform with eig_trans and use linear dx, dy in pixels
		return rms_fp;
	}
	
	
	private double [] getYminusFxWeighted(
			final double []   fx,
			final double []   rms_fp) { // null or [2]
		return getYminusFxWeighted(
				fx,     // final double []   fx,
				rms_fp, // final double []   rms_fp, // null or [2]
				false); // final boolean     force_metric)			

	}

	private double [] getYminusFxWeighted(
			final double []   fx,
			final double []   rms_fp, // null or [2]
			final boolean     force_metric // when true, ignore transform with eig_trans and use linear dx, dy in pixels			
			) {
		double [] ymfxw;
		if (thread_invariant) {
			 ymfxw =  getYminusFxWeightedInvariant(fx,rms_fp, force_metric); // null or [2]
		} else {
			 ymfxw = getYminusFxWeightedFast      (fx,rms_fp, force_metric); // null or [2]
		}
		return ymfxw; 
	}
	
	
	// TODO: modify these 2 methods to iterate through pairs, and transform pair if (eig_trans != null)
	
	private double [] getYminusFxWeightedInvariant(
			final double []   fx,
			final double []   rms_fp, // null or [2]
			final boolean     force_metric // when true, ignore transform with eig_trans and use linear dx, dy in pixels			
			) {
		final Thread[]      threads =     ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
		final AtomicInteger ai =          new AtomicInteger(0);
		final double []     wymfw =       new double [fx.length];
		double s_rms; 
		final double [] l2_arr = new double [num_samples]; 
		if (!force_metric && (eig_trans != null)) {
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						for (int nTile = ai.getAndIncrement(); nTile < macrotile_centers.length; nTile = ai.getAndIncrement()) {
							int ix = num_components * nTile;
							if (weights[ix] > 0) {
								int iy = ix+1;
								double dx = y_vector[ix] - fx[ix];
								double dy = y_vector[iy] - fx[iy];
								double d0 = eig_trans[nTile][0][0] * dx + eig_trans[nTile][0][1] * dy;
								double d1 = eig_trans[nTile][1][0] * dx + eig_trans[nTile][1][1] * dy;
								double wd0 = d0 * weights[ix];
								double wd1 = d1 * weights[ix];
								if (Double.isNaN(wd0) || Double.isNaN(wd1)) {
									System.out.println("getYminusFxWeighted(): weights["+ix+"]="+weights[ix]+", wd0="+wd0+
											", y_vector[ix]="+y_vector[ix]+", fx[ix]="+fx[ix]);
									System.out.println("getYminusFxWeighted(): weights["+iy+"]="+weights[iy]+", wd1="+wd1+
											", y_vector[iy]="+y_vector[iy]+", fx[iy]="+fx[iy]);
								}
								l2_arr[ix] = d0 * wd0;
								wymfw[ix] = wd0;
								l2_arr[iy] = d1 * wd1;
								wymfw[iy] = wd1;
								for (int j = 2; j < num_components; j++) {
									int i = ix+j;
									double d = y_vector[i] - fx[i];
									double wd = d * weights[i];
									if (Double.isNaN(wd)) {
										System.out.println("getYminusFxWeighted(): weights["+i+"]="+weights[i]+", wd="+wd+
												", y_vector[i]="+y_vector[i]+", fx[i]="+fx[i]);
									}
									//double l2 = d * wd;
									l2_arr[i] = d * wd;
									wymfw[i] = wd;
								}
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		} else {
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						for (int i = ai.getAndIncrement(); i < num_samples; i = ai.getAndIncrement()) if (weights[i] > 0) {
							double d = y_vector[i] - fx[i];
							double wd = d * weights[i];
							if (Double.isNaN(wd)) {
								System.out.println("getYminusFxWeighted(): weights["+i+"]="+weights[i]+", wd="+wd+
										", y_vector[i]="+y_vector[i]+", fx[i]="+fx[i]);
							}
							//double l2 = d * wd;
							l2_arr[i] = d * wd;
							wymfw[i] = wd;
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		} 
		s_rms = 0.0;
		for (double l2:l2_arr) {
			s_rms += l2;
		}
		double rms_pure = Math.sqrt(s_rms/pure_weight);
		for (int i = 0; i < par_indices.length; i++) {
			int indx = i + num_samples;
			double d = y_vector[indx] - fx[indx]; // fx[indx] == vector[i]
			double wd = d * weights[indx];
			s_rms += d * wd;
			wymfw[indx] =   wd;
		}
		double rms = Math.sqrt(s_rms); // assuming sum_weights == 1.0; /pure_weight); shey should be re-normalized after adding regularization
		if (rms_fp != null) {
			rms_fp[0] = rms;
			rms_fp[1] = rms_pure;
		}
		return wymfw;
	}

	
	
	private double [] getYminusFxWeightedFast( // problems. at least with eigen?
			final double []   fx,
			final double []   rms_fp, // null or [2]
			final boolean     force_metric // when true, ignore transform with eig_trans and use linear dx, dy in pixels			
			) {
		final Thread[]      threads =     ImageDtt.newThreadArray(QuadCLT.THREADS_MAX);
		final AtomicInteger ai =          new AtomicInteger(0);
		final double []     wymfw =       new double [fx.length];
		final AtomicInteger ati = new AtomicInteger(0);
		final double [] l2_arr = new double [threads.length];
		if (!force_metric && (eig_trans != null)) {
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						int thread_num = ati.getAndIncrement();
						for (int nTile = ai.getAndIncrement(); nTile < macrotile_centers.length; nTile = ai.getAndIncrement()) {
							int ix = num_components * nTile;
							if (weights[ix] > 0) {
								int iy = ix+1;
								double dx = y_vector[ix] - fx[ix];
								double dy = y_vector[iy] - fx[iy];
								double d0 = eig_trans[nTile][0][0] * dx + eig_trans[nTile][0][1] * dy;
								double d1 = eig_trans[nTile][1][0] * dx + eig_trans[nTile][1][1] * dy;
								double wd0 = d0 * weights[ix];
								double wd1 = d1 * weights[ix];
								if (Double.isNaN(wd0) || Double.isNaN(wd1)) {
									System.out.println("getYminusFxWeighted(): weights["+ix+"]="+weights[ix]+", wd0="+wd0+
											", y_vector[ix]="+y_vector[ix]+", fx[ix]="+fx[ix]);
									System.out.println("getYminusFxWeighted(): weights["+iy+"]="+weights[iy]+", wd1="+wd1+
											", y_vector[iy]="+y_vector[iy]+", fx[iy]="+fx[iy]);
								}
								l2_arr[thread_num] += d0 * wd0;
								wymfw[ix] = wd0;
								l2_arr[thread_num] += d1 * wd1;
								wymfw[iy] = wd1;
								for (int j = 2; j < num_components; j++) {
									int i = ix+j;
									double d = y_vector[i] - fx[i];
									double wd = d * weights[i];
									if (Double.isNaN(wd)) {
										System.out.println("getYminusFxWeighted(): weights["+i+"]="+weights[i]+", wd="+wd+
												", y_vector[i]="+y_vector[i]+", fx[i]="+fx[i]);
									}
									//double l2 = d * wd;
									l2_arr[thread_num] += d * wd;
									wymfw[i] = wd;
								}
							}
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		} else {
			for (int ithread = 0; ithread < threads.length; ithread++) {
				threads[ithread] = new Thread() {
					public void run() {
						int thread_num = ati.getAndIncrement();
						for (int i = ai.getAndIncrement(); i < num_samples; i = ai.getAndIncrement()) if (weights[i] > 0) {
							double d = y_vector[i] - fx[i];
							double wd = d * weights[i];
							if (Double.isNaN(wd)) {
								System.out.println("getYminusFxWeighted(): weights["+i+"]="+weights[i]+", wd="+wd+
										", y_vector[i]="+y_vector[i]+", fx[i]="+fx[i]);
							}
							l2_arr[thread_num] += d * wd;
							wymfw[i] = wd;
						}
					}
				};
			}		      
			ImageDtt.startAndJoin(threads);
		}
		double s_rms = 0.0;
		for (double l2:l2_arr) {
			s_rms += l2;
		}
		double rms_pure = Math.sqrt(s_rms/pure_weight);
		for (int i = 0; i < par_indices.length; i++) {
			int indx = i + num_samples;
			double d = y_vector[indx] - fx[indx]; // fx[indx] == vector[i]
			double wd = d * weights[indx];
			s_rms += d * wd;
			wymfw[indx] =   wd;
		}
		double rms = Math.sqrt(s_rms); // assuming sum_weights == 1.0; /pure_weight); they should be re-normalized after adding regularization
		if (rms_fp != null) {
			rms_fp[0] = rms;
			rms_fp[1] = rms_pure;
		}
		return wymfw;
	}
	
	public static String getChecksum(Serializable object) throws IOException, NoSuchAlgorithmException {
	    ByteArrayOutputStream baos = null;
	    ObjectOutputStream oos = null;
	    try {
	        baos = new ByteArrayOutputStream();
	        oos = new ObjectOutputStream(baos);
	        oos.writeObject(object);
	        MessageDigest md = MessageDigest.getInstance("MD5");
	        byte[] thedigest = md.digest(baos.toByteArray());
	        return DatatypeConverter.printHexBinary(thedigest);
	    } finally {
	        oos.close();
	        baos.close();
	    }
	}
}