package com.elphel.imagej.ims;

import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;

import com.elphel.imagej.cameras.CLTParameters;
import com.elphel.imagej.tileprocessor.ImageDtt;

import Jama.Matrix;

/// Find quaternion to transform array of 3D vectors to the array of the corresponding 3D vectors
public class QuatCorrLMA {
	private static final int   N =             3;
	private static final int   REG_SAMPLES_SQUARED =   1;
	private static final int   REG_SAMPLES_LINEAR =    3;
	
	private int               samples =         0;
	private int               reg_samples;
	private boolean           squared_cost;
	private double []         last_rms =        null; // {rms, rms_pure}, matching this.vector
	private double []         good_or_bad_rms = null; // just for diagnostics, to read last (failed) rms
	private double []         initial_rms =     null; // {rms, rms_pure}, first-calcualted rms
	private double []         parameters_vector = null; // quaternion params: [x, y, z], w derived
//	private double [][]       ims_quats =       null;
	private double []         x_vector =        null;
	private double []         y_vector =        null;
//	private double []         y_inv_vector =    null;
	private double []         weights;          // normalized so sum is 1.0 for all - samples and extra regularization
	private double            pure_weight;      // weight of samples only	
	private double []         last_ymfx =       null;
	private double [][]       last_jt =         null;
	
	public double [] getLastRms() {
		return last_rms;
	}
	
	public double [] getInitialRms() {
		return initial_rms;
	}
	
	public double [] getVector() { // here - quaternion params [x,y,z]
		return parameters_vector;
	}
	
	public double [] getNormQuat() {
		return paramsToQuat(parameters_vector);
	}

	/**
	 * Scale quaternion rotation angle by factor s, preserving axis.
	 * @param q quaternion [w,x,y,z]
	 * @param s scale factor for the rotation angle
	 * @return scaled quaternion [w,x,y,z]
	 */
	public static double[] scaleQuaternion(double[] q, double s) {
		double w = q[0];
		double x = q[1];
		double y = q[2];
		double z = q[3];
		double normV = Math.sqrt(x*x + y*y + z*z);
		double angle = 2.0 * Math.atan2(normV, w);
		double newAngle = angle * s;
		double sinHalf = Math.sin(0.5 * newAngle);
		double cosHalf = Math.cos(0.5 * newAngle);
		if (normV < 1e-15) {
			return new double[] {cosHalf, 0.0, 0.0, 0.0};
		}
		double scale = sinHalf / normV;
		return new double[] {cosHalf, x * scale, y * scale, z * scale};
	}
	
	public double [] getLastFx() {
		double [] fx = getFxDerivs(
				parameters_vector, // double []         vector,
				null,              // final double [][] jt, // should be null or initialized with [vector.length][]
				-3);      // final int         debug_level)
		return fx;
	}
	public double [] getX() {
		return x_vector;
	}
	public double [] getY() {
		return y_vector;
	}
	
	public double [] getW() {
		return weights;
	}
	
	
	

	private static double[] paramsToQuat(double[] p) {
		double x = p[0];
		double y = p[1];
		double z = p[2];
		double w2 = 1.0 - (x * x + y * y + z * z);
		if (w2 < 0.0) {
			w2 = 0.0; // clamp to avoid NaNs; caller should keep small-angle params
		}
		double w = Math.sqrt(w2);
		return new double[] {w, x, y, z};
	}
	
	
	/**
	 * Calculate quaternion to rotate array of 3d vectors imx_xyz[][3] to 
	 * array cam_xyz[][3] with LMA
	 * @param clt_parameters parameters, including for the LMA
	 * @param reg_weight - regularization parameter adding loss for result rotation angle to mitigate almost-colinear
	 *                    input data (range [0,1]). Linear cost if positive, squared cost if negative
	 * @param ims_xyz array of the IMS coordinates [...][3], relative to a reference scene
	 * @param cam_xyz array of the camera coordinates [...][3], relative to a same reference scene
	 * @param weights weights per vector or null (will use the same weights for all vectors
	 * @param rmse null or double [1] to output LMA RMSE. if dfouble[2] will provide initial RMSE (unit quaternion)
	 * @param debugLevel
	 * @return normalized quaternion as double[4] or null if failed
	 */
	public static double [] getQuatLMA(
			CLTParameters   clt_parameters,
			double          reg_weight_in,
			double [][]     ims_xyz,
			double [][]     cam_xyz,
			double []       weights,
			double []       rmse, // double[1] or null
			int             debugLevel) {
		debugLevel+=3;
		double reg_weight = Math.abs(reg_weight_in);
		boolean squared_cost = reg_weight_in < 0;
		QuatCorrLMA quatCorrLMA = new QuatCorrLMA();
		quatCorrLMA.prepareLMA(
				ims_xyz,     // double [][]     ims_xyz,
				cam_xyz,     // double [][]     cam_xyz,
				weights,     // double []       weights),
				squared_cost, // boolean         squared_cost,
				reg_weight); // double          reg_weight);
		int OK = quatCorrLMA.runLma(                      // <0 - failed, >=0 iteration number (1 - immediately)
				clt_parameters.imp.imsq_lambda,           // double lambda,           // 0.1
				clt_parameters.imp.imsq_lambda_scale_good,// double lambda_scale_good,// 0.5
				clt_parameters.imp.imsq_lambda_scale_bad, // double lambda_scale_bad, // 8.0
				clt_parameters.imp.imsq_lambda_max,       // double lambda_max,       // 100
				clt_parameters.imp.imsq_rms_diff,         // double rms_diff,         // 0.001
				clt_parameters.imp.imsq_num_iter,         // int    num_iter,         // 20
				debugLevel);                              // int    debug_level)
		if (OK < 0) {
			return null;
		}
		if (rmse != null) {
			if (rmse.length > 0) {
				rmse[0] = quatCorrLMA.getLastRms()[0];
				if (rmse.length > 1) {
					rmse[1] = quatCorrLMA.getInitialRms()[0];
				}
			}
		}
		if (debugLevel > -3) {
			double [] p = quatCorrLMA.getVector(); // params [x,y,z]
			double [] qn = quatCorrLMA.getNormQuat(); // [w,x,y,z]
			double l2 = Math.sqrt(qn[0]*qn[0]+qn[1]*qn[1]+qn[2]*qn[2]+qn[3]*qn[3]);
			System.out.println("getQuatLMA{}: params= ["+p[0]+", "+p[1]+", "+p[2]+"]"+
					", len="+l2+", RMSE="+quatCorrLMA.getLastRms()[0]+" ("+quatCorrLMA.getInitialRms()[0]+")");
			System.out.println("getQuatLMA{}: quat_corr normalized= ["+qn[0]+", "+qn[1]+", "+qn[2]+", "+qn[3]+"]");
		}
		double [] quat_corr = quatCorrLMA.getNormQuat();
		 // To be able to change value during debugging. Using debug_table = false; causes Eclipse to issue a warning for the non-executable code.
		boolean debug_table = debugLevel >1000;
		if (debug_table) { 
			double [] dbg_w =  quatCorrLMA.getW();
			double [] dbg_x =  quatCorrLMA.getX();
			double [] dbg_y =  quatCorrLMA.getY();
			double [] dbg_fx = quatCorrLMA.getLastFx();
			
			System.out.println(String.format("%3s"+
					"\t%9s"+
					"\t%9s\t%9s\t%9s"+
					"\t%9s\t%9s\t%9s"+
					"\t%9s\t%9s\t%9s"+
					"\t%9s\t%9s\t%9s"+
					"\t%9s\t%9s\t%9s",
					"N(full)","W",
					"x-X","x-Y","x-Z",
					"y-X","y-Y","y-Z",
					"fx-X","fx-Y","fx-Z",
					"ims-X","ims-Y","ims-Z",
					"rot-X","rot-Y","rot-Z"));
			double [] nan3= {Double.NaN,Double.NaN,Double.NaN};
			for (int nscene = 0; nscene < (dbg_w.length/3-1); nscene++) if (dbg_w[3*nscene]>0) {
				double [] ims_xyz_nscene=ims_xyz[nscene];
				double [] ims_rot_xyz = (ims_xyz_nscene==null)? nan3 : Imx5.applyQuaternionTo(quat_corr, ims_xyz_nscene, false);
				System.out.println(String.format("%3d"+
						"\t%9.5f"+
						"\t%9.5f\t%9.5f\t%9.5f"+
						"\t%9.5f\t%9.5f\t%9.5f"+
						"\t%9.5f\t%9.5f\t%9.5f"+
						"\t%9.5f\t%9.5f\t%9.5f"+
						"\t%9.5f\t%9.5f\t%9.5f",
						nscene,
						dbg_w[3*nscene],
						dbg_x[3*nscene+0],dbg_x[3*nscene+1],dbg_x[3*nscene+2],
						dbg_y[3*nscene+0],dbg_y[3*nscene+1],dbg_y[3*nscene+2],
						dbg_fx[3*nscene+0],dbg_fx[3*nscene+1],dbg_fx[3*nscene+2],
						ims_xyz_nscene[0],ims_xyz_nscene[1],ims_xyz_nscene[2],
						ims_rot_xyz[0],ims_rot_xyz[1],ims_rot_xyz[2]));
			}
		}
		return quat_corr; // quatCorrLMA.getNormQuat();
	}
	
	
	public void prepareLMA(
			double [][]     ims_xyz,
			double [][]     cam_xyz,
			double []       sample_weights,
			boolean         squared_cost,
			double          reg_weight) {
		this.squared_cost = squared_cost;
		// all samples should be non-null and do not have NaNs
		reg_samples = this.squared_cost ? REG_SAMPLES_SQUARED : REG_SAMPLES_LINEAR;

		samples = ims_xyz.length;
		if (cam_xyz.length != samples) {
			throw new IllegalArgumentException ("QuatCorrLMA.prepareLMA: ims_xyz and cam_xyz should have the same length");
		}
		if (sample_weights == null) {
			sample_weights = new double[samples];
			Arrays.fill(sample_weights, 1.0);
		}
		boolean [] valid = new boolean [samples];
		if (sample_weights.length != samples) {
			throw new IllegalArgumentException ("QuatCorrLMA.prepareLMA: sample_weights should be null or have the same length as data arrays");
		}
		y_vector = new double[N * samples + reg_samples];
		x_vector = new double[N * samples + reg_samples];
		double sum_w = 0;
		for (int n = 0; n < samples; n++) if ((ims_xyz[n]!=null) &&(cam_xyz[n]!=null)){
			valid[n]=true;
			System.arraycopy(ims_xyz[n], 0, x_vector, N*n, N);
			System.arraycopy(cam_xyz[n], 0, y_vector, N*n, N); // null
			sum_w += sample_weights[n];
		}
		pure_weight = 1.0-reg_weight;
		double ws = pure_weight/N/sum_w;
		weights = new double [N * samples + reg_samples];
		for (int n = 0; n < samples; n++) if (valid[n]){ 
			Arrays.fill(weights, N*n, N*(n+1), ws*sample_weights[n]);
		}
		for (int i = 0; i < reg_samples; i++) {
			weights[N * samples + i] = reg_weight/reg_samples;
		}
		parameters_vector = new double [] {0,0,0}; // small rotation params: [x,y,z], w derived
		last_jt = new double [parameters_vector.length][];
		return; 
	}
	
	
	private double compareJT(
			double [] vector,
			double    delta) {
		int num_dbg_lines = 1000;
		double []  errors=new double [vector.length];
		double [][] jt =  new double [vector.length][];
//		System.out.println("Parameters vector = ["+vector[0]+", "+vector[1]+", "+vector[2]+", "+vector[3]+"]");
		System.out.print("Parameters vector = [");
		for (int i = 0; i < vector.length; i++) {
			System.out.print(vector[i]);
			if (i < (vector.length -1)) System.out.print(", ");
		}
		System.out.println("]");
		getFxDerivs(
				vector,
				jt, // final double [][] jt, // should be null or initialized with [vector.length][]
				1); // debug_level);
		double [][] jt_delta =  getFxDerivsDelta(
				vector, // double []         vector,
				delta, // final double      delta,
				-1); // final int         debug_level)
		System.out.print(String.format("%5s","index"));
		for (int i = 0; i < vector.length; i++) {
			System.out.print(String.format("\t%12s","jt["+i+"]"));
		}			
		for (int i = 0; i < vector.length; i++) {
			System.out.print(String.format("\t%12s","jt_delta["+i+"]"));
		}			
		for (int i = 0; i < vector.length; i++) {
			System.out.print(String.format("\t%12s","jt_diff["+i+"]"));
		}			
		System.out.println();

		for (int n = 0; n < weights.length; n++) if (weights[n] > 0) {
			if (n < num_dbg_lines) {
				System.out.print(String.format("%5d",n));
				for (int i = 0; i < vector.length; i++) {
					System.out.print(String.format("\t%12.9f",jt[i][n]));
				}			
				for (int i = 0; i < vector.length; i++) {
					System.out.print(String.format("\t%12.9f",jt_delta[i][n]));
				}			
				for (int i = 0; i < vector.length; i++) {
					System.out.print(String.format("\t%12.9f",jt[i][n]-jt_delta[i][n]));
				}			
				System.out.println();
			}
			/*
			System.out.println(String.format(
					"%3d\t%12.9f\t%12.9f\t%12.9f\t%12.9f\t%12.9f\t%12.9f\t%12.9f\t%12.9f\t%12.9f\t%12.9f\t%12.9f\t%12.9f",
					n, jt[0][n], jt[1][n], jt[2][n], jt[3][n],
					jt_delta[0][n], jt_delta[1][n], jt_delta[2][n], jt_delta[3][n],
					jt[0][n]-jt_delta[0][n],jt[1][n]-jt_delta[1][n],jt[2][n]-jt_delta[2][n],jt[3][n]-jt_delta[3][n]));
					*/
			for (int i = 0; i < vector.length; i++) {
				errors[i] = Math.max(errors[i], jt[i][n]-jt_delta[i][n]);
			}
		}
		for (int i = 0; i < vector.length; i++) {
			System.out.print("\t\t");
		}			
		for (int i = 0; i < vector.length; i++) {
			System.out.print(String.format("\t%12.9f",errors[i]));
		}			
        /*
		System.out.println(String.format(
				"-\t-\t-\t-\t-\t-\t-\t-\t-\t%12.9f\t%12.9f\t%12.9f\t%12.9f",
				errors[0], errors[1], errors[2], errors[3]));
				*/
		double err=0;
		for (int i = 0; i < vector.length; i++) {
			err = Math.max(errors[i], err);
		}
		return err;
	}
	
	private double [][] getFxDerivsDelta(
			double []         vector,
			final double      delta,
			final int         debug_level) {
		double [][] jt =  new double [vector.length][weights.length];
		for (int nv = 0; nv < vector.length; nv++) {
			double [] vpm = vector.clone();
			vpm[nv]+= 0.5*delta;
			double [] fx_p =  getFxDerivs(
					vpm,
					null, // final double [][] jt, // should be null or initialized with [vector.length][]
					debug_level);
			vpm[nv]-= delta;
			double [] fx_m =  getFxDerivs(
					vpm,
					null, // final double [][] jt, // should be null or initialized with [vector.length][]
					debug_level);
			for (int i = 0; i < weights.length; i++) if (weights[i] > 0) {
				jt[nv][i] = (fx_p[i]-fx_m[i])/delta;
			}
		}
		return jt;
	}

	 /**
	   * Transposed Jacobian of V1 = Q * (0,v) * Q* w.r.t quaternion q.
	   * @param q quaternion [w, x, y, z]
	   * @param v vector to rotate
	   * @return jt[4][3] with jt[k][i] = d(V1[i])/d(q[k])
	   */
	  public static double[][] dV1dQ_T(double[] q, double[] v) {
	      double w = q[0], x = q[1], y = q[2], z = q[3];
	      double vx = v[0], vy = v[1], vz = v[2];
	      double dot = x * vx + y * vy + z * vz;

	      double[][] jt = new double[4][3];

	      // dV1/dw
	      jt[0][0] = 2 * w * vx + 2 * (y * vz - z * vy);
	      jt[0][1] = 2 * w * vy + 2 * (z * vx - x * vz);
	      jt[0][2] = 2 * w * vz + 2 * (x * vy - y * vx);

	      // dV1/dx
	      jt[1][0] = 2 * dot;
	      jt[1][1] = -2 * x * vy + 2 * y * vx - 2 * w * vz;
	      jt[1][2] = -2 * x * vz + 2 * z * vx + 2 * w * vy;

	      // dV1/dy
	      jt[2][0] = -2 * y * vx + 2 * x * vy + 2 * w * vz;
	      jt[2][1] = 2 * dot;
	      jt[2][2] = -2 * y * vz + 2 * z * vy - 2 * w * vx;

	      // dV1/dz
	      jt[3][0] = -2 * z * vx - 2 * w * vy + 2 * x * vz;
	      jt[3][1] = 2 * w * vx - 2 * z * vy + 2 * y * vz;
	      jt[3][2] = 2 * dot;

	      return jt;
	  }

	  /**
	   * Jacobian transpose w.r.t. small-angle params p=[x,y,z], with w derived as sqrt(1 - |p|^2).
	   * @param p quaternion params [x,y,z]
	   * @param v vector to rotate
	   * @return jt[3][3] with jt[k][i] = d(V1[i])/d(p[k])
	   */
	  public static double[][] dV1dP_T(double[] p, double[] v) {
	      double[] pInv = {-p[0], -p[1], -p[2]}; // use conjugate to match Apache Rotation.applyTo
	      double[] q = paramsToQuat(pInv);
	      double w = q[0], x = q[1], y = q[2], z = q[3];
	      double wSafe = (w > 1e-12) ? w : 1e-12;
	      double dwdx = -p[0] / wSafe;
	      double dwdy = -p[1] / wSafe;
	      double dwdz = -p[2] / wSafe;
	      double[][] jtQ = dV1dQ_T(q, v);
	      double[][] jt = new double[3][3];
	      for (int i = 0; i < 3; i++) {
	          jt[0][i] = jtQ[0][i] * dwdx - jtQ[1][i];
	          jt[1][i] = jtQ[0][i] * dwdy - jtQ[2][i];
	          jt[2][i] = jtQ[0][i] * dwdz - jtQ[3][i];
	      }
	      return jt;
	  }
	
	
	private double [] getFxDerivs(
			double []         vector, // quaternion params [x,y,z], w derived
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		double [] fx = new double [weights.length];
		double [] q = paramsToQuat(new double[]{-vector[0], -vector[1], -vector[2]}); // conjugate to match Apache applyTo
		double [][] R = quaternionToRotationMatrix(q); // double [] q,
		
		if (jt != null) {
			for (int i = 0; i < vector.length; i++) {
				jt[i] = new double [weights.length];
			}
		}
		for (int n = 0; n < samples; n++) {
			double [] v1 = new double[3];
			double [] v =  new double[3];
			System.arraycopy(x_vector, n * 3, v, 0, 3);
			for (int i = 0; i < 3; i++) {
				for (int k = 0; k < 3; k++) {
					v1[i] += R[i][k]*v[k];
				}
			}
			System.arraycopy(v1, 0, fx, 3 * n, 3);
			if (jt != null) {
				double [][] jtn =  dV1dP_T(
						vector, // double[] params,
						v);     // double[] v
				for (int i = 0; i < vector.length; i++) {
					for (int j = 0; j < v.length; j++) {
						jt[i][v.length * n + j] = jtn[i][j]; 
					}
					System.arraycopy(jtn[i], 0, jt[i], 3 * n, 3);
				}
			}
		}
		// regularization parameter (trying squared)
		int reg_index =  3*samples; 
		if (squared_cost) {
			fx[reg_index] = vector[0]*vector[0]+vector[1]*vector[1]+vector[2]*vector[2];
			if (jt != null) {
				for (int i = 0; i < 3; i++) {
					jt[i][reg_index] = 2 * vector[i];
				}
			}		
		} else { // linear regularization
			for (int i = 0; i < reg_samples; i++) {
				fx[reg_index + i] =  vector[i];
				if (jt != null) {
					jt[i][reg_index+i] = 1;
				}
			}
		}
		return fx;
	}
	
	private double [] getYminusFxWeighted(
			final double []   fx,
			final double []   rms_fp // null or [2]
			) {
		final double []     wymfw =       new double [fx.length];
		double s_rms=0; 
		double rms_pure=Double.NaN;
		for (int i = 0; i < fx.length; i++) {
			double d = y_vector[i] - fx[i];
			double wd = d * weights[i];
			if (Double.isNaN(wd)) {
				System.out.println("getYminusFxWeighted(): weights["+i+"]="+weights[i]+", wd="+wd+
						", y_vector[i]="+y_vector[i]+", fx[i]="+fx[i]);
				wd = 0.0;
				d = 0.0;
			}
			if (i == (samples * N)) {
				rms_pure = Math.sqrt(s_rms/pure_weight);	
			}
			wymfw[i] = wd;
			s_rms += d * wd;
		}
		double rms = Math.sqrt(s_rms); // assuming sum_weights == 1.0;
		if (Double.isNaN(rms_pure)) {
			rms_pure=rms;
		}
		if (rms_fp != null) {
			rms_fp[0] = rms;
			rms_fp[1] = rms_pure;
		}
		return wymfw;
	}
	
	private double [][] getWJtJlambda( // USED in lwir
			final double      lambda,
			final double [][] jt)
	{
		final int num_pars = jt.length;
		final int num_pars2 = num_pars * num_pars;
		final int nup_points = jt[0].length;
		final double [][] wjtjl = new double [num_pars][num_pars];
		final Thread[] threads = ImageDtt.newThreadArray(ImageDtt.THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				public void run() {
					for (int indx = ai.getAndIncrement(); indx < num_pars2; indx = ai.getAndIncrement()) {
						int i = indx / num_pars;
						int j = indx % num_pars;
						if (j >= i) {
							double d = 0.0;
							for (int k = 0; k < nup_points; k++) {
								if (jt[i][k] != 0) {
									d+=0;
								}
								d += weights[k]*jt[i][k]*jt[j][k];
							}
							wjtjl[i][j] = d;
							if (i == j) {
								wjtjl[i][j] += d * lambda;
							} else {
								wjtjl[j][i] = d;
							}
						}
					}
				}
			};
		}		      
		ImageDtt.startAndJoin(threads);
		return wjtjl;
	}
	
	public int runLma( // <0 - failed, >=0 iteration number (1 - immediately)
			double lambda,           // 0.1
			double lambda_scale_good,// 0.5
			double lambda_scale_bad, // 8.0
			double lambda_max,       // 100
			double rms_diff,         // 0.001
			int    num_iter,         // 20
			int    debug_level) {
		boolean [] rslt = {false,false};
		this.last_rms = null; // remove?
		int iter = 0;
		for (iter = 0; iter < num_iter; iter++) {
			rslt =  lmaStep(
					lambda,
					rms_diff,
					debug_level);
			if (rslt == null) {
				return -1; // false; // need to check
			}
			if (debug_level > 1) {
				System.out.println("LMA step "+iter+": {"+rslt[0]+","+rslt[1]+"} full RMS= "+good_or_bad_rms[0]+
						" ("+initial_rms[0]+"), pure RMS="+good_or_bad_rms[1]+" ("+initial_rms[1]+") + lambda="+lambda+
						", params=["+parameters_vector[0]+","+parameters_vector[1]+","+parameters_vector[2]+"]");
			}
			if (rslt[1]) {
				break;
			}
			if (rslt[0]) { // good
				lambda *= lambda_scale_good;
			} else {
				lambda *= lambda_scale_bad;
				if (lambda > lambda_max) {
					break; // not used in lwir
				}
			}
		}
		if (rslt[0]) { // better
			if (iter >= num_iter) { // better, but num tries exceeded
				if (debug_level > 1) System.out.println("Step "+iter+": Improved, but number of steps exceeded maximal");
			} else {
				if (debug_level > 1) System.out.println("Step "+iter+": LMA: Success");
			}

		} else { // improved over initial ?
			if (last_rms[0] < initial_rms[0]) { // NaN
				rslt[0] = true;
				if (debug_level > 1) System.out.println("Step "+iter+": Failed to converge, but result improved over initial");
			} else {
				if (debug_level > 1) System.out.println("Step "+iter+": Failed to converge");
			}
		}
		boolean show_intermediate = true;
		if (show_intermediate && (debug_level > 0)) {
			System.out.println("LMA: full RMS="+last_rms[0]+" ("+initial_rms[0]+"), pure RMS="+last_rms[1]+" ("+initial_rms[1]+") + lambda="+lambda);
		}
	
		if (debug_level > 2) {
			double    delta = 1E-5;
			System.out.println("\n\n");
			double err = compareJT(
					parameters_vector, // double [] vector,
					delta);            // double    delta);
			System.out.println("Maximal error = "+err);
		}
		return rslt[0]? iter : -1;
	}
	

	
	
	private boolean [] lmaStep(
			double lambda,
			double rms_diff,
			int debug_level) {
		boolean [] rslt = {false,false};
		// maybe the following if() branch is not needed - already done in prepareLMA !
		if (this.last_rms == null) { //first time, need to calculate all (vector is valid)
			last_rms = new double[2];
			if (debug_level > 1) {
				System.out.println("lmaStep(): first step");
			}
			double [] fx = getFxDerivs(
					parameters_vector, // double []         vector,
					last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
					debug_level);      // final int         debug_level)
			/*
			if (debug_level > 0) {
				 debugYfX ( "fx0-",   // String pfx,
						 fx); // double [] data)
			}
			if (debug_level > 2) {
				debugYfX ( "ffx0-",   // String pfx,
						dbg_data); // double [] data)
			}
			*/
			if (debug_level > 2) {
				double    delta = 1E-5;
				System.out.println("\n\n");
				double err = compareJT(
						parameters_vector, // double [] vector,
						delta);            // double    delta);
				System.out.println("Maximal error = "+err);
			}
			
			last_ymfx = getYminusFxWeighted(
					fx, // final double []   fx,
					last_rms); // final double []   rms_fp // null or [2]
			this.initial_rms = this.last_rms.clone();
			this.good_or_bad_rms = this.last_rms.clone();

			if (debug_level > -1) { // temporary
				/*
				dbgYminusFxWeight(
						this.last_ymfx,
						this.weights,
						"Initial_y-fX_after_moving_objects");
                */
			}
			if (last_ymfx == null) {
				return null; // need to re-init/restart LMA
			}
			// TODO: Restore/implement
			if (debug_level > 3) {
				/*
				 dbgJacobians(
							corr_vector, // GeometryCorrection.CorrVector corr_vector,
							1E-5, // double delta,
							true); //boolean graphic)
				*/
			}
		}
		Matrix y_minus_fx_weighted = new Matrix(this.last_ymfx, this.last_ymfx.length);

		Matrix wjtjlambda = new Matrix(getWJtJlambda(
				lambda, // *10, // temporary
				this.last_jt)); // double [][] jt) // null
		
		if (debug_level>2) {
			System.out.println("JtJ + lambda*diag(JtJ");
			wjtjlambda.print(18, 10);
		}
		Matrix jtjl_inv = null;
		try {
			jtjl_inv = wjtjlambda.inverse(); // check for errors
		} catch (RuntimeException e) {
			rslt[1] = true;
			if (debug_level > 0) {
				System.out.println("Singular Matrix!");
			}

			return rslt;
		}
		if (debug_level>2) {
			System.out.println("(JtJ + lambda*diag(JtJ).inv()");
			jtjl_inv.print(18, 10);
		}
//last_jt has NaNs
		Matrix jty = (new Matrix(this.last_jt)).times(y_minus_fx_weighted);
		if (debug_level>2) {
			System.out.println("Jt * (y-fx)");
			jty.print(18, 10);
		}
		
		
		Matrix mdelta = jtjl_inv.times(jty);
		if (debug_level>2) {
			System.out.println("mdelta");
			mdelta.print(18, 10);
		}

		double scale = 1.0;
		double []  delta =      mdelta.getColumnPackedCopy();
		double []  new_vector = parameters_vector.clone();
		for (int i = 0; i < parameters_vector.length; i++) {
			new_vector[i] += scale * delta[i];
		}
		
		
		double [] fx = getFxDerivs(
				new_vector, // double []         vector,
				last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
				debug_level);      // final int         debug_level)
		double [] rms = new double[2];
		last_ymfx = getYminusFxWeighted(
				fx, // final double []   fx,
				rms); // final double []   rms_fp // null or [2]
		if (debug_level > 2) {
			/*
			dbgYminusFx(this.last_ymfx, "next y-fX");
			dbgXY(new_vector, "XY-correction");
			*/
		}

		if (last_ymfx == null) {
			return null; // need to re-init/restart LMA
		}

		this.good_or_bad_rms = rms.clone();
		if (rms[0] < this.last_rms[0]) { // improved
			rslt[0] = true;
			rslt[1] = rms[0] >=(this.last_rms[0] * (1.0 - rms_diff));
			this.last_rms = rms.clone();

			this.parameters_vector = new_vector.clone();
			if (debug_level > 2) {
				// print vectors in some format
				/*
				System.out.print("delta: "+corr_delta.toString()+"\n");
				System.out.print("New vector: "+new_vector.toString()+"\n");
				System.out.println();
				*/
			}
		} else { // worsened
			rslt[0] = false;
			rslt[1] = false; // do not know, caller will decide
			// restore state
			fx = getFxDerivs(
					parameters_vector, // double []         vector,
					last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
					debug_level);      // final int         debug_level)
			last_ymfx = getYminusFxWeighted(
					fx, // final double []   fx,
					this.last_rms); // final double []   rms_fp // null or [2]
			if (last_ymfx == null) {
				return null; // need to re-init/restart LMA
			}
			if (debug_level > 2) {
				/*
				 dbgJacobians(
							corr_vector, // GeometryCorrection.CorrVector corr_vector,
							1E-5, // double delta,
							true); //boolean graphic)
							*/
			}
		}
		return rslt;
	}
	
	/**
	 * Quaternion to rotation matrix conversion
	 *     |  w^2+qx^2-qy^2-qz^2, 2*(qx*qy-qz*w),     2*(qx*qz+qy*w)     |
     * R = |  2*(qx*qy+qz*w),     w^2-qx^2+qy^2-qz^2, 2*(qy*qz-qx*w)     |
     *     |  2*(qx*qz-qy*w),     2*(qy*qz+qx*w),     w^2-qx^2-qy^2+qz^2 |
	 * @param q   4 components (w, qx, qy, qz) of the quaternion
	 * @return Jacobian - derivatives of the rotated vector by each of the xyz vector components. rows - components of the rotated vector,
	 * columns - input vector components.
	 */
	public static double [][] quaternionToRotationMatrix(
			double [] q) {
		double q00 = q[0]*q[0];
		double q11 = q[1]*q[1];
		double q22 = q[2]*q[2];
		double q33 = q[3]*q[3];
		
		double q01 = q[0]*q[1];
		double q02 = q[0]*q[2];
		double q03 = q[0]*q[3];
		double q12 = q[1]*q[2];
		double q13 = q[1]*q[3];
		double q23 = q[2]*q[3];
		
		return new double [][]{
			{	q00+q11-q22-q33, 2*(q12-q03),     2*(q13+q02)},
			{	2*(q12+q03),     q00-q11+q22-q33, 2*(q23-q01)},
			{	2*(q13-q02),     2*(q23+q01),     q00-q11-q22+q33}};
	}
}
