/**
 **
 ** QuaternionLma - Find quaternion to best transform a set of input 3D vectors
 **                into a set of output 3D vectors
 **
 ** Copyright (C) 2023 Elphel, Inc.
 **
 ** -----------------------------------------------------------------------------**
 **
 **  QuaternionLma.java is free software: you can redistribute it and/or modify
 **  it under the terms of the GNU General Public License as published by
 **  the Free Software Foundation, either version 3 of the License, or
 **  (at your option) any later version.
 **
 **  This program is distributed in the hope that it will be useful,
 **  but WITHOUT ANY WARRANTY; without even the implied warranty of
 **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 **  GNU General Public License for more details.
 **
 **  You should have received a copy of the GNU General Public License
 **  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ** -----------------------------------------------------------------------------**
 **
 */

package com.elphel.imagej.tileprocessor;

import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.math3.geometry.euclidean.threed.Rotation;
import org.apache.commons.math3.geometry.euclidean.threed.RotationConvention;
import org.apache.commons.math3.geometry.euclidean.threed.RotationOrder;

import Jama.Matrix;

public class QuaternionLma {
    // Number of extra (regularization) samples appended after the per-scene samples in MODE_XYZ
    private final static int  REGLEN =            1; // number of extra (regularization) samples
    // Fitting modes, selected by the prepare*() methods and dispatched on in getFxDerivs()
    public static final int MODE_XYZ =           0;
    public static final int MODE_XYZQ =          1; // OK with [3]
    public static final int MODE_COMBO =         2;
    public static final int MODE_XYZQ_LOCAL =    3;
    public static final int MODE_COMBO_LOCAL =   4; // OK with [3]
    public static final int MODE_COMPASS =       5;
    public static final int MODE_XYZ4Q3 =        6; // Q0-Q3 for translation (with scale), Q1-Q3 - for rotation
    
//    public static final int MODE_XYZ3 =          6;
//    public static final int MODE_XYZQ3 =         7;
//    public static final int MODE_COMBO3 =        8;
//    public static final int MODE_XYZQ_LOCAL3 =   9;
//    public static final int MODE_COMBO_LOCAL3 = 10;
    
	private int               N =               0; // number of scenes (length of the input arrays)
	// Mode2 - compensating camera uncertainty:dpx/dx ~= -dpx/daz/height(m) dpy/dy ~= dpy/dtl/height(m)
	private int               mode =            0; // 0 xyz, 1 - xyz,quat, 2: Z/h, 2Q3, 2Q2-X/h, 2Q1+Y/h
//	private boolean           use_6dof =        false;
	private int               samples =         3; // number of y_vector/weights elements per scene
	private int               samples_x =       3; // number of x_vector elements per scene
	private double            height = 1;          // divisor for translations in the MODE_COMBO samples
	private double []         last_rms =        null; // {rms, rms_pure}, matching this.vector
	private double []         good_or_bad_rms = null; // just for diagnostics, to read last (failed) rms
	private double []         initial_rms =     null; // {rms, rms_pure}, first-calculated rms
	private double []         parameters_vector = null; // fitted parameters (quaternion components or a single angle)
	private double []         x_vector =        null; // input data, samples_x elements per scene
	private double []         y_vector =        null; // target data, samples elements per scene plus optional extra samples
	private double []         y_inv_vector =    null; // inverted target transformations (filled by the "local" modes)
	private double []         weights;     // normalized so sum is 1.0 for all - samples and extra regularization
	private double            pure_weight; // weight of samples only
	private double            xyz_weight;  // weight of all xyz samples (weight of rotations is pure_weight - xyz_weight)
	private double []         last_ymfx =       null;
	private double [][]       last_jt =         null; // transposed Jacobian, one row per parameter
	private double []         axis = null;            // unit rotation axis for MODE_COMPASS
	private double[] dbg_data;
	
	/**
	 * Get the current solution as a quaternion.
	 * A 3-element parameters vector is treated as {q1,q2,q3} and is complemented
	 * with the scalar component provided by getQ0(). Any other vector length is
	 * returned as is (the internal array itself, not a copy).
	 * @return {q0,q1,q2,q3} for 3-element parameters, otherwise the parameters vector
	 */
	public double [] getQuaternion() {
		final double [] pv = parameters_vector;
		if (pv.length != 3) {
			return pv;
		}
		final double [] quat = new double [4];
		quat[0] = getQ0(pv);
		quat[1] = pv[0];
		quat[2] = pv[1];
		quat[3] = pv[2];
		return quat;
	}
	
	/**
	 * Get the RMS values stored in last_rms, expanded by getRms4().
	 * @return 4-element array built by getRms4()
	 */
	public double [] getLastRms() {
		final double [] rms = this.last_rms;
		return getRms4(rms);
	}
	
	
	/**
	 * Get the RMS values stored in initial_rms, expanded by getRms4().
	 * @return 4-element array built by getRms4()
	 */
	public double [] getInitialRms() {
		final double [] rms = this.initial_rms;
		return getRms4(rms);
	}

	/**
	 * Get the diagnostic RMS values stored in good_or_bad_rms, expanded by getRms4().
	 * @return 4-element array built by getRms4()
	 */
	public double [] getGoodOrBadRms() {
		final double [] rms = this.good_or_bad_rms;
		return getRms4(rms);
	}

	
	/**
	 * Expand a stored RMS array to four elements.
	 * Up to three first elements of the source are copied (rms, rms_pure, xyz_rms),
	 * missing ones remain NaN. When the source has at least three elements, the
	 * fourth output element is derived from elements 1 and 2 using pure_weight and
	 * xyz_weight (the remaining weight, pure_weight - xyz_weight, is the denominator).
	 * NOTE(review): the derivation is linear in the RMS values, not in their squares -
	 * confirm this is intended.
	 * @param rms3 stored RMS array, may be null if nothing has been calculated yet
	 * @return new 4-element array, all NaN when rms3 is null
	 */
	private double [] getRms4(double [] rms3) { // rms, rms_pure, xyz_rms
		final double [] rms4 = new double [4];
		Arrays.fill(rms4, Double.NaN);
		if (rms3 == null) {
			return rms4; // RMS was not calculated yet - report all NaN instead of throwing
		}
		System.arraycopy(rms3, 0, rms4, 0, Math.min(rms3.length, 3));
		if (rms3.length > 2 ) {
			rms4[3]=(rms3[1]*pure_weight - rms3[2]*xyz_weight)/(pure_weight - xyz_weight);
		}
		return rms4;
	}
	
	
	/**
	 * Get the rotation axis set by prepareCompassLMA().
	 * @return the internal axis array (not a copy), null if it was never set
	 */
	public double [] getAxis() {
		return this.axis;
	}
	/**
	 * Build a quaternion for the rotation around the stored axis by the angle
	 * held in the first element of the parameters vector.
	 * @return {cos(a/2), sin(a/2)*axis[0], sin(a/2)*axis[1], sin(a/2)*axis[2]}
	 */
	public double [] getAxisQuat() {
		final double cos_half = Math.cos(parameters_vector[0]/2);
		final double sin_half = Math.sin(parameters_vector[0]/2);
		final double [] quat = new double [4];
		quat[0] = cos_half;
		quat[1] = sin_half*axis[0];
		quat[2] = sin_half*axis[1];
		quat[3] = sin_half*axis[2];
		return quat;
	}

	/**
	 * Evaluate the model for the current parameters vector without derivatives.
	 * @return fx array returned by getFxDerivs() (called with null Jacobian and debug level -3)
	 */
	public double [] getLastFx() {
		final double [][] no_jt = null; // derivatives are not requested
		final int dbg_level = -3;
		return getFxDerivs(parameters_vector, no_jt, dbg_level);
	}
	/**
	 * Get the input data vector.
	 * @return the internal x_vector array (not a copy)
	 */
	public double [] getX() {
		return this.x_vector;
	}
	/**
	 * Get the target data vector.
	 * @return the internal y_vector array (not a copy)
	 */
	public double [] getY() {
		return this.y_vector;
	}
	/**
	 * Get the normalized weights vector.
	 * @return the internal weights array (not a copy)
	 */
	public double [] getW() {
		return this.weights;
	}

	
	/**
	 * Prepare single-parameter (rotation angle around a fixed axis) fitting, MODE_COMPASS.
	 * Stores the normalized axis, fills x/y/weights (3 samples per scene), normalizes
	 * the weights so that they sum to 1.0 and allocates a 1-element parameters vector.
	 * Scenes with a null entry in either vect_x or vect_y get zero data and zero weights.
	 * @param vect_x      per-scene {X,Y,Z} input vectors, elements may be null
	 * @param vect_y      per-scene {X,Y,Z} target vectors, elements may be null
	 * @param vect_w      per-scene, per-component weights, or null to use 1.0 everywhere
	 * @param vector_up   rotation axis, any non-zero length (normalized here)
	 * @param debug_level when positive, y_vector and x_vector are output with debugYfX()
	 */
	public void prepareCompassLMA(
		double [][] vect_x,  // GPS-derived X,Y,Z relative to the reference frame
		double [][] vect_y,  // Camera X,Y,Z relative to the reference frame
		double [][] vect_w,
		double []   vector_up, // Up in the IMS axes nearest to the camera Z (rotated by - ims_mount_atr)
		final int   debug_level) {
		final double up_len = Math.sqrt(
				vector_up[0]*vector_up[0]+
				vector_up[1]*vector_up[1]+
				vector_up[2]*vector_up[2]);
		axis = new double [] {vector_up[0]/up_len, vector_up[1]/up_len, vector_up[2]/up_len};
		N = vect_x.length;
		pure_weight = 1.0;
		mode = MODE_COMPASS;
		samples = 3;
		samples_x = 3;
		x_vector = new double [3* N];
		y_vector = new double [3* N];
		weights =  new double [3* N];
		parameters_vector = new double[1];
		double sum_w = 0;
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs = samples * nscene;
			final boolean missing = (vect_x[nscene] == null) || (vect_y[nscene] == null);
			if (missing) {
				// no data for this scene - zero samples, zero weights
				Arrays.fill(x_vector, offs, offs + samples, 0.0);
				Arrays.fill(y_vector, offs, offs + samples, 0.0);
				Arrays.fill(weights,  offs, offs + samples, 0.0);
				continue;
			}
			for (int j = 0; j < 3; j++) {
				x_vector[offs + j] = vect_x[nscene][j];
				y_vector[offs + j] = vect_y[nscene][j];
				final double w;
				if (vect_w != null) {
					w = vect_w[nscene][j];
				} else {
					w = 1.0;
				}
				weights[offs + j] = w;
				sum_w += w;
			}
		}
		// normalize so all the weights sum to pure_weight (1.0 here)
		final double scale = (pure_weight)/sum_w;
		for (int i = 0; i < weights.length; i++) {
			weights[i] *= scale;
		}
		last_jt = new double [parameters_vector.length][];
		if (debug_level > 0) {
			debugYfX ("",      y_vector);
			debugYfX ("GNSS-", x_vector);
		}
		xyz_weight = 0;
	}
	
	/**
	 * Prepare translation-only fitting (MODE_XYZ).
	 * Uses only the first ([0]) triplet of each scene of vect_x and vect_y, 3 samples
	 * per scene, plus REGLEN extra sample with the target value 1.0 and the weight reg_w.
	 * Scenes with a null entry in either vect_x or vect_y get zero data and zero weights.
	 * @param vect_x      per-scene input data, [scene][0] is {x,y,z}; elements may be null
	 * @param vect_y      per-scene target data, [scene][0] is {x,y,z}; elements may be null
	 * @param vect_w      per-scene, per-component weights, or null to use 1.0 everywhere
	 * @param reg_w       weight of the extra (regularization) sample, [0..1)
	 * @param quat0       initial parameters vector (cloned)
	 * @param debug_level not used by this method
	 */
	public void prepareLMA(
			double [][][] vect_x,
			double [][][] vect_y,
			double [][]   vect_w,
			double        reg_w, // regularization weight [0..1) weight of q0^2+q1^2+q3^2 -1  
			double []     quat0,
			final int     debug_level) {
		N = vect_x.length;
		pure_weight = 1.0 - reg_w;
		mode = MODE_XYZ;
		samples = 3;
		samples_x = 3;
		x_vector = new double [3* N];
		y_vector = new double [3* N + REGLEN];
		weights =  new double [3* N + REGLEN];
		parameters_vector = quat0.clone();
		double sum_w = 0;
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs = samples * nscene;
			final boolean missing = (vect_x[nscene] == null) || (vect_y[nscene] == null);
			if (missing) {
				// no data for this scene - zero samples, zero weights
				Arrays.fill(x_vector, offs, offs + samples, 0.0);
				Arrays.fill(y_vector, offs, offs + samples, 0.0);
				Arrays.fill(weights,  offs, offs + samples, 0.0);
				continue;
			}
			for (int j = 0; j < 3; j++) {
				x_vector[offs + j] = vect_x[nscene][0][j];
				y_vector[offs + j] = vect_y[nscene][0][j];
				final double w;
				if (vect_w != null) {
					w = vect_w[nscene][j];
				} else {
					w = 1.0;
				}
				weights[offs + j] = w;
				sum_w += w;
			}
		}
		// scale the sample weights so they sum to pure_weight
		final double scale = (pure_weight)/sum_w;
		for (int i = 0; i < weights.length; i++) {
			weights[i] *= scale;
		}
		// extra sample: the rest of the weight, target value 1.0
		final int reg_index = samples * N;
		weights[reg_index] = 1.0 - pure_weight;
		y_vector[reg_index] = 1.0;
		last_jt = new double [parameters_vector.length][];
		xyz_weight = pure_weight;
	}
	
	
	
	/**
	 * Prepare fitting of both translations and rotations.
	 * MODE_XYZ4Q3 is delegated to prepareLMA43(), any other mode except MODE_XYZQ is
	 * delegated to prepareLMAMode3(). For MODE_XYZQ each scene has 7 input values
	 * ({x,y,z,q0,q1,q2,q3}) and 3 + quat0.length target values: {x,y,z,q1,q2,q3} for a
	 * 3-element quat0, {x,y,z,q0,q1,q2,q3} otherwise. Rotations are converted from the
	 * three angles in [scene][1] with RotationOrder.YXZ.
	 * When reg_w is positive, quat0.length extra samples (target 0.0) share the
	 * weight reg_w after the per-scene samples.
	 * Scenes with a null entry in either vect_x or vect_y get zero data and zero weights.
	 * @param mode               one of the MODE_* constants
	 * @param vect_x             per-scene input {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_y             per-scene target {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_w             per-scene weights, or null to use 1.0 for every scene
	 * @param translation_weight share of each scene weight given to the translation samples, 0.0 ... 1.0
	 * @param reg_w              regularization weight [0..1)
	 * @param quat0              initial parameters vector (cloned)
	 * @param debug_level        when positive, y_vector and x_vector are output with debugYfX()
	 */
	public void prepareLMA(
			int           mode,
			double [][][] vect_x, // []{{x,y,z},{a,t,r}}
			double [][][] vect_y, // []{{x,y,z},{a,t,r}}
			double []     vect_w, // one per scene
			double        translation_weight, // 0.0 ... 1.0;
			double        reg_w,      // regularization weight [0..1) weight of q0^2+q1^2+q3^2 -1  
			double []     quat0,
			final int     debug_level) {
		if (mode == MODE_XYZ4Q3) {
			prepareLMA43(
					mode,
					vect_x,
					vect_y,
					vect_w,
					translation_weight,
					reg_w,
					quat0,
					debug_level);
			return;
		}
		if (mode != MODE_XYZQ) { // normally MODE_XYZQ_LOCAL
			prepareLMAMode3(
					mode,
					vect_x,
					vect_y,
					vect_w,
					translation_weight,
					reg_w,
					quat0,
					debug_level);
			return;
		}
		//MODE_XYZQ
		N = vect_x.length;
		this.mode = mode;
		samples = 3 + quat0.length;
		samples_x = 7;
		pure_weight = 1.0 - reg_w;
		int extra_samples = (reg_w > 0) ? quat0.length:0;
		x_vector = new double [samples_x * N];
		y_vector = new double [samples *   N + extra_samples];
		weights =  new double [samples *   N + extra_samples];
		parameters_vector = quat0.clone();
		double sw = 0;
		xyz_weight = 0;
		for (int i = 0; i < N; i++) {
			if ((vect_x[i]== null) || (vect_y[i]== null)) {
				// No data for this scene. y_vector and weights have only 'samples' entries
				// per scene (may be less than samples_x), so they are cleared separately
				// from x_vector to stay inside this scene's slot.
				for (int j = 0; j < samples; j++) {
					y_vector[samples * i + j] = 0.0;
					weights [samples * i + j] = 0.0;
				}
				for (int j = 0; j < samples_x; j++) {
					x_vector[samples_x * i + j] = 0.0;
				}
				continue;
			}
			double sample_weight = (vect_w != null)?  vect_w[i] : 1.0;
			double tw = sample_weight * translation_weight;
			double rw = sample_weight * (1.0 - translation_weight);
			Rotation   rot_x = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
					vect_x[i][1][0], vect_x[i][1][1], vect_x[i][1][2]);
			Rotation   rot_y = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
					vect_y[i][1][0], vect_y[i][1][1], vect_y[i][1][2]);

			// Translation components
			for (int j = 0; j < 3; j++) {
				x_vector[samples_x * i + j] = vect_x[i][0][j];
				y_vector[samples * i +   j] = vect_y[i][0][j];
				weights[samples * i +    j] = tw;
				sw += tw;
				xyz_weight += tw;
			}
			x_vector[samples_x * i + 3] = rot_x.getQ0();
			x_vector[samples_x * i + 4] = rot_x.getQ1();
			x_vector[samples_x * i + 5] = rot_x.getQ2();
			x_vector[samples_x * i + 6] = rot_x.getQ3();

			// Rotation components
			if (samples < samples_x) {
				// 3 rotation samples: q1,q2,q3 only
				y_vector[samples * i + 3] = rot_y.getQ1();
				y_vector[samples * i + 4] = rot_y.getQ2();
				y_vector[samples * i + 5] = rot_y.getQ3();
				for (int j = 0; j < 3; j++) {
					weights[samples * i + 3 + j] = rw;
					sw += rw;
				}
			} else {
				// 4 rotation samples: q0..q3; the q0 sample keeps zero weight (loop starts from 1)
				y_vector[samples * i + 3] = rot_y.getQ0();
				y_vector[samples * i + 4] = rot_y.getQ1();
				y_vector[samples * i + 5] = rot_y.getQ2();
				y_vector[samples * i + 6] = rot_y.getQ3();
				for (int j = 1; j < 4; j++) {
					weights[samples * i + 3 + j] = rw;
					sw += rw;
				}
			}
		}
		// scale the sample weights so they sum to pure_weight
		double k = (pure_weight)/sw;
		for (int i = 0; i < weights.length; i++) weights[i] *= k;
		xyz_weight *= k;
		if (extra_samples > 0) {
			// regularization samples: one per parameter, equal shares of the remaining weight
			double w = (1.0 - pure_weight)/parameters_vector.length;
			for (int i = 0; i < parameters_vector.length; i++) {
				weights [samples * N + i] = w;
				y_vector[samples * N + i] = 0.0; // or target value
			}
		}
		last_jt = new double [parameters_vector.length][];
		if (debug_level > 0) {
			 debugYfX ( "",   // String pfx,
						y_vector); // double [] data)
			 debugYfX ( "PIMU-",   // String pfx,
						x_vector); // double [] data)
		}
		return;
	}
	/**
	 * Prepare fitting for MODE_XYZ4Q3.
	 * Each scene has 7 input values ({x,y,z,q0,q1,q2,q3}) and 6 target values
	 * ({x,y,z,q1,q2,q3}). Rotations are converted from the three angles in [scene][1]
	 * with RotationOrder.YXZ. No extra (regularization) samples are allocated, reg_w
	 * only reduces the total weight of the samples (pure_weight = 1 - reg_w).
	 * Scenes with a null entry in either vect_x or vect_y get zero data and zero weights.
	 * @param mode               stored as the current mode
	 * @param vect_x             per-scene input {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_y             per-scene target {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_w             per-scene weights, or null to use 1.0 for every scene
	 * @param translation_weight share of each scene weight given to the translation samples, 0.0 ... 1.0
	 * @param reg_w              regularization weight [0..1)
	 * @param quat0              initial parameters vector (cloned)
	 * @param debug_level        when positive, y_vector and x_vector are output with debugYfX()
	 */
	public void prepareLMA43(
			int           mode,
			double [][][] vect_x, // []{{x,y,z},{a,t,r}}
			double [][][] vect_y, // []{{x,y,z},{a,t,r}}
			double []     vect_w, // one per scene
			double        translation_weight, // 0.0 ... 1.0;
			double        reg_w,      // regularization weight [0..1) weight of q0^2+q1^2+q3^2 -1  
			double []     quat0,
			final int     debug_level) {
		N = vect_x.length;
		this.mode = mode;
		samples = 6;
		samples_x = 7;
		pure_weight = 1.0 - reg_w;
		int extra_samples = 0; // (quat0.length < 4)? 0:REGLEN;
		x_vector = new double [samples_x * N];
		y_vector = new double [samples *   N + extra_samples];
		weights =  new double [samples *   N + extra_samples];
		parameters_vector = quat0.clone();
		double sw = 0;
		xyz_weight = 0;
		for (int i = 0; i < N; i++) {
			if ((vect_x[i]== null) || (vect_y[i]== null)) {
				// No data for this scene. y_vector and weights have only 'samples' (6) entries
				// per scene while x_vector has samples_x (7), so they are cleared separately -
				// indexing them with the x_vector range would run past the scene's slot
				// (and past the end of the arrays for the last scene).
				for (int j = 0; j < samples; j++) {
					y_vector[samples * i + j] = 0.0;
					weights [samples * i + j] = 0.0;
				}
				for (int j = 0; j < samples_x; j++) {
					x_vector[samples_x * i + j] = 0.0;
				}
			} else {
				double sample_weight = (vect_w != null)?  vect_w[i] : 1.0;
				double tw = sample_weight * translation_weight;
				double rw = sample_weight * (1.0 - translation_weight);
				// quaternions (both normalized)
				Rotation   rot_x = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
						vect_x[i][1][0], vect_x[i][1][1], vect_x[i][1][2]);
				Rotation   rot_y = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
						vect_y[i][1][0], vect_y[i][1][1], vect_y[i][1][2]);
				// translation samples
				for (int j = 0; j < 3; j++) {
					x_vector[samples_x * i + j] = vect_x[i][0][j];
					y_vector[samples * i +   j] = vect_y[i][0][j];
					weights[samples * i +    j] = tw;
					sw += tw;
					xyz_weight += tw;
				}
				x_vector[samples_x * i + 3] = rot_x.getQ0();
				x_vector[samples_x * i + 4] = rot_x.getQ1();
				x_vector[samples_x * i + 5] = rot_x.getQ2();
				x_vector[samples_x * i + 6] = rot_x.getQ3();
				// rotation samples: q1,q2,q3 only
				y_vector[samples * i + 3] = rot_y.getQ1();
				y_vector[samples * i + 4] = rot_y.getQ2();
				y_vector[samples * i + 5] = rot_y.getQ3();
				for (int j = 0; j < 3; j++) {
					weights[samples * i + 3 + j] = rw;
					sw += rw;
				}
			}
		}
		// scale the sample weights so they sum to pure_weight
		double k = (pure_weight)/sw;
		for (int i = 0; i < weights.length; i++) weights[i] *= k;
		xyz_weight *= k;
		if (extra_samples>0) { // currently never true, extra_samples is always 0
			weights [samples * N] = 1.0 - pure_weight;
			y_vector[samples * N] = 1.0;
		}
		last_jt = new double [parameters_vector.length][];
		if (debug_level > 0) {
			 debugYfX ( "",   // String pfx,
						y_vector); // double [] data)
			 debugYfX ( "PIMU-",   // String pfx,
						x_vector); // double [] data)
		}
	}

	
	/**
	 * Prepare fitting that uses inverted target transformations (called by prepareLMA()
	 * for modes other than MODE_XYZQ and MODE_XYZ4Q3).
	 * Each scene has 7 values {x,y,z,q0,q1,q2,q3} in x_vector, y_vector and y_inv_vector.
	 * x_vector gets the input translation and rotation, y_inv_vector gets the result of
	 * invertTransRot() applied to the target translation and rotation, and y_vector is
	 * all zeros except 1.0 in the q0 position. Rotations are converted from the three
	 * angles in [scene][1] with RotationOrder.YXZ. No extra samples are allocated.
	 * Scenes with a null entry in either vect_x or vect_y get zero data and zero weights.
	 * @param mode               stored as the current mode
	 * @param vect_x             per-scene input {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_y             per-scene target {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_w             per-scene weights, or null to use 1.0 for every scene
	 * @param translation_weight share of each scene weight given to the translation samples, 0.0 ... 1.0
	 * @param reg_w              regularization weight [0..1), only reduces pure_weight here
	 * @param quat0              initial parameters vector (cloned)
	 * @param debug_level        when positive, y_inv_vector and x_vector are output with debugYfX()
	 */
	public void prepareLMAMode3(
			int           mode,
			double [][][] vect_x, // []{{x,y,z},{a,t,r}}
			double [][][] vect_y, // []{{x,y,z},{a,t,r}}
			double []     vect_w, // one per scene
			double        translation_weight, // 0.0 ... 1.0;
			double        reg_w,      // regularization weight [0..1) weight of q0^2+q1^2+q3^2 -1  
			double []     quat0,
			final int     debug_level) {
		N = vect_x.length;
		this.mode = mode;
		samples = 7;
		samples_x = 7;
		pure_weight = 1.0 - reg_w;
		int num_extra = 0; // extra (regularization) samples are disabled in this mode
		x_vector =     new double [samples *   N];
		y_vector =     new double [samples *   N + num_extra];
		y_inv_vector = new double [samples_x * N + num_extra];
		weights =      new double [samples *   N + num_extra];
		parameters_vector = quat0.clone();
		xyz_weight =   0;
		double sum_w = 0;
		for (int nscene = 0; nscene < N; nscene++) {
			final boolean missing = (vect_x[nscene] == null) || (vect_y[nscene] == null);
			if (missing) {
				// no data for this scene - zero samples, zero weights
				final int from = samples * nscene;
				for (int j = 0; j < samples; j++) {
					x_vector    [from + j] = 0.0;
					y_inv_vector[from + j] = 0.0;
					weights     [from + j] = 0.0;
				}
				continue;
			}
			final double scene_w = (vect_w == null) ? 1.0 : vect_w[nscene];
			final double w_trans = scene_w * translation_weight;
			final double w_rot =   scene_w * (1.0 - translation_weight);
			final int offs_x = samples_x * nscene;
			final int offs_y = samples * nscene;
			// input translation
			for (int j = 0; j < 3; j++) {
				x_vector[offs_x + j] = vect_x[nscene][0][j];
			}
			// input rotation as a quaternion
			final Rotation rot_in = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
					vect_x[nscene][1][0], vect_x[nscene][1][1], vect_x[nscene][1][2]);
			x_vector[offs_x + 3] = rot_in.getQ0();
			x_vector[offs_x + 4] = rot_in.getQ1();
			x_vector[offs_x + 5] = rot_in.getQ2();
			x_vector[offs_x + 6] = rot_in.getQ3();
			// target translation and rotation
			final Rotation rot_target = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
					vect_y[nscene][1][0], vect_y[nscene][1][1], vect_y[nscene][1][2]);
			final double [] target_xyz = {
					vect_y[nscene][0][0],
					vect_y[nscene][0][1],
					vect_y[nscene][0][2]};
			final double [] target_quat = {
					rot_target.getQ0(),
					rot_target.getQ1(),
					rot_target.getQ2(),
					rot_target.getQ3()};
			final double [][] inverted = invertTransRot(
					target_xyz,
					target_quat);
			// just testing - combination of the inverted and the original should be "no transformation",
			// the result is not used
			combineTransRot(
					inverted[0],
					inverted[1],
					target_xyz,
					target_quat);
			System.arraycopy(inverted[0], 0, y_inv_vector, offs_x,      3);
			System.arraycopy(inverted[1], 0, y_inv_vector, offs_x + 3,  4);
			y_vector[offs_x + 3] = 1.0; // no rotation
			// removed 02/13/2026 - an extra 0.75 factor that was applied to the rotation weights
			for (int j = 0; j < 3; j++) {
				weights[offs_y + j] = w_trans;
				sum_w += w_trans;
				xyz_weight += w_trans;
			}
			for (int j = 0; j < 4; j++) { // 0 - q0, near 1.0
				weights[offs_y + 3 + j] = w_rot;
				sum_w += w_rot;
			}
		}
		// scale the sample weights so they sum to pure_weight
		final double scale = (pure_weight)/sum_w;
		for (int i = 0; i < weights.length; i++) {
			weights[i] *= scale;
		}
		xyz_weight *= scale;
		if (num_extra > 0) { // currently never true
			weights [samples * N] = 1.0 - pure_weight;
			y_inv_vector[samples * N] = 1.0;
		}
		last_jt = new double [parameters_vector.length][];
		if (debug_level > 0) {
			debugYfX ("Y-INV-", y_inv_vector);
			debugYfX ("PIMU-",  x_vector);
		}
	}
	
	
	/**
	 * Prepare fitting of the combined translation/rotation samples.
	 * Any mode other than MODE_COMBO is delegated to prepareLMAMode4(). For MODE_COMBO
	 * each scene has 7 input values ({x,y,z,q0,q1,q2,q3}) and 4 target values:
	 * {Z/height, 2*Q3, 2*Q2 - X/height, 2*Q1 + Y/height}. Rotations are converted from
	 * the three angles in [scene][1] with RotationOrder.YXZ.
	 * When reg_w is positive, quat0.length extra samples (target 0.0) share the
	 * weight reg_w after the per-scene samples.
	 * Scenes with a null entry in either vect_x or vect_y get zero data and zero weights.
	 * @param mode        one of the MODE_* constants
	 * @param avg_height  divisor applied to the target translations (stored in height)
	 * @param vect_x      per-scene input {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_y      per-scene target {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_w      per-scene weights, or null to use 1.0 for every scene
	 * @param reg_w       regularization weight [0..1)
	 * @param quat0       initial parameters vector (cloned)
	 * @param debug_level when positive, y_vector and x_vector are output with debugYfX()
	 */
	public void prepareLMA(
			int           mode,
			double        avg_height,         // 
			double [][][] vect_x, // []{{x,y,z},{a,t,r}}
			double [][][] vect_y, // []{{x,y,z},{a,t,r}}
			double []     vect_w, // one per scene
			double        reg_w,      // regularization weight [0..1)  
			double []     quat0,
			final int     debug_level) {
		if (mode != MODE_COMBO) {
			prepareLMAMode4(
					mode,
					avg_height,
					vect_x,
					vect_y,
					vect_w,
					reg_w,
					quat0,
					debug_level);
			return;
		}

		N = vect_x.length;
		this.mode = mode; // 2;
		samples = 4;
		samples_x = 7;
		height = avg_height;
		pure_weight = 1.0 - reg_w;
		int extra_samples = (reg_w > 0) ? quat0.length:0;
		x_vector = new double [samples_x * N];
		y_vector = new double [samples *   N + extra_samples];
		weights =  new double [samples *   N + extra_samples];
		parameters_vector = quat0.clone();
		double sw = 0;
		for (int i = 0; i < N; i++) {
			if ((vect_x[i]== null) || (vect_y[i]== null)) {
				// no data for this scene - zero samples, zero weights
				for (int j = 0; j < samples; j++) {
					y_vector[samples * i + j] = 0.0;
					weights [samples * i + j] = 0.0;
				}
				for (int j = 0; j < samples_x; j++) {
					x_vector[samples_x * i + j] = 0.0;
				}
			} else {
				// mode=2
				double w = ((vect_w != null)?  vect_w[i] : 1.0);
				// quaternions
				Rotation   rot_x = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
						vect_x[i][1][0], vect_x[i][1][1], vect_x[i][1][2]);
				Rotation   rot_y = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
						vect_y[i][1][0], vect_y[i][1][1], vect_y[i][1][2]);
				x_vector[samples_x * i + 0] = vect_x[i][0][0]; // X
				x_vector[samples_x * i + 1] = vect_x[i][0][1]; // Y
				x_vector[samples_x * i + 2] = vect_x[i][0][2]; // Z
				x_vector[samples_x * i + 3] = rot_x.getQ0();   // Q0
				x_vector[samples_x * i + 4] = rot_x.getQ1();   // Q1
				x_vector[samples_x * i + 5] = rot_x.getQ2();   // Q2
				x_vector[samples_x * i + 6] = rot_x.getQ3();   // Q3

				y_vector[samples * i + 0] = vect_y[i][0][2]/height; // Z
				y_vector[samples * i + 1] = 2*rot_y.getQ3();        // 2 * Q3
				y_vector[samples * i + 2] = 2*rot_y.getQ2() - vect_y[i][0][0] / height; // 2 * Q2 - X / height
				y_vector[samples * i + 3] = 2*rot_y.getQ1() + vect_y[i][0][1] / height; // 2 * Q1 + Y / height

				for (int j = 0; j < samples; j++) {
					weights[samples * i + j] = w;
					sw += w;
				}
			}
		}
		// scale the sample weights so they sum to pure_weight
		double k = (pure_weight)/sw;
		for (int i = 0; i < weights.length; i++) weights[i] *= k;
		if (extra_samples > 0) {
			// regularization samples: one per parameter, equal shares of the remaining weight
			double w = (1.0 - pure_weight)/parameters_vector.length;
			for (int i = 0; i < parameters_vector.length; i++) {
				weights [samples * N + i] = w;
				y_vector[samples * N + i] = 0.0; // or target value
			}
		}
		last_jt = new double [parameters_vector.length][];
		if (debug_level > 0) {
			 debugYfX ( "",   // String pfx,
						y_vector); // double [] data)
			 debugYfX ( "PIMU-",   // String pfx,
						x_vector); // double [] data)
		}
	}

	/**
	 * Prepare fitting of the combined samples using inverted target transformations
	 * (called by prepareLMA() with avg_height for modes other than MODE_COMBO).
	 * Each scene has 7 values {x,y,z,q0,q1,q2,q3} in x_vector and y_inv_vector and
	 * 4 samples in y_vector/weights. x_vector gets the input translation and rotation,
	 * y_inv_vector gets the result of invertTransRot() applied to the target translation
	 * and rotation, y_vector remains all zeros. Rotations are converted from the three
	 * angles in [scene][1] with RotationOrder.YXZ.
	 * When reg_w is positive, quat0.length extra samples (target 0.0) share the
	 * weight reg_w after the per-scene samples.
	 * Scenes with a null entry in either vect_x or vect_y get zero data and zero weights.
	 * @param mode        stored as the current mode
	 * @param avg_height  stored in height
	 * @param vect_x      per-scene input {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_y      per-scene target {{x,y,z},{a,t,r}}, elements may be null
	 * @param vect_w      per-scene weights, or null to use 1.0 for every scene
	 * @param reg_w       regularization weight [0..1)
	 * @param quat0       initial parameters vector (cloned)
	 * @param debug_level when positive, y_inv_vector and x_vector are output with debugYfX()
	 */
	public void prepareLMAMode4( // MODE_COMBO_LOCAL
			int           mode,
			double        avg_height,         // 
			double [][][] vect_x, // []{{x,y,z},{a,t,r}}
			double [][][] vect_y, // []{{x,y,z},{a,t,r}}
			double []     vect_w, // one per scene
			double        reg_w,      // regularization weight [0..1) weight of q0^2+q1^2+q3^2 -1  
			double []     quat0,
			final int     debug_level) {

		N = vect_x.length;
		this.mode = mode; // 2;
		samples = 4;
		samples_x = 7;
		height = avg_height;
		pure_weight = 1.0 - reg_w;
		int extra_samples = (reg_w > 0) ? quat0.length:0;
		x_vector =     new double [samples_x * N];
		y_vector =     new double [samples *   N + extra_samples];
		y_inv_vector = new double [samples_x * N + extra_samples];
		weights =      new double [samples *   N + extra_samples];
		parameters_vector = quat0.clone();
		double sw = 0;
		for (int i = 0; i < N; i++) {
			if ((vect_x[i]== null) || (vect_y[i]== null)) {
				// no data for this scene - zero samples, zero weights
				for (int j = 0; j < samples; j++) {
					weights [samples * i + j] = 0.0;
				}
				for (int j = 0; j < samples_x; j++) {
					x_vector    [samples_x * i + j] = 0.0;
					y_inv_vector[samples_x * i + j] = 0.0;
				}
			} else {
				// mode=4
				double w = ((vect_w != null)?  vect_w[i] : 1.0);
				// quaternions
				Rotation   rot_x = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
						vect_x[i][1][0], vect_x[i][1][1], vect_x[i][1][2]);
				Rotation   rot_y = new Rotation(RotationOrder.YXZ, ErsCorrection.ROT_CONV,
						vect_y[i][1][0], vect_y[i][1][1], vect_y[i][1][2]);
				x_vector[samples_x * i + 0] = vect_x[i][0][0]; // X
				x_vector[samples_x * i + 1] = vect_x[i][0][1]; // Y
				x_vector[samples_x * i + 2] = vect_x[i][0][2]; // Z
				x_vector[samples_x * i + 3] = rot_x.getQ0();   // Q0
				x_vector[samples_x * i + 4] = rot_x.getQ1();   // Q1
				x_vector[samples_x * i + 5] = rot_x.getQ2();   // Q2
				x_vector[samples_x * i + 6] = rot_x.getQ3();   // Q3
				double [] xyz_y = new double [] {
						vect_y[i][0][0],
						vect_y[i][0][1],
						vect_y[i][0][2]};
				double [] quat_y = new double [] {
						rot_y.getQ0(),
						rot_y.getQ1(),
						rot_y.getQ2(),
						rot_y.getQ3()};
				// inverted target transformation: [0] - translation, [1] - quaternion
				double [][] inv_y = invertTransRot(
						xyz_y,
						quat_y);
				System.arraycopy(inv_y[0], 0, y_inv_vector, samples_x * i,      3);
				System.arraycopy(inv_y[1], 0, y_inv_vector, samples_x * i + 3,  4);
				// y_vector remains all 0

				for (int j = 0; j < samples; j++) {
					weights[samples * i + j] = w;
					sw += w;
				}
			}
		}
		// scale the sample weights so they sum to pure_weight
		double k = (pure_weight)/sw;
		for (int i = 0; i < weights.length; i++) weights[i] *= k;
		if (extra_samples > 0) {
			// regularization samples: one per parameter, equal shares of the remaining weight
			double w = (1.0 - pure_weight)/parameters_vector.length;
			for (int i = 0; i < parameters_vector.length; i++) {
				weights [samples * N + i] = w;
				y_vector[samples * N + i] = 0.0; // or target value
			}
		}
		last_jt = new double [parameters_vector.length][];
		if (debug_level > 0) {
			 debugYfX ( "Y-INV-",   // String pfx,
					 y_inv_vector); // double [] data)
			 debugYfX ( "PIMU-",   // String pfx,
						x_vector); // double [] data)
		}
		xyz_weight = 0;
		return;
	}
	
	// TODO: Consider adding differences between x and y for regularization (or it won't work).
	// Goal - to minimize "unneeded" rotation along the common axis.
	// Updated for MODE_XYZQ/3 and MODE_COMBO_LOCAL/3 ( /3 - only 3 quaternion components).
	/**
	 * Calculate the model values and optionally the transposed Jacobian for the
	 * current mode. All modes except MODE_XYZ are delegated to mode-specific methods;
	 * for MODE_XYZQ, MODE_XYZQ_LOCAL and MODE_COMBO_LOCAL the 3-component or
	 * 4-component variant is selected by the length of the evaluated vector.
	 * MODE_XYZ is processed here: each input {x,y,z} is transformed with applyTo(),
	 * derivatives come from applyToDQ(), and the extra sample is the sum of the
	 * squared vector elements (derivative 2 * vector[i]).
	 * @param vector      parameters to evaluate (4 elements are read in MODE_XYZ)
	 * @param jt          null or array [vector.length][] to receive the transposed Jacobian
	 * @param debug_level passed to the mode-specific methods
	 * @return model values, same length as weights
	 */
	private double [] getFxDerivs(
			double []         vector,
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		// Select the 3-component variants by the vector being evaluated, consistently for all modes
		final boolean use3 = vector.length < 4;
		switch (mode) {
		case MODE_XYZ4Q3:
			return getFxDerivs6Dof43(vector, jt, debug_level);
		case MODE_XYZQ:
			if (use3) {
				return getFxDerivs6Dof33(vector, jt, debug_level);
			} else {
				return getFxDerivs6Dof(vector, jt, debug_level);
			}
		case MODE_COMBO:
			return getFxDerivsVisual(vector, jt, debug_level);
		case MODE_XYZQ_LOCAL:
			if (use3) {
				return getFxDerivs6DofMode33(vector, jt, debug_level);
			} else {
				return getFxDerivs6DofMode3(vector, jt, debug_level);
			}
		case MODE_COMBO_LOCAL:
			if (use3) { // updated for [3]
				return getFxDerivsVisualMode43(vector, jt, debug_level);
			} else {
				return getFxDerivsVisualMode4(vector, jt, debug_level);
			}
		case MODE_COMPASS:
			return getFxDerivsCompass(vector, jt, debug_level);
		}
		// remains here for mode MODE_XYZ
		double [] fx = new double [weights.length];
		final double q0 = vector[0];
		final double q1 = vector[1];
		final double q2 = vector[2];
		final double q3 = vector[3];
		if (jt != null) {
			for (int i = 0; i < vector.length; i++) {
				jt[i] = new double [weights.length];
				jt[i][samples * N] = 2 * vector[i]; // derivative of the extra sample
			}
		}
		fx[samples * N] = q0*q0 + q1*q1 + q2 * q2 + q3*q3; // extra sample, target is 1.0
		double [] xyz_rot;
		double [][] xyz_dq;
		for (int i = 0; i < N; i++) {
			int i3 = 3 * i;
			final double [] xyz = new double [] {x_vector[i3 + 0],x_vector[i3 + 1],x_vector[i3 + 2]};
			xyz_rot = applyTo(vector, xyz);
			System.arraycopy(xyz_rot, 0, fx, i3, 3);
			if (jt != null) {
				xyz_dq =  applyToDQ(vector, xyz);
				for (int j = 0; j < 4; j++) {
					System.arraycopy(xyz_dq[j], 0, jt[j], i3, 3);
				}
			}
		}
		return fx;
	}
	
	/**
	 * Calculate the model values and optionally the derivatives for MODE_COMPASS.
	 * The single parameter is an angle; the quaternion is built from this angle and
	 * the stored axis, and applied to each input {x,y,z} with applyTo(). Derivatives by
	 * the angle combine applyToDQ() results with the quaternion derivatives by the angle.
	 * Scenes where none of the weights is positive are skipped (values remain 0).
	 * @param vector      1-element array with the rotation angle
	 * @param jt          null or array [vector.length][] to receive the transposed Jacobian
	 * @param debug_level not used by this method
	 * @return model values, same length as weights
	 */
	private double [] getFxDerivsCompass(
			double []         vector,
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		final double cos_half = Math.cos(vector[0]/2);
		final double sin_half = Math.sin(vector[0]/2);
		final double [] fx = new double [weights.length];
		// quaternion for the rotation around axis and its derivatives by the angle
		final double [] quat = { cos_half, sin_half*axis[0], sin_half*axis[1], sin_half*axis[2]};
		final double [] dquat_dangle = {-sin_half/2, cos_half*axis[0]/2, cos_half*axis[1]/2, cos_half*axis[2]/2};
		final boolean need_derivs = (jt != null);
		if (need_derivs) {
			for (int npar = 0; npar < vector.length; npar++) {
				jt[npar] = new double [weights.length];
			}
		}
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs = 3 * nscene;
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs + j] > 0;
			}
			if (!has_data) {
				continue; // nothing to process for this scene
			}
			final double [] xyz_in = {x_vector[offs + 0], x_vector[offs + 1], x_vector[offs + 2]};
			final double [] rotated = applyTo(quat, xyz_in);
			System.arraycopy(rotated, 0, fx, offs, 3);
			if (!need_derivs) {
				continue;
			}
			final double [][] rotated_dq = applyToDQ(quat, xyz_in);
			for (int j = 0; j < 3; j++) {
				final int indx = offs + j;
				jt[0][indx] = 0.0;
				for (int nq = 0; nq < 4; nq++) {
					jt[0][indx] += dquat_dangle[nq] * rotated_dq[nq][j];
				}
			}
		}
		return fx;
	}
	
	/**
	 * Rotate the camera X,Y,Z (stored in y_vector) around the stored axis by the
	 * negated fitted angle, to reduce GNSS noise for georeferencing of the sequence.
	 * Scenes where none of the weights is positive are skipped, their output is null.
	 * @param vector single-element angle from fitting GNSS to camera (rotation is applied in the opposite direction)
	 * @return [nscene]{e,n,u}, null elements for the scenes without data
	 */
	public double [][] cameraToENU(
			double []         vector) {
		// inverse rotation: negated angle
		final double cos_half = Math.cos(-vector[0]/2);
		final double sin_half = Math.sin(-vector[0]/2);
		final double [][] enu = new double [N][];
		final double [] quat = { cos_half, sin_half*axis[0], sin_half*axis[1], sin_half*axis[2]};
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs = 3 * nscene;
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs + j] > 0;
			}
			if (has_data) {
				final double [] camera_xyz = {y_vector[offs + 0], y_vector[offs + 1], y_vector[offs + 2]};
				enu[nscene] = applyTo(quat, camera_xyz);
			}
		}
		return enu;
	}

	/**
	 * Calculate fx and, optionally, the transposed Jacobian for the 4-parameter
	 * quaternion fit where each scene contributes X,Y,Z followed by a 4-component
	 * quaternion (offsets are {@code samples * scene}).
	 * Translations use applyTo(vector, xyz); rotations use composeQR_Q() with
	 * normSign({-q0, q1, q2, q3}) as the first argument.
	 * Scenes without any positive weight are skipped (their elements stay zero).
	 * @param vector      4-element parameter vector {q0,q1,q2,q3}
	 * @param jt          null or an array of [vector.length][]; rows are (re)allocated here
	 * @param debug_level not used by this method
	 * @return fx array of weights.length elements
	 */
	private double [] getFxDerivs6Dof(
			double []         vector,
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		final double [] fx = new double [weights.length];
		// TODO: Implement use3 (no Q0)
		// quaternion used for the rotational part: q0 negated, then passed through normSign()
		final double [] vector_r = normSign(new double[] {-vector[0], vector[1], vector[2], vector[3]});
		if (jt != null) {
			for (int npar = 0; npar < vector.length; npar++) {
				jt[npar] = new double [weights.length];
			}
		}
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs = samples * nscene;
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs + j] > 0;
			}
			if (!has_data) {
				continue; // nothing to process for this scene
			}
			// translations
			final double [] xyz = {x_vector[offs], x_vector[offs + 1], x_vector[offs + 2]};
			System.arraycopy(applyTo(vector, xyz), 0, fx, offs, 3);
			if (jt != null) {
				final double [][] dxyz_dq = applyToDQ(vector, xyz);
				for (int k = 0; k < 4; k++) {
					System.arraycopy(dxyz_dq[k], 0, jt[k], offs, 3);
				}
			}
			// rotations
			final double [] quat_r = {x_vector[offs + 3], x_vector[offs + 4], x_vector[offs + 5], x_vector[offs + 6]};
			System.arraycopy(composeQR_Q(vector_r, quat_r), 0, fx, offs + 3, 4);
			if (jt != null) {
				final double [][] dquat_dq = composeQR_QdQ(vector_r, quat_r, true);
				for (int k = 0; k < 4; k++) {
					System.arraycopy(dquat_dq[k], 0, jt[k], offs + 3, 4);
				}
			}
		}
		return fx;
	}

	/**
	 * Calculate fx and, optionally, the transposed Jacobian for the 4-parameter fit
	 * in which the transformed pose of each scene is additionally combined (through
	 * combineTransRot()) with the per-scene data stored in y_inv_vector.
	 * Each scene occupies {@code samples} consecutive elements: X,Y,Z followed by a
	 * 4-component quaternion. Scenes without any positive weight are skipped.
	 * @param vector      4-element parameter vector {q0,q1,q2,q3}
	 * @param jt          null or an array of [vector.length][]; rows are (re)allocated here
	 * @param debug_level selects one of two ways to calculate the rotational derivatives
	 *                    (values below 1000 use qMat(), others use composeDR()); the original
	 *                    author notes that both give the same result
	 * @return fx array of weights.length elements
	 */
	private double [] getFxDerivs6DofMode3(
			double []         vector,
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		final double [] fx = new double [weights.length];
		final double [] vector_r = normSign(new double[] {vector[0], vector[1], vector[2], vector[3]});
		if (jt != null) {
			for (int npar = 0; npar < vector.length; npar++) {
				jt[npar] = new double [weights.length];
			}
		}
		// buffers reused for every scene: translation and quaternion parts of y_inv_vector
		final double [] inv_xyz =  new double [3];
		final double [] inv_quat = new double [4];
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs = samples * nscene;
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs + j] > 0;
			}
			if (!has_data) {
				continue; // nothing to process for this scene
			}
			// translations
			final double [] xyz = {x_vector[offs], x_vector[offs + 1], x_vector[offs + 2]};
			// rotations
			final double [] quat_r = {x_vector[offs + 3], x_vector[offs + 4], x_vector[offs + 5], x_vector[offs + 6]};
			final double [] xyz_rot =  applyTo(vector, xyz);
			final double [] quat_rot = composeQR_Q(vector_r, quat_r);
			System.arraycopy(y_inv_vector, offs,     inv_xyz,  0, 3);
			System.arraycopy(y_inv_vector, offs + 3, inv_quat, 0, 4);
			final double [][] comb_y = combineTransRot(
					inv_xyz,    // xyz_src
					inv_quat,   // quat_src
					xyz_rot,    // xyz_target
					quat_rot);  // quat_target
			System.arraycopy(comb_y[0], 0, fx, offs,     3);
			System.arraycopy(comb_y[1], 0, fx, offs + 3, 4);
			if (jt == null) {
				continue;
			}
			// translational derivatives, each passed through combineTransRot() with only inv_quat set
			final double [][] dxyz_dq = applyToDQ(vector, xyz);
			final double [][] dxyz_dq_local = new double [dxyz_dq.length][];
			for (int k = 0; k < dxyz_dq.length; k++) {
				dxyz_dq_local[k] = combineTransRot(null, inv_quat, dxyz_dq[k], null)[0];
			}
			for (int k = 0; k < 4; k++) {
				System.arraycopy(dxyz_dq_local[k], 0, jt[k], offs, 3);
			}
			// rotational derivatives - 2 alternative ways with the same result
			final double [][] dquat_dq = composeQR_QdQ(vector_r, quat_r, true);
			if (debug_level < 1000) {
				final double [][] dquat_dq_local = mulMat(dquat_dq, qMat(inv_quat));
				for (int k = 0; k < 4; k++) {
					System.arraycopy(dquat_dq_local[k], 0, jt[k], offs + 3, 4);
				}
			} else {
				final double [][] dcomp_dsecond = composeDR(inv_quat);
				final double [][] dquat_dq_local = new double [4][];
				for (int k = 0; k < 4; k++) {
					dquat_dq_local[k] = mulMat(dcomp_dsecond, dquat_dq[k]);
				}
				for (int k = 0; k < 4; k++) {
					System.arraycopy(dquat_dq_local[k], 0, jt[k], offs + 3, 4);
				}
			}
		}
		return fx;
	}
	/**
	 * Three-parameter variant of the "local" 6-DoF fit (MODE_XYZQ_LOCAL = 3, marked
	 * "OK with [3]" by the original author). The parameters are {q1,q2,q3}; q0 is
	 * obtained from getQ0(vector). The full 4-component derivatives are reduced to
	 * the three parameters with dQuat123().
	 * Each scene occupies {@code samples} consecutive elements: X,Y,Z followed by a
	 * 4-component quaternion. Scenes without any positive weight are skipped.
	 * @param vector      3-element parameter vector {q1,q2,q3}
	 * @param jt          null or an array of [vector.length][]; rows are (re)allocated here
	 * @param debug_level selects one of two ways to calculate the rotational derivatives
	 *                    (values below 1000 use qMat(), others use composeDR())
	 * @return fx array of weights.length elements
	 */
	private double [] getFxDerivs6DofMode33( // MODE_XYZQ_LOCAL =   3; // OK with [3]
			double []         vector, //
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		final double [] fx = new double [weights.length];
		final double q1 = vector[0];
		final double q2 = vector[1];
		final double q3 = vector[2];
		final double q0 = getQ0(vector);
		// full quaternion, used as is (no normSign())
		final double [] vector_r = {q0, q1, q2, q3};
		if (jt != null) {
			for (int npar = 0; npar < vector.length; npar++) {
				jt[npar] = new double [weights.length];
			}
		}
		// buffers reused for every scene: translation and quaternion parts of y_inv_vector
		final double [] inv_xyz =  new double [3];
		final double [] inv_quat = new double [4];
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs = samples * nscene;
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs + j] > 0;
			}
			if (!has_data) {
				continue; // nothing to process for this scene
			}
			// translations
			final double [] xyz = {x_vector[offs], x_vector[offs + 1], x_vector[offs + 2]};
			// rotations
			final double [] quat_r = {x_vector[offs + 3], x_vector[offs + 4], x_vector[offs + 5], x_vector[offs + 6]};
			final double [] xyz_rot =  applyTo(vector_r, xyz);
			final double [] quat_rot = composeQR_Q(vector_r, quat_r);
			System.arraycopy(y_inv_vector, offs,     inv_xyz,  0, 3);
			System.arraycopy(y_inv_vector, offs + 3, inv_quat, 0, 4);
			final double [][] comb_y = combineTransRot(
					inv_xyz,    // xyz_src
					inv_quat,   // quat_src
					xyz_rot,    // xyz_target
					quat_rot);  // quat_target
			System.arraycopy(comb_y[0], 0, fx, offs,     3);
			System.arraycopy(comb_y[1], 0, fx, offs + 3, 4);
			if (jt == null) {
				continue;
			}
			// translational derivatives by all 4 components, then reduced to q1,q2,q3
			final double [][] dxyz_dq = applyToDQ(vector_r, xyz);
			final double [][] dxyz_dq_local = new double [dxyz_dq.length][];
			for (int k = 0; k < dxyz_dq.length; k++) {
				dxyz_dq_local[k] = combineTransRot(null, inv_quat, dxyz_dq[k], null)[0];
			}
			final double [][] dxyz_dq3 = dQuat123(
					vector,         // q123
					dxyz_dq_local,  // dq0123, [4][]
					q0);
			for (int k = 0; k < dxyz_dq3.length; k++) { // dxyz_dq3.length==3
				System.arraycopy(dxyz_dq3[k], 0, jt[k], offs, 3);
			}
			// rotational derivatives - 2 alternative ways with the same result
			final double [][] dquat_dq = composeQR_QdQ(vector_r, quat_r, true);
			final double [][] dquat_dq_local;
			if (debug_level < 1000) {
				dquat_dq_local = mulMat(dquat_dq, qMat(inv_quat));
			} else {
				final double [][] dcomp_dsecond = composeDR(inv_quat);
				dquat_dq_local = new double [4][];
				for (int k = 0; k < dquat_dq_local.length; k++) {
					dquat_dq_local[k] = mulMat(dcomp_dsecond, dquat_dq[k]);
				}
			}
			final double [][] dquat_dq3 = dQuat123(
					vector,          // q123
					dquat_dq_local,  // dq0123, [4][]
					q0);
			for (int k = 0; k < dquat_dq3.length; k++) {
				System.arraycopy(dquat_dq3[k], 0, jt[k], offs + 3, 4);
			}
		}
		return fx;
	}
	
	/**
	 * Four-parameter fit where translations use the full quaternion (rotate + scale,
	 * per the original comment) and rotations contribute only three components:
	 * elements 1..3 of the composed quaternion are stored, so each scene occupies
	 * {@code samples} elements in fx while its input occupies {@code samples_x}
	 * elements in x_vector.
	 * The rotational part uses a copy of normSign(vector) processed by invertDeriv()
	 * and normalized by qNorm(); its derivatives are converted with dQndQ().
	 * Scenes without any positive weight are skipped.
	 * @param vector      4-element parameter vector
	 * @param jt          null or an array of [vector.length][]; rows are (re)allocated here
	 * @param debug_level not used by this method
	 * @return fx array of weights.length elements
	 */
	private double [] getFxDerivs6Dof43( // vector[4], but only 3 for rotations
			double []         vector, //
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		final boolean use_inv = true;
		final double [] fx = new double [weights.length];
		final double [] qn = new double[4];
		final boolean [] invert = {false, use_inv, use_inv, use_inv};
		final double [] vector_r = normSign(vector).clone(); // should be already q0>0
		invertDeriv(vector_r, invert);
		qNorm (vector_r, qn); // fills qn with normalized vector_r, returned value is not needed
		final double [][] dQn_dQ = dQndQ(vector_r);
		if (jt != null) {
			for (int npar = 0; npar < vector.length; npar++) {
				jt[npar] = new double [weights.length];
			}
		}
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs_y = samples * nscene;   // offset in fx, jt rows and weights
			final int offs_x = samples_x * nscene; // offset in x_vector
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs_y + j] > 0;
			}
			if (!has_data) {
				continue; // nothing to process for this scene
			}
			// translations
			final double [] xyz = {x_vector[offs_x], x_vector[offs_x + 1], x_vector[offs_x + 2]};
			System.arraycopy(applyTo(vector, xyz), 0, fx, offs_y, 3); // rotate + scale
			if (jt != null) {
				final double [][] dxyz_dq = applyToDQ(vector, xyz);
				for (int k = 0; k < 4; k++) {
					System.arraycopy(dxyz_dq[k], 0, jt[k], offs_y, 3);
				}
			}
			// rotations, element 0 of the composed quaternion is not stored
			final double [] quat_r = {x_vector[offs_x + 3], x_vector[offs_x + 4], x_vector[offs_x + 5], x_vector[offs_x + 6]};
			System.arraycopy(composeQR_Q(qn, quat_r), 1, fx, offs_y + 3, 3); // qn - normalized vector_r
			if (jt != null) {
				final double [][] dquat_dqn = composeQR_QdQ(qn, quat_r, false);
				final double [][] dquat_dq = mulMat(dQn_dQ, dquat_dqn);
				invertDeriv(dquat_dq, invert);
				for (int k = 0; k < 4; k++) {
					System.arraycopy(dquat_dq[k], 1, jt[k], offs_y + 3, 3);
				}
			}
		}
		return fx;
	}
	
	/**
	 * Three-parameter variant (marked "MODE_XYZQ tested" by the original author):
	 * the parameters are {q1,q2,q3}, q0 comes from getQ0(vector3). Translations use
	 * the full 4-component quaternion, rotations store elements 1..3 of the composed
	 * quaternion. Derivatives by the 4 components are reduced to the 3 parameters by
	 * multiplying with the matrix returned by dQ_dQ123(vector3).
	 * Each scene occupies {@code samples} elements in fx and {@code samples_x}
	 * elements in x_vector. When weights holds more than N*samples elements, the
	 * extra elements receive the parameters themselves with unit derivatives.
	 * @param vector3     3-element parameter vector {q1,q2,q3}
	 * @param jt          null or an array of rows; every row is (re)allocated here
	 * @param debug_level not used by this method
	 * @return fx array of weights.length elements
	 */
	private double [] getFxDerivs6Dof33( // vector[3], but only 3 for rotations MODE_XYZQ tested
			double []         vector3, //
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		final boolean use_inv = false; // true;
		final double [] vector = {getQ0(vector3), vector3[0], vector3[1], vector3[2]};
		final double [] fx = new double [weights.length];
		final double [] qn = new double[4];
		final boolean [] invert = {false, use_inv, use_inv, use_inv};
		final double [] vector_r = normSign(vector).clone(); // should be already q0>0
		invertDeriv(vector_r, invert);
		qNorm (vector_r, qn); // fills qn with normalized vector_r
		final double [][] dQn_dQ123 = dQ_dQ123(vector3);
		if (jt != null) {
			for (int npar = 0; npar < jt.length; npar++) {
				jt[npar] = new double [weights.length];
			}
		}
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs_y = samples * nscene;   // offset in fx, jt rows and weights
			final int offs_x = samples_x * nscene; // offset in x_vector
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs_y + j] > 0;
			}
			if (!has_data) {
				continue; // nothing to process for this scene
			}
			// translations
			final double [] xyz = {x_vector[offs_x], x_vector[offs_x + 1], x_vector[offs_x + 2]};
			System.arraycopy(applyTo(vector, xyz), 0, fx, offs_y, 3); // rotate + scale
			if (jt != null) {
				final double [][] dxyz_dq = mulMat(dQn_dQ123, applyToDQ(vector, xyz));
				for (int k = 0; k < dxyz_dq.length; k++) { // 3
					System.arraycopy(dxyz_dq[k], 0, jt[k], offs_y, 3);
				}
			}
			// rotations, element 0 of the composed quaternion is not stored
			final double [] quat_r = {x_vector[offs_x + 3], x_vector[offs_x + 4], x_vector[offs_x + 5], x_vector[offs_x + 6]};
			System.arraycopy(composeQR_Q(qn, quat_r), 1, fx, offs_y + 3, 3); // qn - normalized vector_r
			if (jt != null) {
				final double [][] dquat_dqn = composeQR_QdQ(qn, quat_r, false);
				final double [][] dquat_dq = mulMat(dQn_dQ123, dquat_dqn); // [3][4]
				invertDeriv(dquat_dq, invert);
				for (int k = 0; k < dquat_dq.length; k++) { // 3
					System.arraycopy(dquat_dq[k], 1, jt[k], offs_y + 3, 3);
				}
			}
		}
		// extra samples after all scenes: parameters themselves, unit derivatives
		final int offs_reg = samples * N;
		if (weights.length > offs_reg) {
			for (int npar = 0; npar < vector3.length; npar++) {
				fx[offs_reg + npar] = vector3[npar];
				if (jt != null) {
					jt[npar][offs_reg + npar] = 1.0;
				}
			}
		}
		return fx;
	}
	
	
	/**
	 * Debug helper: compare the analytical transposed Jacobian from getFxDerivs()
	 * with the numerical one from getFxDerivsDelta() and print both, together with
	 * their differences, to System.out (one tab-separated row per sample that has
	 * a positive weight, then a summary row with the per-parameter maximums).
	 * @param vector parameters vector at which the derivatives are compared
	 * @param delta  full width of the interval used for the numerical derivatives
	 * @return maximal absolute difference between the analytical and numerical
	 *         derivatives over all parameters and all samples with positive weights
	 */
	private double compareJT(
			double [] vector,
			double    delta) {
		double []  errors=new double [vector.length];
		double [][] jt =  new double [vector.length][];
		System.out.print("Parameters vector = [");
		for (int i = 0; i < vector.length; i++) {
			System.out.print(vector[i]);
			if (i < (vector.length -1)) System.out.print(", ");
		}
		System.out.println("]");
		getFxDerivs(
				vector,
				jt, // final double [][] jt, // should be null or initialized with [vector.length][]
				1); // debug_level);
		double [][] jt_delta =  getFxDerivsDelta(
				vector, // double []         vector,
				delta, // final double      delta,
				-1); // final int         debug_level)
		for (int n = 0; n < weights.length; n++) if (weights[n] > 0) {
			System.out.print(String.format("%3d",n));
			// analytical derivatives
			for (int i = 0; i < vector.length; i++) {
				System.out.print(String.format("\t%12.9f",jt[i][n]));
			}
			// numerical derivatives
			for (int i = 0; i < vector.length; i++) {
				System.out.print(String.format("\t%12.9f",jt_delta[i][n]));
			}
			// signed differences
			for (int i = 0; i < vector.length; i++) {
				System.out.print(String.format("\t%12.9f",jt[i][n]-jt_delta[i][n]));
			}
			System.out.println();
			// Use the magnitude of the difference: a signed difference would silently
			// drop every mismatch where the analytical value is below the numerical one.
			for (int i = 0; i < vector.length; i++) {
				errors[i] = Math.max(errors[i], Math.abs(jt[i][n]-jt_delta[i][n]));
			}
		}
		// summary row: skip the analytical and numerical columns, print errors under the differences
		for (int i = 0; i < vector.length; i++) {
			System.out.print("\t\t");
		}
		for (int i = 0; i < vector.length; i++) {
			System.out.print(String.format("\t%12.9f",errors[i]));
		}
		System.out.println(); // terminate the summary row
		double err=0;
		for (int i = 0; i < vector.length; i++) {
			err = Math.max(errors[i], err);
		}
		return err;
	}
	
	
	/**
	 * Calculate the transposed Jacobian numerically with central differences:
	 * each parameter is shifted by +delta/2 and then by -delta/2, getFxDerivs() is
	 * called for both and the difference of the results is divided by delta.
	 * Only elements with positive weights are calculated, the others stay zero.
	 * @param vector      parameters vector (not modified, a copy is shifted)
	 * @param delta       full width of the differentiation interval
	 * @param debug_level passed to getFxDerivs()
	 * @return numerical transposed Jacobian, [vector.length][weights.length]
	 */
	private double [][] getFxDerivsDelta(
			double []         vector,
			final double      delta,
			final int         debug_level) {
		final int num_pars = vector.length;
		final double [][] jt_num = new double [num_pars][weights.length];
		for (int npar = 0; npar < num_pars; npar++) {
			final double [] shifted = vector.clone();
			shifted[npar] += 0.5*delta;
			final double [] fx_plus = getFxDerivs(
					shifted,
					null, // Jacobian is not needed
					debug_level);
			shifted[npar] -= delta; // now it is the original value minus delta/2
			final double [] fx_minus = getFxDerivs(
					shifted,
					null, // Jacobian is not needed
					debug_level);
			final double [] jt_row = jt_num[npar];
			for (int i = 0; i < weights.length; i++) {
				if (weights[i] > 0) {
					jt_row[i] = (fx_plus[i]-fx_minus[i])/delta;
				}
			}
		}
		return jt_num;
	}

	/**
	 * Calculate fx and, optionally, the transposed Jacobian for the "visual" fit,
	 * where every scene contributes four combined samples built from the transformed
	 * translation (xyz_rot) and the composed quaternion (quat_rot):
	 * <pre>
	 *   [0] Z / height
	 *   [1] 2 * Q3
	 *   [2] 2 * Q2 - X / height
	 *   [3] 2 * Q1 + Y / height
	 * </pre>
	 * Each scene occupies {@code samples} elements in fx and {@code samples_x}
	 * elements in x_vector. The element at index samples*N (when present) holds the
	 * squared norm of the parameter quaternion, its derivatives are 2 * vector[i].
	 * Scenes without any positive weight are skipped.
	 * @param vector      4-element parameter vector {q0,q1,q2,q3}
	 * @param jt          null or an array of [vector.length][]; rows are (re)allocated here
	 * @param debug_level not used by this method
	 * @return fx array of weights.length elements
	 */
	private double [] getFxDerivsVisual(
			double []         vector,
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		double [] fx = new double [weights.length];
		final double q0 = vector[0]; 
		final double q1 = vector[1]; 
		final double q2 = vector[2]; 
		final double q3 = vector[3];
		final double [] vector_r = normSign(vector);
		final int reg_index = samples * N;
		// guard the extra (regularization) sample the same way the sibling methods do
		final boolean has_reg = weights.length > reg_index;
		if (jt != null) {
			for (int i = 0; i < vector.length; i++) {
				jt[i] = new double [weights.length];
				if (has_reg) {
					jt[i][reg_index] = 2 * vector[i];
				}
			}
		}
		if (has_reg) {
			fx[reg_index] = q0*q0 + q1*q1 + q2*q2 + q3*q3;
		}
		double [] xyz_rot;
		double [] quat_rot;
		double [][] xyz_dq;
		double [][] quat_dq;
		
		for (int i = 0; i < N; i++) {
			int i4 = samples * i;   // offset in fx, jt rows and weights
			int i7 = samples_x * i; // offset in x_vector
			has_data:{
				for (int j = 0; j < samples; j++) {
					if (weights[i4+j] > 0) {
						break has_data;
					}
				}
				continue; // nothing to process for this scene 
			}

			// translations
			final double [] xyz = new double [] {x_vector[i7 + 0],x_vector[i7 + 1],x_vector[i7 + 2]};
			xyz_rot = applyTo(vector, xyz);
			// rotations (the former extra compose() call is removed: its result was never used)
			final double [] quat_r = { x_vector[i7 + 3],x_vector[i7 + 4],x_vector[i7 + 5],x_vector[i7 + 6]};
			quat_rot = composeQR_Q(vector_r, quat_r);
			// combined samples
			fx[i4 + 0] = xyz_rot[2] / height;                   // Z
			fx[i4 + 1] = 2 * quat_rot[3];                       // 2 * Q3
			fx[i4 + 2] = 2 * quat_rot[2] - xyz_rot[0] / height; // 2 * Q2 - X / height
			fx[i4 + 3] = 2 * quat_rot[1] + xyz_rot[1] / height; // 2 * Q1 + Y / height
			if (jt != null) {
				xyz_dq =  applyToDQ(vector, xyz);
				quat_dq = composeQR_QdQ(vector_r,quat_r,true);
				// same combinations as for fx, applied to the derivatives by each parameter
				for (int k = 0; k < 4; k++) {
					jt[k][i4 + 0] = xyz_dq[k][2] / height;                      // Z
					jt[k][i4 + 1] = 2 * quat_dq[k][3];                          // 2 * Q3
					jt[k][i4 + 2] = 2 * quat_dq[k][2] - xyz_dq[k][0] / height;  // 2 * Q2 - X / height
					jt[k][i4 + 3] = 2 * quat_dq[k][1] + xyz_dq[k][1] / height;  // 2 * Q1 + Y / height
				}
			}			
		}
		return fx;
	}
	
	/**
	 * "Visual" fit with the transformed pose additionally combined (through
	 * combineTransRot()) with the per-scene data from y_inv_vector. Every scene
	 * contributes four combined samples built from the combined translation
	 * comb_y[0] and quaternion comb_y[1]:
	 * <pre>
	 *   [0] Z / height
	 *   [1] 2 * Q3
	 *   [2] 2 * Q2 - X / height
	 *   [3] 2 * Q1 + Y / height
	 * </pre>
	 * Each scene occupies {@code samples} elements in fx and {@code samples_x}
	 * elements in x_vector and y_inv_vector. When weights holds more than N*samples
	 * elements, the extra elements receive the parameters themselves with unit
	 * derivatives. Scenes without any positive weight are skipped.
	 * Original author's note: seems rotation is opposite sign.
	 * @param vector      4-element parameter vector
	 * @param jt          null or an array of [vector.length][]; rows are (re)allocated here
	 * @param debug_level not used by this method
	 * @return fx array of weights.length elements
	 */
	private double [] getFxDerivsVisualMode4(  // seems rotation is opposite sign
			double []         vector,
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		final double [] fx = new double [weights.length];
		final double [] vector_r = normSign(vector);
		if (jt != null) {
			for (int npar = 0; npar < vector.length; npar++) {
				jt[npar] = new double [weights.length];
			}
		}
		// buffers reused for every scene: translation and quaternion parts of y_inv_vector
		final double [] inv_xyz =  new double [3];
		final double [] inv_quat = new double [4];
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs_y = samples * nscene;   // offset in fx, jt rows and weights
			final int offs_x = samples_x * nscene; // offset in x_vector and y_inv_vector
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs_y + j] > 0;
			}
			if (!has_data) {
				continue; // nothing to process for this scene
			}
			// translations
			final double [] xyz = {x_vector[offs_x], x_vector[offs_x + 1], x_vector[offs_x + 2]};
			// rotations
			final double [] quat_r = {x_vector[offs_x + 3], x_vector[offs_x + 4], x_vector[offs_x + 5], x_vector[offs_x + 6]};
			final double [] xyz_rot =  applyTo(vector, xyz);
			final double [] quat_rot = composeQR_Q(vector_r, quat_r);
			System.arraycopy(y_inv_vector, offs_x,     inv_xyz,  0, 3);
			System.arraycopy(y_inv_vector, offs_x + 3, inv_quat, 0, 4);
			final double [][] comb_y = combineTransRot(
					inv_xyz,    // xyz_src
					inv_quat,   // quat_src
					xyz_rot,    // xyz_target
					quat_rot);  // quat_target
			final double [] comb_xyz =  comb_y[0];
			final double [] comb_quat = comb_y[1];
			fx[offs_y + 0] = comb_xyz[2]/ height;                     // Z
			fx[offs_y + 1] = 2 * comb_quat[3];                        // 2 * Q3
			fx[offs_y + 2] = 2 * comb_quat[2] - comb_xyz[0]/ height;  // 2 * Q2 - X / height
			fx[offs_y + 3] = 2 * comb_quat[1] + comb_xyz[1]/ height;  // 2 * Q1 + Y / height
			if (jt == null) {
				continue;
			}
			final double [][] dxyz_dq = applyToDQ(vector, xyz);
			final double [][] dxyz_dq_local = new double [dxyz_dq.length][];
			for (int k = 0; k < dxyz_dq.length; k++) {
				dxyz_dq_local[k] = combineTransRot(null, inv_quat, dxyz_dq[k], null)[0];
			}
			final double [][] dquat_dq = composeQR_QdQ(vector_r, quat_r, true);
			final double [][] dquat_dq_local = mulMat(dquat_dq, qMat(inv_quat));
			// same combinations as for fx, applied to the derivatives by each parameter
			for (int k = 0; k < 4; k++) {
				final double [] dxyz =  dxyz_dq_local[k];
				final double [] dquat = dquat_dq_local[k];
				jt[k][offs_y + 0] = dxyz[2] / height;                 // Z
				jt[k][offs_y + 1] = 2 * dquat[3];                     // 2 * Q3
				jt[k][offs_y + 2] = 2 * dquat[2] - dxyz[0] / height;  // 2 * Q2 - X / height
				jt[k][offs_y + 3] = 2 * dquat[1] + dxyz[1] / height;  // 2 * Q1 + Y / height
			}
		}
		// extra samples after all scenes: parameters themselves, unit derivatives
		final int offs_reg = samples * N;
		if (weights.length > offs_reg) {
			for (int npar = 0; npar < vector.length; npar++) {
				fx[offs_reg + npar] = vector[npar];
				if (jt != null) {
					jt[npar][offs_reg + npar] = 1.0;
				}
			}
		}
		return fx;
	}
	
	/**
	 * Three-parameter variant of the local "visual" fit (marked "tested
	 * MODE_COMBO_LOCAL = 4, OK with [3]" by the original author). The parameters are
	 * {q1,q2,q3}, q0 comes from getQ0(vector3). Every scene contributes four
	 * combined samples built from the combined translation comb_y[0] and quaternion
	 * comb_y[1]:
	 * <pre>
	 *   [0] Z / height
	 *   [1] 2 * Q3
	 *   [2] 2 * Q2 - X / height
	 *   [3] 2 * Q1 + Y / height
	 * </pre>
	 * Derivatives by the 4 quaternion components are reduced to the 3 parameters by
	 * multiplying with the matrix returned by dQ_dQ123(vector3).
	 * Each scene occupies {@code samples} elements in fx and {@code samples_x}
	 * elements in x_vector and y_inv_vector. When weights holds more than N*samples
	 * elements, the extra elements receive the parameters themselves with unit
	 * derivatives. Scenes without any positive weight are skipped.
	 * @param vector3     3-element parameter vector {q1,q2,q3}
	 * @param jt          null or an array of rows; every row is (re)allocated here
	 * @param debug_level when above 2, dbg_data is re-created and filled with the
	 *                    uncombined per-scene values (translation / height, quaternion)
	 * @return fx array of weights.length elements
	 */
	private double [] getFxDerivsVisualMode43( // tested MODE_COMBO_LOCAL =   4; // OK with [3]
			double []         vector3,
			final double [][] jt, // should be null or initialized with [vector.length][]
			final int         debug_level) {
		final boolean dbg_out = debug_level>2;
		if (dbg_out) {
			dbg_data = new double[N * samples_x];
		}
		final double [] vector = {getQ0(vector3), vector3[0], vector3[1], vector3[2]};
		final double [] fx = new double [weights.length];
		final double [] qn = new double[4];
		final double [] vector_r = normSign(vector).clone(); // should be already q0>0
		qNorm (vector_r, qn); // fills qn with normalized vector_r
		final double [][] dQn_dQ123 = dQ_dQ123(vector3);
		if (jt != null) {
			for (int npar = 0; npar < jt.length; npar++) {
				jt[npar] = new double [weights.length];
			}
		}
		// buffers reused for every scene: translation and quaternion parts of y_inv_vector
		final double [] inv_xyz =  new double [3];
		final double [] inv_quat = new double [4];
		for (int nscene = 0; nscene < N; nscene++) {
			final int offs_y = samples * nscene;   // offset in fx, jt rows and weights
			final int offs_x = samples_x * nscene; // offset in x_vector, y_inv_vector and dbg_data
			boolean has_data = false;
			for (int j = 0; (j < samples) && !has_data; j++) {
				has_data = weights[offs_y + j] > 0;
			}
			if (!has_data) {
				continue; // nothing to process for this scene
			}
			// translations
			final double [] xyz = {x_vector[offs_x], x_vector[offs_x + 1], x_vector[offs_x + 2]};
			// rotations
			final double [] quat_r = {x_vector[offs_x + 3], x_vector[offs_x + 4], x_vector[offs_x + 5], x_vector[offs_x + 6]};
			final double [] xyz_rot =  applyTo(vector, xyz);
			final double [] quat_rot = composeQR_Q(vector_r, quat_r);
			System.arraycopy(y_inv_vector, offs_x,     inv_xyz,  0, 3);
			System.arraycopy(y_inv_vector, offs_x + 3, inv_quat, 0, 4);
			final double [][] comb_y = combineTransRot(
					inv_xyz,    // xyz_src
					inv_quat,   // quat_src
					xyz_rot,    // xyz_target
					quat_rot);  // quat_target
			final double [] comb_xyz =  comb_y[0];
			final double [] comb_quat = comb_y[1];
			fx[offs_y + 0] = comb_xyz[2]/ height;                     // Z
			fx[offs_y + 1] = 2 * comb_quat[3];                        // 2 * Q3
			fx[offs_y + 2] = 2 * comb_quat[2] - comb_xyz[0]/ height;  // 2 * Q2 - X / height
			fx[offs_y + 3] = 2 * comb_quat[1] + comb_xyz[1]/ height;  // 2 * Q1 + Y / height
			if (dbg_out) {
				for (int j = 0; j < 3; j++) {
					dbg_data[offs_x + j] = comb_xyz[j]/ height;
				}
				for (int j = 0; j < 4; j++) {
					dbg_data[offs_x + 3 + j] = comb_quat[j];
				}
			}
			if (jt == null) {
				continue;
			}
			final double [][] dxyz_dq = applyToDQ(vector, xyz);
			final double [][] dxyz_dq_local = new double [dxyz_dq.length][];
			for (int k = 0; k < dxyz_dq.length; k++) {
				dxyz_dq_local[k] = combineTransRot(null, inv_quat, dxyz_dq[k], null)[0];
			}
			final double [][] dquat_dq = composeQR_QdQ(vector_r, quat_r, false); // true);
			final double [][] dquat_dq_local = mulMat(dquat_dq, qMat(inv_quat));
			// derivatives of the 4 combined samples by each of the 4 quaternion components
			// (an earlier version used -2 as the quaternion factor, now it is +2 as in fx)
			final double [][] d_dQn = new double[4][4];
			for (int k = 0; k < 4; k++) {
				final double [] dxyz =  dxyz_dq_local[k];
				final double [] dquat = dquat_dq_local[k];
				d_dQn[k][0] = dxyz[2] / height;                 // Z
				d_dQn[k][1] = 2 * dquat[3];                     // 2 * Q3
				d_dQn[k][2] = 2 * dquat[2] - dxyz[0] / height;  // 2 * Q2 - X / height
				d_dQn[k][3] = 2 * dquat[1] + dxyz[1] / height;  // 2 * Q1 + Y / height
			}
			// reduce to derivatives by the 3 parameters
			final double [][] d_dq = mulMat(dQn_dQ123, d_dQn);
			for (int k = 0; k < d_dq.length; k++) { // 3
				System.arraycopy(d_dq[k], 0, jt[k], offs_y, samples);
			}
		}
		// extra samples after all scenes: parameters themselves, unit derivatives
		final int offs_reg = samples * N;
		if (weights.length > offs_reg) {
			for (int npar = 0; npar < vector3.length; npar++) {
				fx[offs_reg + npar] = vector3[npar];
				if (jt != null) {
					jt[npar][offs_reg + npar] = 1.0;
				}
			}
		}
		return fx;
	}
	
	/**
	 * Calculate weighted differences weights[i] * (y_vector[i] - fx[i]) and the
	 * corresponding RMS values.
	 * <ul>
	 * <li>rms_fp[0] - square root of the sum of weights[i] * d[i]^2 over all elements
	 *     (assuming the sum of weights is 1.0);</li>
	 * <li>rms_fp[1] - "pure" RMS: only the first samples*N elements, normalized by
	 *     pure_weight (same as rms_fp[0] when it can not be calculated);</li>
	 * <li>rms_fp[2] - "pure" RMS of the X,Y,Z components only (component index below 3),
	 *     normalized by xyz_weight; accumulated only for MODE_XYZQ, MODE_XYZQ_LOCAL and
	 *     MODE_XYZ4Q3.</li>
	 * </ul>
	 * @param fx      calculated function values
	 * @param rms_fp  null or an array to receive the RMS values. Three elements are
	 *                produced; a shorter (2-element) array receives only the first two
	 * @param noNaNs  when true, the first NaN weighted difference makes the method
	 *                return null (all elements of rms_fp are set to NaN); when false
	 *                such differences are replaced with zeros
	 * @return weighted differences (same length as fx) or null on NaN if noNaNs is set
	 */
	private double [] getYminusFxWeighted(
			final double []   fx,
			final double []   rms_fp, // null or [3] ([2] is accepted, then rms_pure_xyz is not returned)
			boolean noNaNs) {
		final double []     wymfw =       new double [fx.length];
		double s_rms=0, sxyz_rms=0; 
		double rms_pure=Double.NaN;
		double rms_pure_xyz=Double.NaN;
		boolean use_xyz_weight = (mode==MODE_XYZQ) || (mode==MODE_XYZQ_LOCAL) || (mode==MODE_XYZ4Q3);
		for (int i = 0; i < fx.length; i++) {
			double d = y_vector[i] - fx[i];
			double wd = d * weights[i];
			if (Double.isNaN(wd)) {
				System.out.println("getYminusFxWeighted(): weights["+i+"]="+weights[i]+", wd="+wd+
						", y_vector[i]="+y_vector[i]+", fx[i]="+fx[i]);
				if (noNaNs) {
					System.out.println("getYminusFxWeighted(): return null as noNaNs is true");
					if (rms_fp != null) {
						// invalidate every output, including rms_pure_xyz, so that no
						// stale value from a previous call survives a failed one
						Arrays.fill(rms_fp, Double.NaN);
					}
					return null; 
				}
				wd = 0.0;
				d = 0.0;
			}
			int comp_index = i % samples;
			if (i == (samples * N)) {
				// first element after all scenes: sums so far contain only "pure" samples
				rms_pure =     Math.sqrt(s_rms/pure_weight);
				rms_pure_xyz = Math.sqrt(sxyz_rms/xyz_weight);
			}
			wymfw[i] = wd;
			double wd2 = d * wd;
			s_rms += wd2; // d * wd;
			if (use_xyz_weight && (comp_index < 3)) { // xyz
				sxyz_rms += wd2;
			}
		}
		double rms = Math.sqrt(s_rms); // assuming sum_weights == 1.0;
		if (Double.isNaN(rms_pure)) {
			rms_pure=rms;
			rms_pure_xyz = Math.sqrt(sxyz_rms/xyz_weight);
		}
		if (rms_fp != null) {
			rms_fp[0] = rms;
			rms_fp[1] = rms_pure;
			if (rms_fp.length > 2) { // tolerate a 2-element array
				rms_fp[2] = rms_pure_xyz;
			}
		}
		return wymfw;
	}
	
	// reusing multithreaded
	/**
	 * Build the damped normal-equations matrix from the transposed Jacobian:
	 * element [i][j] is the sum over k of weights[k]*jt[i][k]*jt[j][k], and each
	 * diagonal element is additionally multiplied by (1 + lambda).
	 * Only the upper triangle is calculated, the lower one is mirrored. The
	 * index pairs are distributed between threads through a shared counter.
	 *
	 * @param lambda damping factor applied to the diagonal elements
	 * @param jt     transposed Jacobian, [number of parameters][number of samples]
	 * @return square symmetric matrix [number of parameters][number of parameters]
	 */
	private double [][] getWJtJlambda( // USED in lwir
			final double      lambda,
			final double [][] jt)
	{
		final int num_pars = jt.length;
		final int num_pars2 = num_pars * num_pars;
		final int nup_points = jt[0].length;
		final double [][] wjtjl = new double [num_pars][num_pars];
		final Thread[] threads = ImageDtt.newThreadArray(ImageDtt.THREADS_MAX);
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int indx = ai.getAndIncrement(); indx < num_pars2; indx = ai.getAndIncrement()) {
						int i = indx / num_pars;
						int j = indx % num_pars;
						if (j >= i) { // upper triangle (including diagonal) only
							double d = 0.0;
							for (int k = 0; k < nup_points; k++) {
								d += weights[k]*jt[i][k]*jt[j][k];
							}
							wjtjl[i][j] = d;
							if (i == j) {
								wjtjl[i][j] += d * lambda;
							} else {
								wjtjl[j][i] = d; // mirror to the lower triangle
							}
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		return wjtjl;
	}
	
	
	/**
	 * Run LMA iterations: call lmaStep() up to num_iter times, adjusting lambda
	 * after each step, then print the summary selected by debug_level.
	 * The loop ends early when lmaStep() reports the second flag (rslt[1]) set,
	 * or when lambda grows above lambda_max after an unsuccessful step.
	 * If the last step did not improve, the run is still considered successful
	 * when the last RMS is below the initial one.
	 * NOTE(review): the loop counter starts from 0, so a run that finishes on the
	 * very first step returns 0 - confirm against the "1 - immediately" remark.
	 *
	 * @param lambda            initial damping factor
	 * @param lambda_scale_good lambda multiplier after a step that improved RMS
	 * @param lambda_scale_bad  lambda multiplier after a step that did not improve RMS
	 * @param lambda_max        give up when lambda exceeds this value
	 * @param rms_diff          relative RMS improvement threshold passed to lmaStep()
	 * @param num_iter          maximal number of steps
	 * @param last_run          enables additional printing (see usage below)
	 * @param debug_level       debug level, higher values print more
	 * @return value of the loop counter at exit on success, -1 on failure
	 *         (including lmaStep() returning null)
	 */
	public int runLma( // <0 - failed, >=0 iteration number (1 - immediately)
			double lambda,           // 0.1
			double lambda_scale_good,// 0.5
			double lambda_scale_bad, // 8.0
			double lambda_max,       // 100
			double rms_diff,         // 0.001
			int    num_iter,         // 20
			boolean last_run,
			int    debug_level) {
		boolean [] rslt = {false,false};
		this.last_rms = null; // remove? Forces lmaStep() to recalculate fx/Jacobian on its first call
		int iter = 0;
		for (iter = 0; iter < num_iter; iter++) {
			rslt =  lmaStep(
					lambda,
					rms_diff,
					debug_level);
			if (rslt == null) {
				return -1; // false; // need to check
			}
			if (debug_level > 1) {
				switch (mode) {
				case MODE_XYZQ:
				case MODE_XYZQ_LOCAL:
				case MODE_XYZ4Q3:
					// these modes additionally report XYZ and ATR RMS values
					double [] good_or_bad4 = getGoodOrBadRms();
					double [] initial_rms4 = getInitialRms();
					System.out.println("LMA step "+iter+": {"+rslt[0]+","+rslt[1]+"} full RMS= "+good_or_bad_rms[0] +
							" ("+initial_rms[0]+"), pure RMS="+good_or_bad_rms[1]+" ("+initial_rms[1]+"), "+
							"XYZ RMS="+good_or_bad4[2]+" ("+initial_rms4[2]+"), ATR RMS="+good_or_bad4[3]+" ("+initial_rms4[3]+"), "+
							"lambda="+lambda);
					break;
				default:
					System.out.println("LMA step "+iter+": {"+rslt[0]+","+rslt[1]+"} full RMS= "+good_or_bad_rms[0]+
							" ("+initial_rms[0]+"), pure RMS="+good_or_bad_rms[1]+" ("+initial_rms[1]+") + lambda="+lambda);
				}
			}
			if (rslt[1]) { // lmaStep() requested to stop (small improvement or singular matrix)
				break;
			}
			if (rslt[0]) { // good
				lambda *= lambda_scale_good;
			} else {
				lambda *= lambda_scale_bad;
				if (lambda > lambda_max) {
					break; // not used in lwir
				}
			}
		}
		if (rslt[0]) { // better
			if (iter >= num_iter) { // better, but num tries exceeded
				if (debug_level > 1) System.out.println("Step "+iter+": Improved, but number of steps exceeded maximal");
			} else {
				if (debug_level > 1) System.out.println("Step "+iter+": LMA: Success");
			}

		} else { // improved over initial ?
			// comparison is false if either value is NaN
			if (last_rms[0] < initial_rms[0]) { // NaN
				rslt[0] = true;
				if (debug_level > 1) System.out.println("Step "+iter+": Failed to converge, but result improved over initial");
			} else {
				if (debug_level > 1) System.out.println("Step "+iter+": Failed to converge");
			}
		}
		boolean show_intermediate = true;
		if (show_intermediate && (debug_level > 0)) {
			switch (mode) {
			case MODE_XYZQ:
			case MODE_XYZQ_LOCAL:
			case MODE_XYZ4Q3:
				double [] last_rms4 =    getLastRms();
				double [] initial_rms4 = getInitialRms();
				System.out.println("LMA: full RMS="+last_rms[0]+" ("+initial_rms[0]+"), pure RMS="+last_rms[1]+" ("+initial_rms[1]+"), "+
						"XYZ RMS="+last_rms4[2]+" ("+initial_rms4[2]+"), ATR RMS="+last_rms4[3]+" ("+initial_rms4[3]+"), "+
						"lambda="+lambda);
				break;
			default:
				System.out.println("LMA: full RMS="+last_rms[0]+" ("+initial_rms[0]+"), pure RMS="+last_rms[1]+" ("+initial_rms[1]+"), "+
						"lambda="+lambda);
			}
		}
		if (debug_level > 2){ 
			String [] lines1 = printOldNew(false); // boolean allvectors)
			System.out.println("iteration="+iter);
			for (String line : lines1) {
				System.out.println(line);
			}
		}
		if (debug_level > 0) {
			if ((debug_level > 1) ||  last_run) { // (iter == 1) || last_run) {
				// the summary below is printed only if it was not already printed above
				if (!show_intermediate) {
					switch (mode) {
					case MODE_XYZQ:
					case MODE_XYZQ_LOCAL:
					case MODE_XYZ4Q3:
						double [] last_rms4 =    getLastRms();
						double [] initial_rms4 = getInitialRms();
						System.out.println("LMA: full RMS="+last_rms[0]+" ("+initial_rms[0]+"), pure RMS="+last_rms[1]+" ("+initial_rms[1]+"), "+
								"XYZ RMS="+last_rms4[2]+" ("+initial_rms4[2]+"), ATR RMS="+last_rms4[3]+" ("+initial_rms4[3]+"), "+
								"lambda="+lambda);
						break;
					default:
						System.out.println("LMA: full RMS="+last_rms[0]+" ("+initial_rms[0]+"), pure RMS="+last_rms[1]+" ("+initial_rms[1]+"), "+
								"lambda="+lambda);
					}
					/*
					System.out.println("LMA: iter="+iter+",   full RMS="+last_rms[0]+" ("+initial_rms[0]+"), pure RMS="+last_rms[1]+
							" ("+initial_rms[1]+") + lambda="+lambda);
					*/
				}
				String [] lines = printOldNew(false); // boolean allvectors)
				for (String line : lines) {
					System.out.println(line);
				}
			}
		}
		if ((debug_level > -2) && !rslt[0]) { // failed
			if ((debug_level > 1) || (iter == 1) || last_run) {
				System.out.println("LMA failed on iteration = "+iter);
				String [] lines = printOldNew(true); // boolean allvectors)
				for (String line : lines) {
					System.out.println(line);
				}
			}
			System.out.println();
		}
		if (debug_level > 0) {
			// recalculate fx for the current parameters (no Jacobian) just for printing
			double [] fx = getFxDerivs(
					parameters_vector, // double []         vector,
					null,              // final double [][] jt, // should be null or initialized with [vector.length][]
					debug_level);      // final int         debug_level)
			debugYfX ( "fx-",   // String pfx,
					fx); // double [] data)
			if (debug_level > 2) {
				debugYfX ( "ffx-",   // String pfx,
						dbg_data); // double [] data)
			}
			if (debug_level > 1) {
				// print the result of compareJT() for the current parameters
				double    delta = 1E-5;
				System.out.println("\n\n");
				double err = compareJT(
						parameters_vector, // double [] vector,
						delta);            // double    delta);
				System.out.println("Maximal error = "+err);
			}

		}
		return rslt[0]? iter : -1;
	}
	
	/**
	 * Get the smallest and the largest diagonal elements of the (undamped,
	 * lambda = 0) matrix returned by getWJtJlambda() for the current
	 * parameters_vector.
	 * A NaN value (initial or coming from the diagonal) always fails the
	 * comparison, so it is replaced by the next diagonal element.
	 *
	 * @param debug_level debug level passed to getFxDerivs()
	 * @return {minimum, maximum} pair
	 */
	public double [] getMinMaxDiag(
			int debug_level){
		final int num_pars = parameters_vector.length;
		final double [][] jacobian_t = new double [num_pars][];
		// called to fill jacobian_t, the returned fx is not needed here
		getFxDerivs(
				parameters_vector,
				jacobian_t,
				debug_level);
		final Matrix wjtj = new Matrix(getWJtJlambda(0, jacobian_t));
		double diag_min = Double.NaN;
		double diag_max = Double.NaN;
		for (int npar = 0; npar < num_pars; npar++) {
			final double diag = wjtj.get(npar, npar);
			if (!(diag > diag_min)) {
				diag_min = diag;
			}
			if (!(diag < diag_max)) {
				diag_max = diag;
			}
		}
		return new double [] {diag_min, diag_max};
	}
	/**
	 * Perform a single LMA step: solve (JtJ + lambda*diag(JtJ)) * delta = Jt*(y-fx)
	 * for the current state, try parameters_vector + delta and keep the new
	 * vector only if the full RMS decreased.
	 * On the first call after last_rms was reset to null the method also
	 * calculates the initial fx, weighted differences and initial RMS values.
	 *
	 * @param lambda      damping factor passed to getWJtJlambda()
	 * @param rms_diff    relative RMS improvement threshold: the second result
	 *                    flag is set when the new RMS is not lower than
	 *                    last_rms * (1 - rms_diff)
	 * @param debug_level debug level, higher values print more
	 * @return {improved, finished} pair of flags; {false, true} if the matrix
	 *         could not be inverted; null if the weighted differences for the
	 *         current (not trial) parameters contain NaN
	 */
	private boolean [] lmaStep(
			double lambda,
			double rms_diff,
			int debug_level) {
		boolean noNaNs = true;
		boolean [] rslt = {false,false};
		// maybe the following if() branch is not needed - already done in prepareLMA !
		if (this.last_rms == null) { //first time, need to calculate all (vector is valid)
			last_rms = new double[3];  // [2] - for XYZ
			if (debug_level > 1) {
				System.out.println("lmaStep(): first step");
			}
			double [] fx = getFxDerivs(
					parameters_vector, // double []         vector,
					last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
					debug_level);      // final int         debug_level)
			if (debug_level > 3) {
				 debugYfX ( "fx0-",   // String pfx,
						 fx); // double [] data)
			}
			if (debug_level > 4) {
				debugYfX ( "ffx0-",   // String pfx,
						dbg_data); // double [] data)
			}
			
			if (debug_level > 5) { // remove?
				double    delta = 1E-5;
				System.out.println("\n\n");
				double err = compareJT(
						parameters_vector, // double [] vector,
						delta);            // double    delta);
				System.out.println("Maximal error = "+err);
			}
			
			last_ymfx = getYminusFxWeighted(
					fx, // final double []   fx,
					last_rms, // final double []   rms_fp // null or [2]
					noNaNs); // boolean noNaNs)
			
			// remember the starting point RMS values
			this.initial_rms = this.last_rms.clone();
			this.good_or_bad_rms = this.last_rms.clone();

			if (debug_level > -1) { // temporary
				/*
				dbgYminusFxWeight(
						this.last_ymfx,
						this.weights,
						"Initial_y-fX_after_moving_objects");
                */
			}
			if (last_ymfx == null) {
				return null; // need to re-init/restart LMA
			}
			// TODO: Restore/implement
			if (debug_level > 3) {
				/*
				 dbgJacobians(
							corr_vector, // GeometryCorrection.CorrVector corr_vector,
							1E-5, // double delta,
							true); //boolean graphic)
				*/
			}
		}
		// column vector of the weighted differences
		Matrix y_minus_fx_weighted = new Matrix(this.last_ymfx, this.last_ymfx.length);

		Matrix wjtjlambda = new Matrix(getWJtJlambda(
				lambda, // *10, // temporary
				this.last_jt)); // double [][] jt)
		
		if (debug_level>2) {
			System.out.println("JtJ + lambda*diag(JtJ");
			wjtjlambda.print(18, 6);
		}
		Matrix jtjl_inv = null;
		try {
			jtjl_inv = wjtjlambda.inverse(); // check for errors
		} catch (RuntimeException e) {
			// inversion failed: report "not improved, finished" to the caller
			rslt[1] = true;
			if (debug_level > 0) {
				System.out.println("Singular Matrix!");
			}

			return rslt;
		}
		if (debug_level>2) {
			System.out.println("(JtJ + lambda*diag(JtJ).inv()");
			jtjl_inv.print(18, 6);
		}
//last_jt has NaNs
		Matrix jty = (new Matrix(this.last_jt)).times(y_minus_fx_weighted);
		if (debug_level>2) {
			System.out.println("Jt * (y-fx)");
			jty.print(18, 6);
		}
		
		
		Matrix mdelta = jtjl_inv.times(jty);
		if (debug_level>2) {
			System.out.println("mdelta");
			mdelta.print(18, 10);
		}

		double scale = 1.0;
		double []  delta =      mdelta.getColumnPackedCopy();
		// trial vector: current parameters plus the calculated correction
		double []  new_vector = parameters_vector.clone();
		for (int i = 0; i < parameters_vector.length; i++) {
			new_vector[i] += scale * delta[i];
		}
		
		
		double [] fx = getFxDerivs(
				new_vector, // double []         vector,
				last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
				debug_level);      // final int         debug_level)
		double [] rms = new double[3];
		last_ymfx = getYminusFxWeighted(
				fx, // final double []   fx,
				rms, // final double []   rms_fp // null or [2]
				noNaNs); // boolean noNaNs)
		if (debug_level > 2) {
			/*
			dbgYminusFx(this.last_ymfx, "next y-fX");
			dbgXY(new_vector, "XY-correction");
			*/
		}

//		if (last_ymfx == null) {
//			return new boolean[] {false,false}; // null; // need to re-init/restart LMA
//		}

		this.good_or_bad_rms = rms.clone();
		// a NaN RMS fails the comparison and is handled as "worsened"
		if ((rms[0] < this.last_rms[0]) && (last_ymfx != null)) { // improved
			rslt[0] = true;
			rslt[1] = rms[0] >=(this.last_rms[0] * (1.0 - rms_diff));
			this.last_rms = rms.clone();

			this.parameters_vector = new_vector.clone();
			if (debug_level > 2) {
				// print vectors in some format
				/*
				System.out.print("delta: "+corr_delta.toString()+"\n");
				System.out.print("New vector: "+new_vector.toString()+"\n");
				System.out.println();
				*/
			}
		} else { // worsened
			rslt[0] = false;
			rslt[1] = false; // do not know, caller will decide
			// restore state
			// (recalculate fx, last_jt, last_ymfx and last_rms for the unchanged parameters_vector)
			fx = getFxDerivs(
					parameters_vector, // double []         vector,
					last_jt,           // final double [][] jt, // should be null or initialized with [vector.length][]
					debug_level);      // final int         debug_level)
			last_ymfx = getYminusFxWeighted(
					fx, // final double []   fx,
					this.last_rms, // final double []   rms_fp // null or [2]
					noNaNs); // boolean noNaNs)
			
			if (last_ymfx == null) {
				return null; // need to re-init/restart LMA
			}
			if (debug_level > 2) {
				/*
				 dbgJacobians(
							corr_vector, // GeometryCorrection.CorrVector corr_vector,
							1E-5, // double delta,
							true); //boolean graphic)
							*/
			}
		}
		return rslt;
	}
	
	/**
	 * Print a tab-separated table of per-scene values (one row per scene) to
	 * System.out. The layout of the table depends on the current mode and on
	 * the length of the data array:
	 * <ul>
	 * <li>MODE_XYZ, MODE_COMPASS - X, Y, Z;</li>
	 * <li>data at least as long as x_vector - X, Y, Z, q0..q3 (samples_x values
	 *     per scene) and the angles derived from the quaternion;</li>
	 * <li>MODE_XYZ4Q3, MODE_XYZQ with shorter data - X, Y, Z, q1..q3 (samples
	 *     values per scene), q0 is restored assuming normalized quaternion;</li>
	 * <li>otherwise - 4 values per scene (Z, 2*Q3, 2*Q2-X, 2*Q1+Y).</li>
	 * </ul>
	 * Scenes whose quaternion has (almost) zero length are printed as dashes.
	 *
	 * @param pfx  prefix for the column names
	 * @param data values to print, null - do nothing
	 */
	public void debugYfX (
			String pfx,
			double [] data) {
		if (data == null) {
			return;
		}
		if ((mode == MODE_XYZ) || ((mode == MODE_COMPASS))) {
			System.out.println(String.format("%3s"+
					"\t%9s\t%9s\t%9s",
					"N",pfx+"X",pfx+"Y",pfx+"Z"));
			for (int nscene = 0; nscene < N; nscene++) {
				System.out.println(String.format("%3d"+
						"\t%9.5f\t%9.5f\t%9.5f",
						nscene,
						data[samples*nscene + 0],data[samples*nscene + 1],data[samples*nscene + 2]));
			}
			System.out.println();
		} else  if (data.length >= x_vector.length) { // different data size data[3*nscene+...]
			System.out.println(String.format("%3s"+
					"\t%9s\t%9s\t%9s\t%9s\t%9s\t%9s\t%9s"+ //x,y,z, q0,q1,q2,q3,a,t,r
					"\t%9s\t%9s\t%9s",
					"N",pfx+"X",pfx+"Y",pfx+"Z",pfx+"q0",pfx+"q1",pfx+"q2",pfx+"q3",
					pfx+"A",pfx+"T",pfx+"R"));
			for (int nscene = 0; nscene < N; nscene++) {
				final int base = samples_x*nscene; // same stride for all accesses of this scene
				if (   (data[base + 3]*data[base + 3] +
						data[base + 4]*data[base + 4] +
						data[base + 5]*data[base + 5]+
						data[base + 6]*data[base + 6]) < 0.001) {
					System.out.println(String.format("%3d\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-", nscene));
					continue;
				}
				// was indexed with a hard-coded stride of 7, now consistent with samples_x
				Rotation rot = new Rotation(data[base + 3],data[base + 4],data[base + 5],data[base + 6], false);
				double []  angles = rot.getAngles(RotationOrder.YXZ, ErsCorrection.ROT_CONV);
				System.out.println(String.format("%3d"+
						"\t%9.5f\t%9.5f\t%9.5f\t%9.5f\t%9.5f\t%9.5f\t%9.5f"+
						"\t%9.5f\t%9.5f\t%9.5f",
						nscene,
						data[base + 0],data[base + 1],data[base + 2],
						data[base + 3],data[base + 4],data[base + 5],data[base + 6],
						angles[0],angles[1],angles[2]));
			}
			System.out.println();
		} else  if ((mode == MODE_XYZ4Q3) || (mode == MODE_XYZQ)) { // not when 7-long, it should be caught before
			System.out.println(String.format("%3s"+
					"\t%9s\t%9s\t%9s\t%9s\t%9s\t%9s\t%9s"+ //x,y,z, q0,q1,q2,q3,a,t,r
					"\t%9s\t%9s\t%9s",
					"N",pfx+"X",pfx+"Y",pfx+"Z",pfx+"q0",pfx+"q1",pfx+"q2",pfx+"q3",
					pfx+"A",pfx+"T",pfx+"R"));
			for (int nscene = 0; nscene < N; nscene++) {
				// scene validity is tested on the input vector, not on the data
				final int base_x = samples_x*nscene;
				if (   (x_vector[base_x + 3]*x_vector[base_x + 3] +
						x_vector[base_x + 4]*x_vector[base_x + 4] +
						x_vector[base_x + 5]*x_vector[base_x + 5]+
						x_vector[base_x + 6]*x_vector[base_x + 6]) < 0.001) {
					System.out.println(String.format("%3d\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-", nscene));
					continue;
				}
				final int base = samples*nscene;
				double q1 = data[base + 3];
				double q2 = data[base + 4];
				double q3 = data[base + 5];
				double q0 = Math.sqrt(1.0-q1*q1 - q2*q2 - q3*q3); // should be normalized!
				Rotation rot = new Rotation(q0,q1,q2,q3, false);
				double []  angles = rot.getAngles(RotationOrder.YXZ, ErsCorrection.ROT_CONV);
				System.out.println(String.format("%3d"+
						"\t%9.5f\t%9.5f\t%9.5f\t%9.5f\t%9.5f\t%9.5f\t%9.5f"+
						"\t%9.5f\t%9.5f\t%9.5f",
						nscene,
						data[base + 0],data[base + 1],data[base + 2],
						q0,q1,q2,q3,
						angles[0],angles[1],angles[2]));
			}
			System.out.println();
		} else { // if (mode == 2) {
			System.out.println(String.format("%3s"+
					"\t%9s\t%9s\t%9s\t%9s", // Z, 2*Q3, 2*Q2-X, 2*Q1+Y
					"N",pfx+"Z",pfx+"2*Q3",pfx+"2*Q2-X",pfx+"2*Q1+Y"));
			for (int nscene = 0; nscene < N; nscene++) {
				System.out.println(String.format("%3d"+
						"\t%9.5f\t%9.5f\t%9.5f\t%9.5f",
						nscene,
						data[samples*nscene + 0], data[samples*nscene + 1],
						data[samples*nscene + 2], data[samples*nscene + 3]));
			}
			System.out.println();
		}
	}
	
	
	//TODO: implement
	/**
	 * Placeholder: currently produces no output lines.
	 * @param allvectors not used yet
	 * @return empty array of lines
	 */
	public String [] printOldNew(boolean allvectors) {
		final String [] no_lines = new String[0];
		return no_lines;
	}
	
	/**
	 * Combine two translation/rotation pairs: the target offset is rotated by
	 * quat_src (applyTo()) and added to xyz_src, the rotations are combined
	 * with compose(quat_src, quat_target).
	 *
	 * @param xyz_src     source translation or null (then only the rotated target offset is returned)
	 * @param quat_src    source rotation quaternion
	 * @param xyz_target  target translation
	 * @param quat_target target rotation quaternion or null (then the returned rotation is null)
	 * @return {xyz, quat} pair
	 */
	public static double [][] combineTransRot(
			double [] xyz_src,     // transformation to apply to (was reference_xyz)
			double [] quat_src,    // transformation to apply to (was reference_atr)
			double [] xyz_target,  // to which is applied (was scene_xyz)
			double [] quat_target  // to which is applied (was scene_atr)		
			) {
		final double [] rotated = applyTo(quat_src, xyz_target);
		final double [] xyz;
		if (xyz_src != null) {
			xyz = addTo(xyz_src, rotated);
		} else {
			xyz = rotated;
		}
		double [] quat = null;
		if (quat_target != null) {
			quat = compose(quat_src, quat_target); // includes normSign()
		}
		return new double [][] {xyz, quat};
	}

	/**
	 * Invert a translation/rotation pair. The quaternion gets its scalar
	 * component negated (and then the sign normalized by normSign()), the
	 * negated translation is rotated by this new quaternion.
	 *
	 * @param xyz_src  translation {X, Y, Z}
	 * @param quat_src rotation quaternion (scalar, vector)
	 * @return {xyz, quat} pair of the inverted transformation
	 */
	public static double [][] invertTransRot(
			double [] xyz_src,     // transformation to apply to (was reference_xyz)
			double [] quat_src){    // transformation to apply to (was reference_atr)
		final double [] flipped = {-quat_src[0], quat_src[1], quat_src[2], quat_src[3]};
		final double [] quat_inv = normSign(flipped);
		final double [] neg_xyz = {-xyz_src[0], -xyz_src[1], -xyz_src[2]};
		final double [] xyz_inv = applyTo(quat_inv, neg_xyz);
		return new double [][] {xyz_inv, quat_inv};
	}

	
	/**
	 * Compose two quaternions (apply q to r) and normalize the sign of the
	 * result so its scalar component is non-negative (see normSign()).
	 * @param q 4 components (scalar, vector) of the quaternion being applied
	 * @param r 4 components (scalar, vector) of the quaternion to which q is applied
	 * @return composed quaternion, 4 components (scalar, vector)
	 */
	public static double [] compose(
			double [] q,
			double [] r) {
		final double q0 = q[0], q1 = q[1], q2 = q[2], q3 = q[3];
		final double r0 = r[0], r1 = r[1], r2 = r[2], r3 = r[3];
		final double [] product = new double [4];
		product[0] = r0 * q0 - (r1 * q1 +  r2 * q2 + r3 * q3);
		product[1] = r1 * q0 +  r0 * q1 + (r2 * q3 - r3 * q2);
		product[2] = r2 * q0 +  r0 * q2 + (r3 * q1 - r1 * q3);
		product[3] = r3 * q0 +  r0 * q3 + (r1 * q2 - r2 * q1);
		return normSign(product);
	}
	
	/**
	 * Make the scalar component of a quaternion non-negative.
	 * @param q 4 components (scalar, vector) of a quaternion
	 * @return the same array if q[0] >= 0, otherwise a new array with all
	 *         4 components negated
	 */
	public static double [] normSign(double [] q) {
		final boolean keep = q[0] >= 0;
		if (!keep) {
			final double [] negated = new double [4];
			for (int i = 0; i < negated.length; i++) {
				negated[i] = -q[i];
			}
			return negated;
		}
		return q;
	}
	/**
	 * Two-stage quaternion product used for the "Q*R*Q~" type rotation.
	 * First the intermediate quaternion s is calculated with the same formula
	 * as in compose(q,r), then s is combined with {-q[0], q[1], q[2], q[3]}.
	 * The sign of the result is normalized by normSign().
	 * @param q 4 components (scalar, vector) of the quaternion being applied
	 * @param r 4 components (scalar, vector) of the quaternion to which q is applied
	 * @return resulting quaternion, 4 components (scalar, vector)
	 */
	public static double [] composeQR_Q(
			double [] q,
			double [] r) {
		// intermediate product (same expressions as in compose(q,r), before normSign())
		final double s0 = r[0]*q[0] - r[1]*q[1] - r[2]*q[2] - r[3]*q[3];
		final double s1 = r[1]*q[0] + r[0]*q[1] + r[2]*q[3] - r[3]*q[2];
		final double s2 = r[2]*q[0] + r[0]*q[2] + r[3]*q[1] - r[1]*q[3];
		final double s3 = r[3]*q[0] + r[0]*q[3] + r[1]*q[2] - r[2]*q[1];
		final double [] t = {
				-q[0]*s0 - q[1]*s1 - q[2]*s2 - q[3]*s3,
				 q[1]*s0 - q[0]*s1 + q[2]*s3 - q[3]*s2,
				 q[2]*s0 - q[0]*s2 + q[3]*s1 - q[1]*s3,
				 q[3]*s0 - q[0]*s3 + q[1]*s2 - q[2]*s1};
		return normSign(t);
	}
	/**
	 * Get derivatives of the composed quaternion (composeQR_Q(q,r)) by the
	 * components of the rotation (q). Rotation is Q*R*Q~
	 * @param q  4 components (scalar, vector) of the quaternion Q to apply to
	 *        scene rotations r.
	 * @param r  4 components (scalar, vector) of the quaternion to which
	 *            the first one is applied.
	 * @param use_inv select the variant for the inverted {-q0,q1,q2,q3} (true)
	 *            or for the non-inverted {q0,q1,q2,q3} (false). The first rows
	 *            of the two variants are the same, all terms of the other
	 *            rows have opposite signs.
	 * @return 4x4 array, where columns correspond to composition components
	 *         (samples in LMA) and rows - to the target quaternion.
	 */
	public static double [][] composeQR_QdQ(
			double [] q,
			double [] r,
			boolean use_inv) {
		final double q0 = q[0], q1 = q[1], q2 = q[2], q3 = q[3];
		final double r0 = r[0], r1 = r[1], r2 = r[2], r3 = r[3];
		// d/dQ0 - common for both variants
		final double [] d_dq0 = {
				2*r0*q0,
				2*r1*q0 + 2*r2*q3 - 2*r3*q2,
				2*r2*q0 + 2*r3*q1 - 2*r1*q3,
				2*r3*q0 + 2*r1*q2 - 2*r2*q1};
		if (!use_inv) {
			// for non-inverted {q0,q1,q2,q3}
			final double [] d_dq1 = {
					2*r0*q1,
					2*r1*q1 + 2*r2*q2 + 2*r3*q3,
					2*r1*q2 + 2*r3*q0 - 2*r2*q1,
					-2*r2*q0 - 2*r3*q1 + 2*r1*q3};
			final double [] d_dq2 = {
					2*r0*q2,
					2*r2*q1 - 2*r3*q0 - 2*r1*q2,
					2*r1*q1 + 2*r2*q2 + 2*r3*q3,
					2*r2*q3 + 2*r1*q0 - 2*r3*q2};
			final double [] d_dq3 = {
					2*r0*q3,
					2*r3*q1 + 2*r2*q0 - 2*r1*q3,
					2*r3*q2 - 2*r1*q0 - 2*r2*q3,
					2*r1*q1 + 2*r2*q2 + 2*r3*q3};
			return new double [][] {d_dq0, d_dq1, d_dq2, d_dq3};
		}
		// for inverted {-q0,q1,q2,q3}
		final double [] d_dq1 = {
				-2*r0*q1,
				-2*r1*q1 - 2*r2*q2 - 2*r3*q3,
				-2*r1*q2 - 2*r3*q0 + 2*r2*q1,
				+2*r2*q0 + 2*r3*q1 - 2*r1*q3};
		final double [] d_dq2 = {
				-2*r0*q2,
				-2*r2*q1 + 2*r3*q0 + 2*r1*q2,
				-2*r1*q1 - 2*r2*q2 - 2*r3*q3,
				-2*r2*q3 - 2*r1*q0 + 2*r3*q2};
		final double [] d_dq3 = {
				-2*r0*q3,
				-2*r3*q1 - 2*r2*q0 + 2*r1*q3,
				-2*r3*q2 + 2*r1*q0 + 2*r2*q3,
				-2*r1*q1 - 2*r2*q2 - 2*r3*q3};
		return new double [][] {d_dq0, d_dq1, d_dq2, d_dq3};
	}

	/**
	 * Get derivatives of the composed quaternion (compose(q,r)) by the
	 * components of the first one (q). These derivatives do not depend
	 * on the first quaternion, so it is not in the input.
	 * @param r  4 components (scalar, vector) of the quaternion to which
	 *            the first one is applied.
	 * @return 4x4 array built only of the (signed) components of r
	 */
	public static double [][] composeDQ( // not used
			double [] r) {
		final double a = r[0];
		final double b = r[1];
		final double c = r[2];
		final double d = r[3];
		final double [][] deriv = new double [4][];
		deriv[0] = new double [] { a,  b,  c,  d};
		deriv[1] = new double [] {-b,  a,  d, -c};
		deriv[2] = new double [] {-c, -d,  a,  b};
		deriv[3] = new double [] {-d,  c, -b,  a};
		return deriv;
	}

	/**
	 * Get derivatives of the composed quaternion (compose(q,r)) by the
	 * components of the second one (r). These derivatives do not depend
	 * on the second quaternion, so it is not in the input.
	 * Also can be used to convert quaternion to a matrix for post-multiplying
	 * derivatives.
	 * @param q  4 components (scalar, vector) of the quaternion being
	 *           applied to the second quaternion.
	 * @return 4x4 array built only of the (signed) components of q
	 */
	public static double [][] composeDR(
			double [] q) {
		final double a = q[0];
		final double b = q[1];
		final double c = q[2];
		final double d = q[3];
		final double [][] deriv = new double [4][];
		deriv[0] = new double [] {a, -b, -c, -d};
		deriv[1] = new double [] {b,  a,  d, -c};
		deriv[2] = new double [] {c, -d,  a,  b};
		deriv[3] = new double [] {d,  c, -b,  a};
		return deriv;
	}

	/**
	 * Add two 3D vectors.
	 * @param offs first vector {X, Y, Z}
	 * @param xyz  second vector {X, Y, Z}
	 * @return new 3-element array with the component-wise sum
	 */
	public static double [] addTo(
			double [] offs,
			double [] xyz) {
		final double [] sum = new double [3];
		for (int i = 0; i < sum.length; i++) {
			sum[i] = offs[i] + xyz[i];
		}
		return sum;
	}

	
	/**
	 * Apply quaternion to a 3D vector
	 * @param q   4 components (scalar, vector) of the quaternion being applied
	 *            to a vector.
	 * @param  xyz 1-d array representing a 3D vector {X, Y, Z}
	 * @return rotated 3D vector as a 1D array {X, Y, Z}
	 */
	public static double [] applyTo(
			double [] q,
			double [] xyz) {
		final double q0 = q[0], q1 = q[1], q2 = q[2], q3 = q[3];
		final double x = xyz[0], y = xyz[1], z = xyz[2];
		// dot product of the vector part of q and xyz
		final double s = q1 * x + q2 * y + q3 * z;
		final double [] rotated = new double [3];
		rotated[0] = 2 * (q0 * (x * q0 - (q2 * z - q3 * y)) + s * q1) - x;
		rotated[1] = 2 * (q0 * (y * q0 - (q3 * x - q1 * z)) + s * q2) - y;
		rotated[2] = 2 * (q0 * (z * q0 - (q1 * y - q2 * x)) + s * q3) - z;
		return rotated;
	}
	
	/**
	 * Get derivatives of the rotated vector (see applyTo(q,xyz)) by the components of the quaternion q
	 * @param q   4 components (scalar, vector) of the quaternion being applied
	 *            to a vector.
	 * @param  xyz 1-d array representing a 3D vector {X, Y, Z}
	 * @return 4x3 array, where columns correspond to xyz components (samples in LMA)
	 *         and rows - to the quaternion q components.
	 */
	public static double [][] applyToDQ(
			double [] q,
			double [] xyz) {
		final double q0 = q[0], q1 = q[1], q2 = q[2], q3 = q[3];
		final double x = xyz[0], y = xyz[1], z = xyz[2];
		final double s = q1 * x + q2 * y + q3 * z;
		final double [][] deriv = new double [4][];
		// d/dq0
		deriv[0] = new double [] {
				4*x*q0-2*z*q2+2*y*q3,
				4*y*q0-2*x*q3+2*z*q1,
				4*z*q0-2*y*q1+2*x*q2};
		// d/dq1
		deriv[1] = new double [] {
				2*s + 2*x*q1,
				2*z*q0+2*x*q2,
				-2*y*q0+2*x*q3};
		// d/dq2
		deriv[2] = new double [] {
				-2*z*q0+2*y*q1,
				2*s + 2*y*q2,
				2*x*q0+2*y*q3};
		// d/dq3
		deriv[3] = new double [] {
				2*y*q0+2*z*q1,
				-2*x*q0+2*z*q2,
				2*s +2*z*q3};
		return deriv;
	}
	
	/**
	 * Multiply a matrix by a column vector.
	 * @param mat  matrix as [rows][columns], each row should have at least vect.length elements
	 * @param vect vector
	 * @return product vector with mat.length elements
	 */
	public static double [] mulMat(
			double [][] mat,
			double [] vect) {
		final int num_rows = mat.length;
		final int num_cols = vect.length;
		final double [] product = new double[num_rows];
		for (int row = 0; row < num_rows; row++) {
			double sum = 0.0;
			for (int col = 0; col < num_cols; col++) {
				sum += mat[row][col] * vect[col];
			}
			product[row] = sum;
		}
		return product;
	}
	
	/**
	 * Multiply two matrices.
	 * @param mat  left matrix, [rows][inner]
	 * @param mat1 right matrix, [inner][columns]
	 * @return product matrix [mat.length][mat1[0].length]
	 */
	public static double [][] mulMat(
			double [][] mat,
			double [][] mat1) {
		final int num_rows = mat.length;
		final int num_cols = mat1[0].length;
		final double [][] product = new double[num_rows][num_cols];
		for (int row = 0; row < num_rows; row++) {
			for (int col = 0; col < num_cols; col++) {
				double sum = 0.0;
				// inner dimension is defined by the first row of the left matrix
				for (int k = 0; k < mat[0].length; k++) {
					sum += mat[row][k] * mat1[k][col];
				}
				product[row][col] = sum;
			}
		}
		return product;
	}
	
	/**
	 * Arrange the (signed) components of a quaternion into a 4x4 matrix.
	 * @param q 4 components (scalar, vector) of a quaternion
	 * @return 4x4 matrix
	 */
	public static double [][] qMat(
			double [] q){
		final double a = q[0];
		final double b = q[1];
		final double c = q[2];
		final double d = q[3];
		final double [][] mat = new double [4][];
		mat[0] = new double [] { a,  b,  c,  d};
		mat[1] = new double [] {-b,  a, -d,  c};
		mat[2] = new double [] {-c,  d,  a, -b};
		mat[3] = new double [] {-d, -c,  b,  a};
		return mat;
	}

	/**
	 * Transpose a rectangular matrix.
	 * @param mat source matrix [rows][columns], number of columns is taken from the first row
	 * @return new matrix [columns][rows]
	 */
	public static double [][] transpose(
			double [][] mat){
		final int num_rows = mat.length;
		final int num_cols = mat[0].length;
		final double [][] transposed = new double[num_cols][num_rows];
		for (int col = 0; col < num_cols; col++) {
			final double [] out_row = transposed[col];
			for (int row = 0; row < num_rows; row++) {
				out_row[row] = mat[row][col];
			}
		}
		return transposed;
	}
	/**
	 * Restore the scalar component of a normalized quaternion from its vector part.
	 * @param q123 vector components {q1, q2, q3}
	 * @return non-negative q0 = sqrt(1 - q1^2 - q2^2 - q3^2), NaN if the
	 *         vector part is longer than 1
	 */
	public static double getQ0(double [] q123) {
		final double q1 = q123[0];
		final double q2 = q123[1];
		final double q3 = q123[2];
		final double vect2 = q1*q1 + q2*q2 + q3*q3;
		return Math.sqrt(1.0 - vect2);
	}
	
	/**
	 * Convert derivatives by 4 quaternion components to derivatives by the
	 * 3 vector components when the scalar one is a function of the other three:
	 * result[i] = dq0123[i+1] - dq0123[0] * q123[i] / q0.
	 * @param q123   vector components {q1, q2, q3}
	 * @param dq0123 derivatives by {q0, q1, q2, q3}
	 * @param q0     scalar component of the quaternion
	 * @return derivatives by {q1, q2, q3}
	 */
	public static double [] dQuat123( // not used
			double [] q123,
			double [] dq0123, // [4]
			double q0) {
		final double d_dq0 = dq0123[0];
		final double [] result = new double[3];
		int indx = 0;
		while (indx < result.length) {
			result[indx] = dq0123[indx + 1] - d_dq0 * q123[indx]/q0;
			indx++;
		}
		return result;
	}
	
	/**
	 * Convert arrays of derivatives by 4 quaternion components to derivatives
	 * by the 3 vector components when the scalar one is a function of the other
	 * three: result[i][n] = dq0123[i+1][n] - dq0123[0][n] * q123[i] / q0.
	 * @param q123   vector components {q1, q2, q3}
	 * @param dq0123 derivatives by {q0, q1, q2, q3}, [4][number of values]
	 * @param q0     scalar component of the quaternion
	 * @return derivatives by {q1, q2, q3}, [3][number of values]
	 */
	public static double [][] dQuat123(
			double [] q123,
			double [][] dq0123, // [4][]
			double q0) {
		final int num_values = dq0123[0].length;
		final double [][] result = new double[3][num_values];
		for (int comp = 0; comp < result.length; comp++) {
			final double [] row = result[comp];
			for (int n = 0; n < num_values; n++) {
				row[n] = dq0123[comp + 1][n] - dq0123[0][n]*q123[comp]/q0;
			}
		}
		return result;
	}
	
	/**
	 * Derivatives of the full quaternion {q0, q1, ...} by its vector components
	 * when q0 = sqrt(1 - sum of squared vector components).
	 * @param q123 vector components of the quaternion
	 * @return [q123.length][q123.length + 1] array: column 0 is -q123[i]/q0,
	 *         column i+1 of row i is 1, all other elements are 0
	 */
	public static double [][] dQ_dQ123(
			double [] q123){
		final int num_comp = q123.length;
		double remain = 1.0;
		for (double comp : q123) {
			remain -= comp * comp;
		}
		final double q0 = Math.sqrt(remain);
		final double [][] deriv = new double [num_comp][num_comp + 1];
		for (int row = 0; row < num_comp; row++) {
			deriv[row][0] = -q123[row]/q0;
			deriv[row][row + 1] = 1;
		}
		return deriv;
	}
	
	/**
	 * Get the length of a quaternion and optionally normalize it.
	 * @param q  4 components of a quaternion
	 * @param qn null or an array to receive components of q divided by the
	 *           length (as many as the array holds)
	 * @return length of q (square root of the sum of 4 squared components)
	 */
	public static double qNorm(
			final double [] q,
			final double [] qn) {
		final double len2 = q[0]*q[0]+q[1]*q[1]+q[2]*q[2]+q[3]*q[3];
		final double len = Math.sqrt(len2);
		if (qn == null) {
			return len;
		}
		int indx = 0;
		while (indx < qn.length) {
			qn[indx] = q[indx]/len;
			indx++;
		}
		return len;
	}
	
	/**
	 * Derivatives of the normalized quaternion components by the components of
	 * the source (not normalized) quaternion.
	 * @param q components of a quaternion, length is calculated by qNorm()
	 * @return square array [q.length][q.length]: -q[i]*q[j]/l^3 with 1/l added
	 *         to the diagonal elements, where l is the length of q
	 */
	public static double [][] dQndQ(
			final double [] q){
		final int num_comp = q.length;
		final double len = qNorm(q, null);
		final double len3 = len*len*len;
		final double [][] deriv = new double [num_comp][num_comp];
		for (int col = 0; col < num_comp; col++) {
			for (int row = 0; row < num_comp; row++) {
				deriv[row][col] = -q[col]*q[row]/len3;
			}
			deriv[col][col] += 1/len;
		}
		return deriv;
	}
	
	/**
	 * Negate (in place) the selected rows of a 2D array of derivatives.
	 * @param deriv  array of derivatives, modified by this method
	 * @param invert per-row flags, true - change sign of all elements of the row
	 */
	public static void invertDeriv(
			double [][] deriv,
			boolean [] invert){
		for (int row = 0; row < deriv.length; row++) {
			if (!invert[row]) {
				continue;
			}
			final double [] line = deriv[row];
			for (int col = 0; col < line.length; col++) {
				line[col] = -line[col];
			}
		}
	}
	/**
	 * Negate (in place) the selected elements of an array of derivatives.
	 * @param deriv  array of derivatives, modified by this method
	 * @param invert per-element flags, true - change sign of the element
	 */
	public static void invertDeriv(
			double [] deriv,
			boolean [] invert){
		int indx = 0;
		while (indx < deriv.length) {
			if (invert[indx]) {
				deriv[indx] = -deriv[indx];
			}
			indx++;
		}
	}
	
	/**
	 * Scale the XYZ components of vect_y so that its diameter (see
	 * getDiameter()) matches the diameter of vect_x. The second (rotation)
	 * element of each pose is not copied - the same array is referenced.
	 * @param vect_x     reference poses, []{{x,y,z},{a,t,r}}
	 * @param vect_y     poses to scale, []{{x,y,z},{a,t,r}}, may have null elements
	 * @param first_last {first, last} (inclusive) indices to process or null
	 *                   to use the full length of vect_x
	 * @return scaled poses (elements outside of the range or missing in vect_y
	 *         are null) or null if any of the diameters is not positive
	 */
	public static double [][][] scaleXYZ(
			double [][][] vect_x, // []{{x,y,z},{a,t,r}}
			double [][][] vect_y, //  []{{x,y,z},{a,t,r}}
			int []        first_last){
		final int [] range = (first_last != null) ? first_last : new int [] {0, vect_x.length-1};
		final double dia_x = getDiameter(vect_x, range);
		final double dia_y = getDiameter(vect_y, range);
		final boolean both_positive = (dia_x > 0) && (dia_y > 0); // false for NaN too
		if (!both_positive) {
			return null;
		}
		final double s = dia_x/dia_y;
		final double [][][] scaled_xyz = new double [vect_y.length][][];
		for (int i = range[0]; i <= range[1]; i++) {
			if (vect_y[i] == null) {
				continue;
			}
			final double [] xyz = {
					s* vect_y[i][0][0],
					s* vect_y[i][0][1],
					s* vect_y[i][0][2]};
			scaled_xyz[i] = new double [][] {xyz, vect_y[i][1]};
		}
		return scaled_xyz;
	}
	
	/**
	 * Estimate the diameter (largest distance between two poses) of a sequence
	 * of poses using their XYZ components only. It is a two-pass estimate:
	 * first the pose farthest from the first non-null one is found, then the
	 * largest distance from that pose to any other pose in the range.
	 * The second pass scans the whole range, so poses located before the
	 * farthest one are accounted for too.
	 *
	 * @param xyzatr     poses as []{{x,y,z},{a,t,r}}, may have null elements
	 * @param first_last {first, last} (inclusive) indices to process or null
	 *                   to use the whole array
	 * @return estimated diameter, 0 if there is a single pose (or all poses
	 *         coincide), NaN if there are no non-null poses in the range
	 */
	public static double getDiameter(
			double [][][] xyzatr,
			int [] first_last) {
		if (first_last == null) {
			first_last = new int [] {0,xyzatr.length-1};
		}
		final int last = first_last[1];
		int first = first_last[0];
		while ((first <= last) && (xyzatr[first] == null)) {
			first++;
		}
		if (first > last) return Double.NaN; // empty sequence
		double l2 = 0; // largest squared distance found so far
		int far = first;
		// pass 1: find the pose farthest from the first one
		for (int i = first; i <= last; i++) if (xyzatr[i] != null){
			final double d = distance2(xyzatr[i][0], xyzatr[first][0]);
			if (d > l2) {
				l2 = d;
				far = i;
			}
		}
		// pass 2: largest distance from that pose to any pose of the range
		for (int i = first; i <= last; i++) if (xyzatr[i] != null){
			final double d = distance2(xyzatr[i][0], xyzatr[far][0]);
			if (d > l2) {
				l2 = d;
			}
		}
		return Math.sqrt(l2);
	}

	// Squared distance between two 3D points (only the first 3 elements are used).
	private static double distance2(
			double [] a,
			double [] b) {
		double d = 0;
		for (int j = 0; j < 3; j++) {
			final double dd = a[j] - b[j];
			d += dd*dd;
		}
		return d;
	}
	
	
}
