/**
 **
 ** IntersceneGlobalRefine - Global sparse pose refinement around a fixed reference scene
 **
 ** Copyright (C) 2026 Elphel, Inc.
 **
 ** -----------------------------------------------------------------------------**
 **
 **  IntersceneGlobalRefine.java is free software: you can redistribute it and/or modify
 **  it under the terms of the GNU General Public License as published by
 **  the Free Software Foundation, either version 3 of the License, or
 **  (at your option) any later version.
 **
 **  This program is distributed in the hope that it will be useful,
 **  but WITHOUT ANY WARRANTY; without even the implied warranty of
 **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 **  GNU General Public License for more details.
 **
 **  You should have received a copy of the GNU General Public License
 **  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ** -----------------------------------------------------------------------------**
 **
 */
package com.elphel.imagej.tileprocessor;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.concurrent.atomic.AtomicInteger;

import com.elphel.imagej.cameras.CLTParameters;
import com.elphel.imagej.gpu.TpTask;

/**
 * Temporary implementation/debug map (keep while stabilizing global refinement).
 *
 * <p>What this class currently does:
 * <ul>
 *   <li>Runs global refinement over a segment with one fixed center scene.</li>
 *   <li>Global unknowns are pose-only scene parameters: {@code DSAZ..DSZ} for non-center scenes.</li>
 *   <li>Builds per-pair local normal equations using {@link IntersceneLma} + GPU correlation.</li>
 *   <li>Supports both center-reference pairs and selected non-center parent pairs (FPN mitigation),
 *       using the same pairing logic as legacy code ({@link Interscene#getFPNPairs}).</li>
 *   <li>Projects local equations to global pose-only unknowns via chain rule:
 *       local rates ({@code DSV*, DV*}) are treated as dependent variables from neighboring poses.</li>
 * </ul>
 *
 * <p>Velocity/omega model:
 * <ul>
 *   <li>Rates are recomputed from neighboring scene poses each outer iteration.</li>
 *   <li>Interior scenes use centered differences; boundaries use one-sided differences.</li>
 *   <li>ATR differences are wrapped with {@code +/-pi} safety before division by {@code dt}.</li>
 *   <li>These rates are used in two places:
 *       GPU motion-blur vectors and local Jacobian-to-global chain mapping.</li>
 * </ul>
 *
 * <p>Global matrix data structures (block-sparse, band-limited in scene index):
 * <ul>
 *   <li>{@code nvars = lastScene-firstScene+1}, including center index slot (fixed by constraints).</li>
 *   <li>{@code npars = number of active pose params per scene} (subset of {@code DSAZ..DSZ}).</li>
 *   <li>Linearized system: {@code H * delta = b}.</li>
 *   <li>{@code diag[nvars][npars][npars]} stores block diagonal {@code H(i,i)}.</li>
 *   <li>{@code offDiag1[nvars-1][npars][npars]} stores block {@code H(i,i+1)}.</li>
 *   <li>{@code offDiag2[nvars-2][npars][npars]} stores block {@code H(i,i+2)}.</li>
 *   <li>By symmetry, {@code H(i+1,i)} and {@code H(i+2,i)} are implied/transposed in mat-vec.</li>
 *   <li>{@code rhs[nvars][npars]} stores the block vector {@code b}.</li>
 * </ul>
 *
 * <p>Assembly pipeline:
 * <ul>
 *   <li>{@code buildPairFactors()} creates scene-reference factors (center + optional non-center).</li>
 *   <li>{@code getReferenceGpuData()} prepares/caches GPU reference TD tasks per reference scene.</li>
 *   <li>{@code buildLocalSystem()} runs pair correlation + local LMA normal extraction.</li>
 *   <li>{@code accumulateMappedNormalEquation()} maps local params to global pose terms and adds:
 *       {@code A^T * H_local * A} and {@code A^T * b_local}.</li>
 *   <li>{@code assembleCurvatureBlocks()} adds 2nd-order inter-scene LPF:
 *       {@code x_i - 0.5*(x_{i-1}+x_{i+1})}.</li>
 *   <li>{@code regularizeDiagonalAndFixInactive()} adds LM-like diagonal and hard-fixes inactive slots.</li>
 *   <li>{@code solvePcg()} solves with multithreaded block-band mat-vec.</li>
 * </ul>
 *
 * <p>Important constraints:
 * <ul>
 *   <li>Reference scene pose is fixed (center pose is hard zero in this mode).</li>
 *   <li>{@code param_select} is interpreted only for scene pose params ({@code DSAZ..DSZ}).</li>
 *   <li>Rate params are auxiliary for local model/chain rule, not global independent unknowns.</li>
 * </ul>
 */
public class IntersceneGlobalRefine {
	private static final int[] SCENE_POSE_PAR_INDICES = {
			ErsCorrection.DP_DSAZ,
			ErsCorrection.DP_DSTL,
			ErsCorrection.DP_DSRL,
			ErsCorrection.DP_DSX,
			ErsCorrection.DP_DSY,
			ErsCorrection.DP_DSZ};
	private static final int[] REF_POSE_PAR_INDICES = {
			ErsCorrection.DP_DAZ,
			ErsCorrection.DP_DTL,
			ErsCorrection.DP_DRL,
			ErsCorrection.DP_DX,
			ErsCorrection.DP_DY,
			ErsCorrection.DP_DZ};
	private static final int[] REF_RATE_ANG_PAR_INDICES = {
			ErsCorrection.DP_DVAZ,
			ErsCorrection.DP_DVTL,
			ErsCorrection.DP_DVRL};
	private static final int[] REF_RATE_LIN_PAR_INDICES = {
			ErsCorrection.DP_DVX,
			ErsCorrection.DP_DVY,
			ErsCorrection.DP_DVZ};
	private static final int[] SCENE_RATE_ANG_PAR_INDICES = {
			ErsCorrection.DP_DSVAZ,
			ErsCorrection.DP_DSVTL,
			ErsCorrection.DP_DSVRL};
	private static final int[] SCENE_RATE_LIN_PAR_INDICES = {
			ErsCorrection.DP_DSVX,
			ErsCorrection.DP_DSVY,
			ErsCorrection.DP_DSVZ};
	private static final double MIN_DIAG = 1.0e-12;
	private static final double DEFAULT_MAX_OFFSET = 256.0;
	private static final double DEFAULT_DT = 1.0 / 60.0;

	public static class Options {
		public int outerIterations = 3;
		public int pcgIterations = 160;
		public double pcgTolerance = 1.0e-7;
		public double deltaStop = 1.0e-5;
		public double lambdaDiag = 0.1;
		public double smoothWeightAtr = 1.0;
		public double smoothWeightXyz = 0.15;
		public double minOffset = 3.0;
		public double maxOffset = DEFAULT_MAX_OFFSET;
	}

	public static class Result {
		public int outerDone = 0;
		public int solvedScenes = 0;
		public int failedScenes = 0;
		public double maxDelta = Double.NaN;
		public double avgPairRms = Double.NaN;
		public int pcgIterationsLast = 0;
	}

	private static class LocalSystem {
		boolean valid = false;
		double[] rms = null;
	}

	/**
	 * One correlation factor: match {@code sceneIndex} to {@code refSceneIndex}.
	 * In phase-1 global refinement this is usually center-reference.
	 * For FPN mitigation we add a few non-center parent references.
	 */
	private static class PairFactor {
		int sceneIndex;
		int refSceneIndex;
		boolean nonCenterReference;

		PairFactor(
				final int sceneIndex,
				final int refSceneIndex,
				final boolean nonCenterReference) {
			this.sceneIndex = sceneIndex;
			this.refSceneIndex = refSceneIndex;
			this.nonCenterReference = nonCenterReference;
		}
	}

	private static class RefGpuData {
		int refSceneIndex = -1;
		double[][] pXpYD = null;
		TpTask[] tasks = null;
		boolean[] reliableMask = null;
	}

	private static class RateTerm {
		int sceneIndex;
		double coeff;

		RateTerm(
				final int sceneIndex,
				final double coeff) {
			this.sceneIndex = sceneIndex;
			this.coeff = coeff;
		}
	}

	private static class GlobalTerm {
		int varIndex;
		int parIndex;
		double coeff;

		GlobalTerm(
				final int varIndex,
				final int parIndex,
				final double coeff) {
			this.varIndex = varIndex;
			this.parIndex = parIndex;
			this.coeff = coeff;
		}
	}

	/**
	 * Run global sparse refinement for scenes in [earliestScene..lastScene], excluding centerIndex.
	 *
	 * @param clt_parameters processing parameters
	 * @param quadCLTs scene array
	 * @param centerCLT fixed reference scene
	 * @param centerIndex index of reference scene in quadCLTs
	 * @param earliestScene earliest processed scene index (inclusive)
	 * @param lastScene last processed scene index (inclusive)
	 * @param scenes_xyzatr per-scene poses [scene][{xyz,atr}][3], updated in place
	 * @param paramSelect enabled parameters mask, full DP_NUM_PARS size (scene pose subset is used here)
	 * @param paramRegweights local per-parameter regularization weights, full DP_NUM_PARS size
	 * @param paramLpf inter-scene low-pass curvature weights, full DP_NUM_PARS size
	 * @param centerDisparity reference disparity map (null -> centerCLT current)
	 * @param reliableRef optional tile reliability mask for reference
	 * @param disableErs true to disable ERS-related parameters in local solves
	 * @param mb_max_gain motion blur gain limit
	 * @param options global solver options
	 * @param debugLevel debug level
	 * @return summary result
	 */
	public static Result refineAllToReference(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT,
			final int centerIndex,
			final int earliestScene,
			final int lastScene,
			final double[][][] scenes_xyzatr,
			final boolean[] paramSelect,
			final double[] paramRegweights,
			final double[] paramLpf,
			final double[] centerDisparity,
			final boolean[] reliableRef,
			final boolean disableErs,
			final double mb_max_gain,
			final Options options,
			final int debugLevel) {
		final Result result = new Result();
		if ((quadCLTs == null) || (centerCLT == null) || (scenes_xyzatr == null) || (options == null)) {
			return result;
		}
		final int firstScene = Math.max(0, earliestScene);
		final int endScene = Math.min(lastScene, quadCLTs.length - 1);
		if (endScene < firstScene) {
			return result;
		}
		final int nvars = endScene - firstScene + 1;
		final boolean[] activePoseScene = new boolean[nvars];
		int numActive = 0;
		for (int nscene = firstScene; nscene <= endScene; nscene++) {
			final int ivar = nscene - firstScene;
			activePoseScene[ivar] = (nscene != centerIndex) &&
					(quadCLTs[nscene] != null) &&
					(scenes_xyzatr[nscene] != null);
			if (activePoseScene[ivar]) {
				numActive++;
			}
		}
		if (numActive == 0) {
			return result;
		}
		final double[] refDisparity = (centerDisparity != null) ? centerDisparity : centerCLT.getDLS()[clt_parameters.imp.use_lma_dsi ? 1 : 0];
		final double[][] pXpYDCenter = OpticalFlow.transformToScenePxPyD(
				null,
				refDisparity,
				Interscene.ZERO3,
				Interscene.ZERO3,
				centerCLT,
				centerCLT);
		final double[] sceneTimes = getSceneTimes(
				quadCLTs,
				firstScene,
				endScene);

		final boolean[] lmaParamSelect = getSceneParamMask(
				paramSelect,
				disableErs);
		final int[] sceneParIndices = getActiveSceneParameterIndices(lmaParamSelect);
		final int npars = sceneParIndices.length;
		if (npars == 0) {
			if (debugLevel > -2) {
				System.out.println("IntersceneGlobalRefine: no active scene parameters selected, nothing to optimize.");
			}
			return result;
		}
		final double[] lmaParamRegweights = new double[ErsCorrection.DP_NUM_PARS];
		if (paramRegweights != null) {
			System.arraycopy(
					paramRegweights,
					0,
					lmaParamRegweights,
					0,
					Math.min(paramRegweights.length, lmaParamRegweights.length));
		}
		zeroRateRegularization(lmaParamRegweights);
		final double[] sceneParamLpf = getSceneLpfWeights(
				sceneParIndices,
				paramLpf,
				options);
		final double[] x = new double[nvars * npars];
		for (int nscene = firstScene; nscene <= endScene; nscene++) {
			final int ivar = nscene - firstScene;
			final double[][] pose = getScenePose(
					scenes_xyzatr,
					nscene,
					centerIndex);
			setSceneState(
					x,
					ivar,
					getSceneVector(
							pose,
							sceneParIndices),
					npars);
		}

		for (int outer = 0; outer < options.outerIterations; outer++) {
			final double[] minMax = {options.minOffset, options.maxOffset, Double.NaN};
			final ArrayList<PairFactor> pairFactors = buildPairFactors(
					clt_parameters,
					quadCLTs,
					scenes_xyzatr,
					activePoseScene,
					firstScene,
					endScene,
					centerIndex,
					options.minOffset,
					debugLevel);
			final RefGpuData[] refGpuCache = new RefGpuData[nvars];
			// Global block-banded normal equation storage:
			// diag[i]     -> H(i,i)
			// offDiag1[i] -> H(i,i+1)
			// offDiag2[i] -> H(i,i+2)
			// rhs[i]      -> b(i)
			final double[][][] diag = new double[nvars][npars][npars];
			final double[][] rhs = new double[nvars][npars];
			final double[][][] offDiag1 = new double[Math.max(0, nvars - 1)][npars][npars];
			final double[][][] offDiag2 = new double[Math.max(0, nvars - 2)][npars][npars];

			int solvedPairs = 0;
			int failedPairs = 0;
			double rmsSum = 0.0;
			final boolean[] sceneSolved = new boolean[nvars];
			for (int ifactor = 0; ifactor < pairFactors.size(); ifactor++) {
				final PairFactor pf = pairFactors.get(ifactor);
				final int ivar = pf.sceneIndex - firstScene;
				if ((ivar < 0) || (ivar >= nvars) || !activePoseScene[ivar]) {
					continue;
				}
				final RefGpuData refGpu = getReferenceGpuData(
						clt_parameters,
						quadCLTs,
						scenes_xyzatr,
						sceneTimes,
						firstScene,
						endScene,
						centerIndex,
						pf.refSceneIndex,
						refDisparity,
						pXpYDCenter,
						reliableRef,
						mb_max_gain,
						refGpuCache,
						debugLevel);
				final LocalSystem ls = buildLocalSystem(
						clt_parameters,
						quadCLTs,
						centerCLT,
						centerIndex,
						pf.sceneIndex,
						pf.refSceneIndex,
						firstScene,
						endScene,
						scenes_xyzatr,
						sceneTimes,
						refGpu.pXpYD,
						refGpu.tasks,
						refGpu.reliableMask,
						refDisparity,
						minMax,
						paramSelect,
						lmaParamRegweights,
						sceneParIndices,
						activePoseScene,
						disableErs,
						mb_max_gain,
						diag,
						offDiag1,
						offDiag2,
						rhs,
						debugLevel);
				if (ls.valid) {
					solvedPairs++;
					rmsSum += (ls.rms == null) ? 0.0 : ls.rms[0];
					sceneSolved[ivar] = true;
				} else {
					failedPairs++;
				}
			}
			int solved = 0;
			for (int i = 0; i < nvars; i++) {
				if (activePoseScene[i] && sceneSolved[i]) {
					solved++;
				}
			}
			final int failed = numActive - solved;
			result.solvedScenes = solved;
			result.failedScenes = failed;
			result.avgPairRms = (solvedPairs > 0) ? (rmsSum / solvedPairs) : Double.NaN;
			if (solvedPairs == 0) {
				break;
			}

			assembleCurvatureBlocks(
					x,
					diag,
					offDiag1,
					offDiag2,
					rhs,
					sceneParamLpf,
					activePoseScene,
					npars);
			regularizeDiagonalAndFixInactive(
					diag,
					rhs,
					options.lambdaDiag,
					activePoseScene,
					npars);
			final double[] delta = new double[nvars * npars];
			final int pcgIter = solvePcg(
					diag,
					offDiag1,
					offDiag2,
					rhs,
					delta,
					options.pcgIterations,
					options.pcgTolerance,
					npars);
			result.pcgIterationsLast = pcgIter;
			double maxDelta = 0.0;
			for (int nscene = firstScene; nscene <= endScene; nscene++) {
				final int ivar = nscene - firstScene;
				if (!activePoseScene[ivar]) {
					continue;
				}
				final double[] state = getSceneState(
						x,
						ivar,
						npars);
				for (int k = 0; k < npars; k++) {
					final double d = delta[ivar * npars + k];
					state[k] += d;
					maxDelta = Math.max(maxDelta, Math.abs(d));
				}
				setSceneState(
						x,
					ivar,
					state,
					npars);
				if (scenes_xyzatr[nscene] != null) {
					setSceneVector(
							scenes_xyzatr[nscene],
							state,
							sceneParIndices);
				}
			}
			result.maxDelta = maxDelta;
			result.outerDone = outer + 1;
			if (debugLevel > -3) {
				System.out.println(String.format(
						"IntersceneGlobalRefine: outer=%d, solvedScenes=%d, failedScenes=%d, solvedPairs=%d, failedPairs=%d, avgPairRms=%8.6f, maxDelta=%8.6g, pcgIter=%d",
						outer,
						solved,
						failed,
						solvedPairs,
						failedPairs,
						result.avgPairRms,
						result.maxDelta,
						pcgIter));
			}
			if (maxDelta < options.deltaStop) {
				break;
			}
		}
		if (debugLevel > -4) {
			System.out.println(String.format(
					"IntersceneGlobalRefine: finished, outerDone=%d, solved=%d, failed=%d, avgRms=%8.6f",
					result.outerDone,
					result.solvedScenes,
					result.failedScenes,
					result.avgPairRms));
		}
		return result;
	}

	// -----------------------------------------------------------------------------
	// Pair/reference orchestration
	// -----------------------------------------------------------------------------

	/**
	 * Build pair factors for one outer iteration.
	 * Base set is scene-to-center for all active scenes.
	 * Optional add-on uses legacy {@link Interscene#getFPNPairs(...)} to add
	 * non-center parent references for FPN-prone scenes.
	 */
	private static ArrayList<PairFactor> buildPairFactors(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr,
			final boolean[] activePoseScene,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final double minOffset,
			final int debugLevel) {
		final ArrayList<PairFactor> factors = new ArrayList<PairFactor>();
		final HashSet<Long> used = new HashSet<Long>();

		for (int nscene = firstScene; nscene <= lastScene; nscene++) {
			final int ivar = nscene - firstScene;
			if ((ivar >= 0) && (ivar < activePoseScene.length) && activePoseScene[ivar]) {
				final long key = (((long) nscene) << 32) | (centerIndex & 0xffffffffL);
				if (used.add(key)) {
					factors.add(new PairFactor(
							nscene,
							centerIndex,
							false));
				}
			}
		}
		if (!clt_parameters.imp.fmg_initial_en) {
			return factors;
		}
		final double avgZ = getAverageAbsZ(
				scenes_xyzatr,
				activePoseScene,
				firstScene);
		final ArrayList<Integer> fpnList = new ArrayList<Integer>();
		for (int nscene = firstScene; nscene <= lastScene; nscene++) {
			final int ivar = nscene - firstScene;
			if ((ivar < 0) || (ivar >= activePoseScene.length) || !activePoseScene[ivar]) {
				continue;
			}
			double estShift = quadCLTs[centerIndex].estimateAverageShift(
					getScenePose(scenes_xyzatr, centerIndex, centerIndex),
					getScenePose(scenes_xyzatr, nscene, centerIndex),
					avgZ,
					false,
					clt_parameters.imp.fmg_rectilinear);
			if (estShift < minOffset) {
				estShift = quadCLTs[centerIndex].estimateAverageShift(
						getScenePose(scenes_xyzatr, centerIndex, centerIndex),
						getScenePose(scenes_xyzatr, nscene, centerIndex),
						avgZ,
						true,
						clt_parameters.imp.fmg_rectilinear);
			}
			if (estShift < minOffset) {
				fpnList.add(nscene);
			}
		}
		if (fpnList.isEmpty()) {
			return factors;
		}
		double fmgDistance = clt_parameters.imp.fmg_distance;
		if (fmgDistance < (minOffset + 2.0)) {
			fmgDistance = minOffset + 2.0;
		}
		final int[][] fpnPairs = Interscene.getFPNPairs(
				fpnList,
				fmgDistance,
				clt_parameters.imp.fmg_rectilinear,
				quadCLTs,
				scenes_xyzatr,
				avgZ,
				centerIndex,
				firstScene);
		int addedNonCenter = 0;
		for (int i = 0; i < fpnPairs.length; i++) {
			final int scene = fpnPairs[i][0];
			final int ref = fpnPairs[i][1];
			if ((scene < firstScene) || (scene > lastScene) || (ref < firstScene) || (ref > lastScene)) {
				continue;
			}
			if ((scene == ref) || (ref == centerIndex)) {
				continue;
			}
			final int ivarScene = scene - firstScene;
			final int ivarRef = ref - firstScene;
			if ((ivarScene < 0) || (ivarScene >= activePoseScene.length) || !activePoseScene[ivarScene]) {
				continue;
			}
			if ((ivarRef < 0) || (ivarRef >= activePoseScene.length) || !activePoseScene[ivarRef]) {
				continue;
			}
			if ((quadCLTs[ref] == null) || (scenes_xyzatr[ref] == null)) {
				continue;
			}
			final long key = (((long) scene) << 32) | (ref & 0xffffffffL);
			if (used.add(key)) {
				factors.add(new PairFactor(
						scene,
						ref,
						true));
				addedNonCenter++;
				if (debugLevel > 1) {
					System.out.println("IntersceneGlobalRefine: added non-center pair scene=" + scene + " ref=" + ref);
				}
			}
		}
		if (debugLevel > -3) {
			System.out.println("IntersceneGlobalRefine: pair factors total=" + factors.size() +
					", nonCenterAdded=" + addedNonCenter + ", fpnCandidates=" + fpnList.size());
		}
		return factors;
	}

	private static double getAverageAbsZ(
			final double[][][] scenes_xyzatr,
			final boolean[] activePoseScene,
			final int firstScene) {
		double sum = 0.0;
		int n = 0;
		for (int ivar = 0; ivar < activePoseScene.length; ivar++) {
			if (!activePoseScene[ivar]) {
				continue;
			}
			final int nscene = firstScene + ivar;
			if ((nscene >= 0) && (nscene < scenes_xyzatr.length) && (scenes_xyzatr[nscene] != null)) {
				sum += Math.abs(scenes_xyzatr[nscene][0][2]);
				n++;
			}
		}
		return (n > 0) ? Math.max(1.0, sum / n) : 1.0;
	}

	/**
	 * Build or fetch GPU reference data for one reference scene.
	 * Uses center disparity projected into the selected reference frame.
	 */
	private static RefGpuData getReferenceGpuData(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr,
			final double[] sceneTimes,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final int refScene,
			final double[] refDisparity,
			final double[][] pXpYDCenter,
			final boolean[] reliableRefCenter,
			final double mb_max_gain,
			final RefGpuData[] cache,
			final int debugLevel) {
		final int ivar = refScene - firstScene;
		if ((ivar >= 0) && (ivar < cache.length) && (cache[ivar] != null) &&
				(cache[ivar].refSceneIndex == refScene) && (cache[ivar].tasks != null) &&
				(cache[ivar].pXpYD != null)) {
			return cache[ivar];
		}
		if ((refScene < 0) || (refScene >= quadCLTs.length) || (quadCLTs[refScene] == null)) {
			return null;
		}
		final RefGpuData rslt = new RefGpuData();
		rslt.refSceneIndex = refScene;
		final double[][] refPose = getScenePose(
				scenes_xyzatr,
				refScene,
				centerIndex);
		final double[][] refRates = getSceneRates(
				refScene,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				firstScene,
				lastScene,
				centerIndex);
		rslt.pXpYD = (refScene == centerIndex) ? pXpYDCenter : OpticalFlow.transformToScenePxPyD(
				null,
				refDisparity,
				refPose[0],
				refPose[1],
				quadCLTs[refScene],
				quadCLTs[centerIndex]);
		rslt.reliableMask = (refScene == centerIndex) ? reliableRefCenter : null;
		double[][] mbVectorsRef = null;
		if (clt_parameters.imp.mb_en) {
			final double[][] refDt = QuadCLTCPU.scaleDtFromErs(
					clt_parameters,
					new double[][] {
						refRates[0].clone(),
						refRates[1].clone()});
			mbVectorsRef = OpticalFlow.getMotionBlur(
					quadCLTs[centerIndex],
					quadCLTs[refScene],
					pXpYDCenter,
					refPose[0],
					refPose[1],
					refDt[0],
					refDt[1],
					0,
					debugLevel);
		}
		final TpTask[][] tasks = Interscene.setReferenceGPU(
				clt_parameters,
				quadCLTs[refScene],
				refDisparity,
				rslt.pXpYD,
				null,
				rslt.reliableMask,
				0,
				clt_parameters.imp.mb_tau,
				mb_max_gain,
				mbVectorsRef,
				debugLevel);
		if ((tasks == null) || (tasks.length == 0) || (tasks[0] == null)) {
			return null;
		}
		rslt.tasks = tasks[0];
		if ((ivar >= 0) && (ivar < cache.length)) {
			cache[ivar] = rslt;
		}
		return rslt;
	}

	private static LocalSystem buildLocalSystem(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT,
			final int centerIndex,
			final int nscene,
			final int refScene,
			final int earliestScene,
			final int lastScene,
			final double[][][] scenes_xyzatr,
			final double[] sceneTimes,
			final double[][] pXpYDRef,
			final TpTask[] tpTaskRef,
			final boolean[] reliableRef,
			final double[] refDisparity,
			final double[] minMax,
			final boolean[] paramSelect,
			final double[] paramRegweights,
			final int[] sceneParIndices,
			final boolean[] activePoseScene,
			final boolean disableErs,
			final double mb_max_gain,
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs,
			final int debugLevel) {
		final LocalSystem ls = new LocalSystem();
		if ((quadCLTs[nscene] == null) || (scenes_xyzatr[nscene] == null)) {
			return ls;
		}
		final double[][] poseNow = getScenePose(
				scenes_xyzatr,
				nscene,
				centerIndex);
		final double[][] refPose = getScenePose(
				scenes_xyzatr,
				refScene,
				centerIndex);
		final double[][] sceneRates = getSceneRates(
				nscene,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				centerIndex);
		final double[][] refRates = getSceneRates(
				refScene,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				centerIndex);
		final boolean mb_en = clt_parameters.imp.mb_en;
		final double mb_tau = clt_parameters.imp.mb_tau;
		double[][] mbVectorsScene = null;
		if (mb_en) {
			final double[][] sceneDt = QuadCLTCPU.scaleDtFromErs(
					clt_parameters,
					new double[][] {
						sceneRates[0].clone(),
						sceneRates[1].clone()});
			mbVectorsScene = OpticalFlow.getMotionBlur(
					centerCLT,
					quadCLTs[nscene],
					pXpYDRef,
					poseNow[0],
					poseNow[1],
					sceneDt[0],
					sceneDt[1],
					0,
					debugLevel);
		}
		final int[] failReason = {0};
		final double[][][] coordMotion = Interscene.interCorrPair(
				clt_parameters,
				clt_parameters.ilp.ilma_3d,
				false,
				mb_max_gain,
				minMax,
				failReason,
				centerCLT,
				refDisparity,
				quadCLTs[refScene],
				pXpYDRef,
				tpTaskRef,
				quadCLTs[nscene],
				poseNow[0],
				poseNow[1],
				reliableRef,
				clt_parameters.imp.margin,
				clt_parameters.imp.sensor_mask_inter,
				null,
				null,
				false,
				false,
				true,
				mbVectorsScene,
				-1,
				false,
				clt_parameters.imp.debug_level,
				debugLevel);
		if ((coordMotion == null) || (coordMotion.length < 2) || (coordMotion[0] == null) || (coordMotion[1] == null)) {
			return ls;
		}
		final double[][] eigen = (coordMotion.length > 2) ? coordMotion[2] : null;
		final boolean[] localParamSelect = getLocalParamMaskForPair(
				paramSelect,
				nscene,
				refScene,
				centerIndex,
				disableErs);
		final IntersceneLma lma = new IntersceneLma(
				clt_parameters.ilp.ilma_thread_invariant,
				clt_parameters.ilp.ilma_3d_lma ? clt_parameters.ilp.ilma_disparity_weight : 0.0);
		final ErsCorrection ersRef = quadCLTs[refScene].getErsCorrection();
		final ErsCorrection ersScene = quadCLTs[nscene].getErsCorrection();
		final double[] refWatrBak = ersRef.ers_watr_center_dt.clone();
		final double[] refWxyzBak = ersRef.ers_wxyz_center_dt.clone();
		final double[] sceneWatrBak = ersScene.ers_watr_center_dt.clone();
		final double[] sceneWxyzBak = ersScene.ers_wxyz_center_dt.clone();
		ersRef.ers_watr_center_dt = refRates[1].clone();
		ersRef.ers_wxyz_center_dt = refRates[0].clone();
		ersScene.ers_watr_center_dt = sceneRates[1].clone();
		ersScene.ers_wxyz_center_dt = sceneRates[0].clone();
		lma.prepareLMA(
				poseNow,
				null,
				refPose,
				quadCLTs[nscene],
				quadCLTs[refScene],
				localParamSelect,
				paramRegweights,
				clt_parameters.imp.eig_max_sqrt,
				clt_parameters.imp.eig_min_sqrt,
				eigen,
				coordMotion[1],
				reliableRef,
				coordMotion[0],
				false,
				false,
				null,
				debugLevel);
		ersRef.ers_watr_center_dt = refWatrBak;
		ersRef.ers_wxyz_center_dt = refWxyzBak;
		ersScene.ers_watr_center_dt = sceneWatrBak;
		ersScene.ers_wxyz_center_dt = sceneWxyzBak;
		final IntersceneLma.NormalEquationData ne = lma.getNormalEquationData(0.0);
		if ((ne == null) || (ne.h == null) || (ne.b == null) || (ne.parameterIndices == null)) {
			return ls;
		}
		accumulateMappedNormalEquation(
				ne,
				nscene,
				refScene,
				centerIndex,
				sceneParIndices,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				activePoseScene,
				diag,
				offDiag1,
				offDiag2,
				rhs);
		ls.valid = true;
		ls.rms = (ne.rms == null) ? null : ne.rms.clone();
		return ls;
	}

	private static boolean[] getLocalParamMaskForPair(
			final boolean[] paramSelect,
			final int sceneIndex,
			final int refSceneIndex,
			final int centerIndex,
			final boolean disableErs) {
		// Local mask policy:
		// - Independent optimization parameters are scene XYZATR only.
		// - Reference XYZATR is always fixed.
		// - Rate terms are enabled only as dependent auxiliaries for chain-rule mapping.
		final boolean[] mask = new boolean[ErsCorrection.DP_NUM_PARS];
		if (paramSelect != null) {
			for (int i = 0; i < SCENE_POSE_PAR_INDICES.length; i++) {
				final int pi = SCENE_POSE_PAR_INDICES[i];
				if (pi < paramSelect.length) {
					mask[pi] = paramSelect[pi];
				}
			}
		} else {
			for (int i = 0; i < SCENE_POSE_PAR_INDICES.length; i++) {
				mask[SCENE_POSE_PAR_INDICES[i]] = true;
			}
		}
		if (sceneIndex == centerIndex) {
			for (int i = 0; i < SCENE_POSE_PAR_INDICES.length; i++) {
				mask[SCENE_POSE_PAR_INDICES[i]] = false;
			}
		}
		for (int i = 0; i < REF_RATE_ANG_PAR_INDICES.length; i++) {
			mask[REF_RATE_ANG_PAR_INDICES[i]] = (refSceneIndex != centerIndex) && !disableErs;
			mask[SCENE_RATE_ANG_PAR_INDICES[i]] = !disableErs;
			mask[REF_RATE_LIN_PAR_INDICES[i]] = (refSceneIndex != centerIndex);
			mask[SCENE_RATE_LIN_PAR_INDICES[i]] = true;
		}
		return mask;
	}

	private static void zeroRateRegularization(final double[] paramRegweights) {
		for (int i = 0; i < REF_RATE_ANG_PAR_INDICES.length; i++) {
			paramRegweights[REF_RATE_ANG_PAR_INDICES[i]] = 0.0;
			paramRegweights[REF_RATE_LIN_PAR_INDICES[i]] = 0.0;
			paramRegweights[SCENE_RATE_ANG_PAR_INDICES[i]] = 0.0;
			paramRegweights[SCENE_RATE_LIN_PAR_INDICES[i]] = 0.0;
		}
	}

	private static double[][] getScenePose(
			final double[][][] scenes_xyzatr,
			final int sceneIndex,
			final int centerIndex) {
		if (sceneIndex == centerIndex) {
			return new double[][] {
				Interscene.ZERO3.clone(),
				Interscene.ZERO3.clone()};
		}
		if ((sceneIndex < 0) || (sceneIndex >= scenes_xyzatr.length) || (scenes_xyzatr[sceneIndex] == null)) {
			return new double[][] {
				Interscene.ZERO3.clone(),
				Interscene.ZERO3.clone()};
		}
		return new double[][] {
			scenes_xyzatr[sceneIndex][0].clone(),
			scenes_xyzatr[sceneIndex][1].clone()};
	}

	private static double[] getSceneTimes(
			final QuadCLT[] quadCLTs,
			final int firstScene,
			final int lastScene) {
		final double[] sceneTimes = new double[quadCLTs.length];
		Arrays.fill(sceneTimes, Double.NaN);
		for (int nscene = firstScene; nscene <= lastScene; nscene++) {
			if (quadCLTs[nscene] == null) {
				continue;
			}
			sceneTimes[nscene] = parseSceneTimestamp(quadCLTs[nscene].getImageName());
		}
		double t = 0.0;
		for (int nscene = firstScene; nscene <= lastScene; nscene++) {
			if (Double.isNaN(sceneTimes[nscene])) {
				sceneTimes[nscene] = t;
			}
			t = sceneTimes[nscene] + DEFAULT_DT;
		}
		return sceneTimes;
	}

	private static double parseSceneTimestamp(final String ts) {
		if (ts == null) {
			return Double.NaN;
		}
		final String[] parts = ts.trim().split("_");
		if (parts.length != 2) {
			return Double.NaN;
		}
		try {
			final long sec = Long.parseLong(parts[0]);
			final long usec = Long.parseLong(parts[1]);
			return sec + (usec * 1.0e-6);
		} catch (NumberFormatException nfe) {
			return Double.NaN;
		}
	}

	private static int findPrevScene(
			final int sceneIndex,
			final int firstScene,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr) {
		for (int i = sceneIndex - 1; i >= firstScene; i--) {
			if ((quadCLTs[i] != null) && (scenes_xyzatr[i] != null)) {
				return i;
			}
		}
		return -1;
	}

	private static int findNextScene(
			final int sceneIndex,
			final int lastScene,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr) {
		for (int i = sceneIndex + 1; i <= lastScene; i++) {
			if ((quadCLTs[i] != null) && (scenes_xyzatr[i] != null)) {
				return i;
			}
		}
		return -1;
	}

	private static double getPoseComponent(
			final double[][][] scenes_xyzatr,
			final int sceneIndex,
			final int centerIndex,
			final boolean angular,
			final int comp) {
		if (sceneIndex == centerIndex) {
			return 0.0;
		}
		if ((sceneIndex < 0) || (sceneIndex >= scenes_xyzatr.length) || (scenes_xyzatr[sceneIndex] == null)) {
			return 0.0;
		}
		return angular ? scenes_xyzatr[sceneIndex][1][comp] : scenes_xyzatr[sceneIndex][0][comp];
	}

	private static double safeDt(
			final int i0,
			final int i1,
			final double[] sceneTimes) {
		if ((i0 < 0) || (i1 < 0) || (i0 >= sceneTimes.length) || (i1 >= sceneTimes.length)) {
			return DEFAULT_DT;
		}
		final double dt = Math.abs(sceneTimes[i1] - sceneTimes[i0]);
		return (dt > MIN_DIAG) ? dt : DEFAULT_DT;
	}

	private static double wrapPi(double a) {
		while (a > Math.PI) {
			a -= (2.0 * Math.PI);
		}
		while (a < -Math.PI) {
			a += (2.0 * Math.PI);
		}
		return a;
	}

	private static ArrayList<RateTerm> getRateTerms(
			final int sceneIndex,
			final int firstScene,
			final int lastScene,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr,
			final double[] sceneTimes) {
		final int prev = findPrevScene(
				sceneIndex,
				firstScene,
				quadCLTs,
				scenes_xyzatr);
		final int next = findNextScene(
				sceneIndex,
				lastScene,
				quadCLTs,
				scenes_xyzatr);
		final ArrayList<RateTerm> terms = new ArrayList<RateTerm>(2);
		if ((prev < 0) && (next < 0)) {
			return terms;
		}
		if (prev < 0) {
			final double dt = safeDt(
					sceneIndex,
					next,
					sceneTimes);
			terms.add(new RateTerm(sceneIndex, -1.0 / dt));
			terms.add(new RateTerm(next,       1.0 / dt));
			return terms;
		}
		if (next < 0) {
			final double dt = safeDt(
					prev,
					sceneIndex,
					sceneTimes);
			terms.add(new RateTerm(prev,      -1.0 / dt));
			terms.add(new RateTerm(sceneIndex, 1.0 / dt));
			return terms;
		}
		final double dt = safeDt(
				prev,
				next,
				sceneTimes);
		terms.add(new RateTerm(prev, -1.0 / dt));
		terms.add(new RateTerm(next,  1.0 / dt));
		return terms;
	}

	private static double getRateValue(
			final int sceneIndex,
			final boolean angular,
			final int comp,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes) {
		final int prev = findPrevScene(
				sceneIndex,
				firstScene,
				quadCLTs,
				scenes_xyzatr);
		final int next = findNextScene(
				sceneIndex,
				lastScene,
				quadCLTs,
				scenes_xyzatr);
		if ((prev < 0) && (next < 0)) {
			return 0.0;
		}
		if (prev < 0) {
			final double dt = safeDt(
					sceneIndex,
					next,
					sceneTimes);
			final double d = getPoseComponent(scenes_xyzatr, next, centerIndex, angular, comp) -
					getPoseComponent(scenes_xyzatr, sceneIndex, centerIndex, angular, comp);
			return (angular ? wrapPi(d) : d) / dt;
		}
		if (next < 0) {
			final double dt = safeDt(
					prev,
					sceneIndex,
					sceneTimes);
			final double d = getPoseComponent(scenes_xyzatr, sceneIndex, centerIndex, angular, comp) -
					getPoseComponent(scenes_xyzatr, prev, centerIndex, angular, comp);
			return (angular ? wrapPi(d) : d) / dt;
		}
		final double dt = safeDt(
				prev,
				next,
				sceneTimes);
		final double d = getPoseComponent(scenes_xyzatr, next, centerIndex, angular, comp) -
				getPoseComponent(scenes_xyzatr, prev, centerIndex, angular, comp);
		return (angular ? wrapPi(d) : d) / dt;
	}

	private static double[][] getSceneRates(
			final int sceneIndex,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes,
			final int firstScene,
			final int lastScene,
			final int centerIndex) {
		final double[][] rates = new double[2][3]; // [xyz,atr]
		for (int i = 0; i < 3; i++) {
			rates[0][i] = getRateValue(
					sceneIndex,
					false,
					i,
					firstScene,
					lastScene,
					centerIndex,
					scenes_xyzatr,
					quadCLTs,
					sceneTimes);
			rates[1][i] = getRateValue(
					sceneIndex,
					true,
					i,
					firstScene,
					lastScene,
					centerIndex,
					scenes_xyzatr,
					quadCLTs,
					sceneTimes);
		}
		return rates;
	}

	// -----------------------------------------------------------------------------
	// Chain-rule projection: local pair normal equations -> global pose-only system
	// -----------------------------------------------------------------------------

	private static ArrayList<GlobalTerm> mapLocalParameterToGlobalTerms(
			final int parIndex,
			final int sceneIndex,
			final int refSceneIndex,
			final int centerIndex,
			final int[] sceneParIndices,
			final int firstScene,
			final int lastScene,
			final boolean[] activePoseScene,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes) {
		final ArrayList<GlobalTerm> rslt = new ArrayList<GlobalTerm>(2);
		switch (parIndex) {
		case ErsCorrection.DP_DSAZ:
		case ErsCorrection.DP_DSTL:
		case ErsCorrection.DP_DSRL:
		case ErsCorrection.DP_DSX:
		case ErsCorrection.DP_DSY:
		case ErsCorrection.DP_DSZ:
		{
			final int pLocal = mapSceneParameterIndex(
					parIndex,
					sceneParIndices);
			if (pLocal >= 0) {
				final int ivar = sceneIndex - firstScene;
				if ((ivar >= 0) && (ivar < activePoseScene.length) && activePoseScene[ivar]) {
					rslt.add(new GlobalTerm(ivar, pLocal, 1.0));
				}
			}
			break;
		}
		case ErsCorrection.DP_DSVAZ:
		case ErsCorrection.DP_DSVTL:
		case ErsCorrection.DP_DSVRL:
		{
			final int comp = parIndex - ErsCorrection.DP_DSVAZ;
			final int pLocal = mapSceneParameterIndex(
					ErsCorrection.DP_DSAZ + comp,
					sceneParIndices);
			if (pLocal >= 0) {
				final ArrayList<RateTerm> rts = getRateTerms(
						sceneIndex,
						firstScene,
						lastScene,
						quadCLTs,
						scenes_xyzatr,
						sceneTimes);
				for (RateTerm rt : rts) {
					if (rt.sceneIndex == centerIndex) {
						continue;
					}
					final int ivar = rt.sceneIndex - firstScene;
					if ((ivar >= 0) && (ivar < activePoseScene.length) && activePoseScene[ivar]) {
						rslt.add(new GlobalTerm(ivar, pLocal, rt.coeff));
					}
				}
			}
			break;
		}
		case ErsCorrection.DP_DSVX:
		case ErsCorrection.DP_DSVY:
		case ErsCorrection.DP_DSVZ:
		{
			final int comp = parIndex - ErsCorrection.DP_DSVX;
			final int pLocal = mapSceneParameterIndex(
					ErsCorrection.DP_DSX + comp,
					sceneParIndices);
			if (pLocal >= 0) {
				final ArrayList<RateTerm> rts = getRateTerms(
						sceneIndex,
						firstScene,
						lastScene,
						quadCLTs,
						scenes_xyzatr,
						sceneTimes);
				for (RateTerm rt : rts) {
					if (rt.sceneIndex == centerIndex) {
						continue;
					}
					final int ivar = rt.sceneIndex - firstScene;
					if ((ivar >= 0) && (ivar < activePoseScene.length) && activePoseScene[ivar]) {
						rslt.add(new GlobalTerm(ivar, pLocal, rt.coeff));
					}
				}
			}
			break;
		}
		case ErsCorrection.DP_DVAZ:
		case ErsCorrection.DP_DVTL:
		case ErsCorrection.DP_DVRL:
		{
			final int comp = parIndex - ErsCorrection.DP_DVAZ;
			final int pLocal = mapSceneParameterIndex(
					ErsCorrection.DP_DSAZ + comp,
					sceneParIndices);
			if (pLocal >= 0) {
				final ArrayList<RateTerm> rts = getRateTerms(
						refSceneIndex,
						firstScene,
						lastScene,
						quadCLTs,
						scenes_xyzatr,
						sceneTimes);
				for (RateTerm rt : rts) {
					if (rt.sceneIndex == centerIndex) {
						continue;
					}
					final int ivar = rt.sceneIndex - firstScene;
					if ((ivar >= 0) && (ivar < activePoseScene.length) && activePoseScene[ivar]) {
						rslt.add(new GlobalTerm(ivar, pLocal, rt.coeff));
					}
				}
			}
			break;
		}
		case ErsCorrection.DP_DVX:
		case ErsCorrection.DP_DVY:
		case ErsCorrection.DP_DVZ:
		{
			final int comp = parIndex - ErsCorrection.DP_DVX;
			final int pLocal = mapSceneParameterIndex(
					ErsCorrection.DP_DSX + comp,
					sceneParIndices);
			if (pLocal >= 0) {
				final ArrayList<RateTerm> rts = getRateTerms(
						refSceneIndex,
						firstScene,
						lastScene,
						quadCLTs,
						scenes_xyzatr,
						sceneTimes);
				for (RateTerm rt : rts) {
					if (rt.sceneIndex == centerIndex) {
						continue;
					}
					final int ivar = rt.sceneIndex - firstScene;
					if ((ivar >= 0) && (ivar < activePoseScene.length) && activePoseScene[ivar]) {
						rslt.add(new GlobalTerm(ivar, pLocal, rt.coeff));
					}
				}
			}
			break;
		}
		default:
		}
		return rslt;
	}

	private static void addToBlockBand(
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final int i,
			final int j,
			final int ri,
			final int cj,
			final double v) {
		if (i == j) {
			diag[i][ri][cj] += v;
			return;
		}
		final int d = j - i;
		if (Math.abs(d) == 1) {
			if (d > 0) {
				offDiag1[i][ri][cj] += v;
			} else {
				offDiag1[j][cj][ri] += v;
			}
		} else if (Math.abs(d) == 2) {
			if (d > 0) {
				offDiag2[i][ri][cj] += v;
			} else {
				offDiag2[j][cj][ri] += v;
			}
		}
	}

	// -----------------------------------------------------------------------------
	// Global system assembly / solve
	// -----------------------------------------------------------------------------

	private static void accumulateMappedNormalEquation(
			final IntersceneLma.NormalEquationData ne,
			final int sceneIndex,
			final int refSceneIndex,
			final int centerIndex,
			final int[] sceneParIndices,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes,
			final int firstScene,
			final int lastScene,
			final boolean[] activePoseScene,
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs) {
		final ArrayList<GlobalTerm>[] maps = new ArrayList[ne.parameterIndices.length];
		for (int i = 0; i < maps.length; i++) {
			maps[i] = mapLocalParameterToGlobalTerms(
					ne.parameterIndices[i],
					sceneIndex,
					refSceneIndex,
					centerIndex,
					sceneParIndices,
					firstScene,
					lastScene,
					activePoseScene,
					scenes_xyzatr,
					quadCLTs,
					sceneTimes);
		}
		for (int i = 0; i < ne.parameterIndices.length; i++) {
			if (maps[i].isEmpty()) {
				continue;
			}
			for (GlobalTerm gi : maps[i]) {
				rhs[gi.varIndex][gi.parIndex] += gi.coeff * ne.b[i];
			}
			for (int j = 0; j < ne.parameterIndices.length; j++) {
				if (maps[j].isEmpty()) {
					continue;
				}
				for (GlobalTerm gi : maps[i]) {
					for (GlobalTerm gj : maps[j]) {
						addToBlockBand(
								diag,
								offDiag1,
								offDiag2,
								gi.varIndex,
								gj.varIndex,
								gi.parIndex,
								gj.parIndex,
								gi.coeff * ne.h[i][j] * gj.coeff);
					}
				}
			}
		}
	}

	private static boolean[] getSceneParamMask(
			final boolean[] paramSelect,
			final boolean disableErs) {
		// Note: disableErs is intentionally ignored here; global unknowns are pose-only.
		final boolean[] mask = new boolean[ErsCorrection.DP_NUM_PARS];
		if (paramSelect == null) {
			for (int i = 0; i < SCENE_POSE_PAR_INDICES.length; i++) {
				mask[SCENE_POSE_PAR_INDICES[i]] = true;
			}
		} else {
			for (int i = 0; i < SCENE_POSE_PAR_INDICES.length; i++) {
				final int pi = SCENE_POSE_PAR_INDICES[i];
				if (pi < paramSelect.length) {
					mask[pi] = paramSelect[pi];
				}
			}
		}
		// keep only pose slots for global unknowns (reference scenes remain fixed)
		return mask;
	}

	private static int[] getActiveSceneParameterIndices(final boolean[] sceneMask) {
		int n = 0;
		for (int i = 0; i < SCENE_POSE_PAR_INDICES.length; i++) {
			if (sceneMask[SCENE_POSE_PAR_INDICES[i]]) {
				n++;
			}
		}
		if (n == 0) {
			return new int[0];
		}
		final int[] indices = new int[n];
		int indx = 0;
		for (int i = 0; i < SCENE_POSE_PAR_INDICES.length; i++) {
			final int pi = SCENE_POSE_PAR_INDICES[i];
			if (sceneMask[pi]) {
				indices[indx++] = pi;
			}
		}
		return indices;
	}

	private static double[] getSceneLpfWeights(
			final int[] sceneParIndices,
			final double[] paramLpf,
			final Options options) {
		final double[] rslt = new double[sceneParIndices.length];
		for (int i = 0; i < sceneParIndices.length; i++) {
			final int parIndex = sceneParIndices[i];
			if ((paramLpf != null) && (parIndex < paramLpf.length)) {
				rslt[i] = paramLpf[parIndex];
			} else {
				switch (parIndex) {
				case ErsCorrection.DP_DSAZ:
				case ErsCorrection.DP_DSTL:
				case ErsCorrection.DP_DSRL:
					rslt[i] = options.smoothWeightAtr;
					break;
				case ErsCorrection.DP_DSX:
				case ErsCorrection.DP_DSY:
				case ErsCorrection.DP_DSZ:
					rslt[i] = options.smoothWeightXyz;
					break;
				default:
					rslt[i] = 0.0;
				}
			}
		}
		return rslt;
	}

	private static int mapSceneParameterIndex(
			final int parIndex,
			final int[] sceneParIndices) {
		for (int i = 0; i < sceneParIndices.length; i++) {
			if (sceneParIndices[i] == parIndex) {
				return i;
			}
		}
		return -1;
	}

	private static double getSceneParameterValue(
			final double[][] xyzatr,
			final int parIndex) {
		switch (parIndex) {
		case ErsCorrection.DP_DSAZ:
			return xyzatr[1][0];
		case ErsCorrection.DP_DSTL:
			return xyzatr[1][1];
		case ErsCorrection.DP_DSRL:
			return xyzatr[1][2];
		case ErsCorrection.DP_DSX:
			return xyzatr[0][0];
		case ErsCorrection.DP_DSY:
			return xyzatr[0][1];
		case ErsCorrection.DP_DSZ:
			return xyzatr[0][2];
		default:
			return 0.0;
		}
	}

	private static void setSceneParameterValue(
			final double[][] xyzatr,
			final int parIndex,
			final double value) {
		switch (parIndex) {
		case ErsCorrection.DP_DSAZ:
			xyzatr[1][0] = value;
			break;
		case ErsCorrection.DP_DSTL:
			xyzatr[1][1] = value;
			break;
		case ErsCorrection.DP_DSRL:
			xyzatr[1][2] = value;
			break;
		case ErsCorrection.DP_DSX:
			xyzatr[0][0] = value;
			break;
		case ErsCorrection.DP_DSY:
			xyzatr[0][1] = value;
			break;
		case ErsCorrection.DP_DSZ:
			xyzatr[0][2] = value;
			break;
		default:
		}
	}

	private static double[] getSceneVector(
			final double[][] xyzatr,
			final int[] sceneParIndices) {
		final double[] rslt = new double[sceneParIndices.length];
		for (int i = 0; i < sceneParIndices.length; i++) {
			rslt[i] = getSceneParameterValue(
					xyzatr,
					sceneParIndices[i]);
		}
		return rslt;
	}

	private static void setSceneVector(
			final double[][] xyzatr,
			final double[] vec,
			final int[] sceneParIndices) {
		for (int i = 0; i < sceneParIndices.length; i++) {
			setSceneParameterValue(
					xyzatr,
					sceneParIndices[i],
					vec[i]);
		}
	}

	private static void setSceneState(
			final double[] x,
			final int ivar,
			final double[] state,
			final int npars) {
		System.arraycopy(state, 0, x, ivar * npars, npars);
	}

	private static double[] getSceneState(
			final double[] x,
			final int ivar,
			final int npars) {
		final double[] rslt = new double[npars];
		System.arraycopy(x, ivar * npars, rslt, 0, npars);
		return rslt;
	}

	private static void assembleCurvatureBlocks(
			final double[] x,
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs,
			final double[] paramLpfScene,
			final boolean[] activePoseScene,
			final int npars) {
		if (diag.length < 3) {
			return;
		}
		for (int i = 1; i < (diag.length - 1); i++) {
			if (!activePoseScene[i - 1] || !activePoseScene[i] || !activePoseScene[i + 1]) {
				continue;
			}
			for (int k = 0; k < npars; k++) {
				final double w = paramLpfScene[k];
				if (w <= 0.0) {
					continue;
				}
				diag[i][k][k] += w;
				diag[i - 1][k][k] += 0.25 * w;
				diag[i + 1][k][k] += 0.25 * w;
				offDiag1[i - 1][k][k] += -0.5 * w;
				offDiag1[i][k][k] += -0.5 * w;
				offDiag2[i - 1][k][k] += 0.25 * w;
				final double e = x[i * npars + k] - 0.5 * (x[(i - 1) * npars + k] + x[(i + 1) * npars + k]);
				rhs[i][k] += -w * e;
				rhs[i - 1][k] += 0.5 * w * e;
				rhs[i + 1][k] += 0.5 * w * e;
			}
		}
	}

	private static void regularizeDiagonalAndFixInactive(
			final double[][][] diag,
			final double[][] rhs,
			final double lambdaDiag,
			final boolean[] activePoseScene,
			final int npars) {
		for (int i = 0; i < diag.length; i++) {
			if (!activePoseScene[i]) {
				for (int k = 0; k < npars; k++) {
					for (int c = 0; c < npars; c++) {
						diag[i][k][c] = 0.0;
					}
					diag[i][k][k] = 1.0;
					rhs[i][k] = 0.0;
				}
				continue;
			}
			for (int k = 0; k < npars; k++) {
				final double d = Math.max(MIN_DIAG, Math.abs(diag[i][k][k]));
				diag[i][k][k] += lambdaDiag * d;
			}
		}
	}

	private static int solvePcg(
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs,
			final double[] delta,
			final int maxIter,
			final double tol,
			final int npars) {
		final int nvars = diag.length;
		final int n = nvars * npars;
		Arrays.fill(delta, 0.0);
		final double[] b = new double[n];
		final double[] r = new double[n];
		final double[] z = new double[n];
		final double[] p = new double[n];
		final double[] ap = new double[n];
		final double[] invDiag = new double[n];
		for (int i = 0; i < nvars; i++) {
			for (int k = 0; k < npars; k++) {
				final int idx = i * npars + k;
				b[idx] = rhs[i][k];
				r[idx] = b[idx];
				invDiag[idx] = 1.0 / Math.max(MIN_DIAG, diag[i][k][k]);
				z[idx] = r[idx] * invDiag[idx];
				p[idx] = z[idx];
			}
		}
		double rz = dot(r, z);
		if (Math.abs(rz) < MIN_DIAG) {
			return 0;
		}
		int iter;
		for (iter = 0; iter < maxIter; iter++) {
			matVec(
					diag,
					offDiag1,
					offDiag2,
					p,
					ap,
					npars);
			final double pAp = dot(p, ap);
			if (Math.abs(pAp) < MIN_DIAG) {
				break;
			}
			final double alpha = rz / pAp;
			axpy(alpha, p, delta);
			axpy(-alpha, ap, r);
			if (normInf(r) < tol) {
				iter++;
				break;
			}
			for (int i = 0; i < n; i++) {
				z[i] = r[i] * invDiag[i];
			}
			final double rzNext = dot(r, z);
			if (Math.abs(rzNext) < MIN_DIAG) {
				break;
			}
			final double beta = rzNext / rz;
			for (int i = 0; i < n; i++) {
				p[i] = z[i] + beta * p[i];
			}
			rz = rzNext;
		}
		return iter;
	}

	private static void matVec(
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[] v,
			final double[] out,
			final int npars) {
		Arrays.fill(out, 0.0);
		final int nvars = diag.length;
		final Thread[] threads = ImageDtt.newThreadArray(Math.min(QuadCLT.THREADS_MAX, Math.max(1, nvars)));
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					for (int i = ai.getAndIncrement(); i < nvars; i = ai.getAndIncrement()) {
						final int i0 = i * npars;
						for (int r = 0; r < npars; r++) {
							double y = 0.0;
							for (int c = 0; c < npars; c++) {
								y += diag[i][r][c] * v[i0 + c];
							}
							if (i > 0) {
								for (int c = 0; c < npars; c++) {
									y += offDiag1[i - 1][c][r] * v[(i - 1) * npars + c];
								}
							}
							if (i < (nvars - 1)) {
								for (int c = 0; c < npars; c++) {
									y += offDiag1[i][r][c] * v[(i + 1) * npars + c];
								}
							}
							if (i > 1) {
								for (int c = 0; c < npars; c++) {
									y += offDiag2[i - 2][c][r] * v[(i - 2) * npars + c];
								}
							}
							if (i < (nvars - 2)) {
								for (int c = 0; c < npars; c++) {
									y += offDiag2[i][r][c] * v[(i + 2) * npars + c];
								}
							}
							out[i0 + r] = y;
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
	}

	private static double dot(final double[] a, final double[] b) {
		final int n = a.length;
		final int nthreads = Math.min(QuadCLT.THREADS_MAX, Math.max(1, n / 256));
		final Thread[] threads = ImageDtt.newThreadArray(nthreads);
		final AtomicInteger ai = new AtomicInteger(0);
		final double[] sums = new double[nthreads];
		for (int ithread = 0; ithread < nthreads; ithread++) {
			final int tid = ithread;
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					double s = 0.0;
					for (int i = ai.getAndIncrement(); i < n; i = ai.getAndIncrement()) {
						s += a[i] * b[i];
					}
					sums[tid] = s;
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		double sum = 0.0;
		for (double s : sums) {
			sum += s;
		}
		return sum;
	}

	private static void axpy(final double alpha, final double[] x, final double[] y) {
		for (int i = 0; i < y.length; i++) {
			y[i] += alpha * x[i];
		}
	}

	private static double normInf(final double[] v) {
		double m = 0.0;
		for (int i = 0; i < v.length; i++) {
			m = Math.max(m, Math.abs(v[i]));
		}
		return m;
	}
}
