/**
 **
 ** IntersceneGlobalRefine - Global sparse pose refinement around a fixed reference scene
 **
 ** Copyright (C) 2026 Elphel, Inc.
 **
 ** -----------------------------------------------------------------------------**
 **
 **  IntersceneGlobalRefine.java is free software: you can redistribute it and/or modify
 **  it under the terms of the GNU General Public License as published by
 **  the Free Software Foundation, either version 3 of the License, or
 **  (at your option) any later version.
 **
 **  This program is distributed in the hope that it will be useful,
 **  but WITHOUT ANY WARRANTY; without even the implied warranty of
 **  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 **  GNU General Public License for more details.
 **
 **  You should have received a copy of the GNU General Public License
 **  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 ** -----------------------------------------------------------------------------**
 **
 */
package com.elphel.imagej.tileprocessor;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;

import com.elphel.imagej.cameras.CLTParameters;
import com.elphel.imagej.common.ShowDoubleFloatArrays;
import com.elphel.imagej.gpu.TpTask;

import ij.ImagePlus;

/**
 * Temporary implementation/debug map (keep while stabilizing global refinement).
 *
 * <p>What this class currently does:
 * <ul>
 *   <li>Runs global refinement over a segment with one fixed center scene.</li>
 *   <li>Global unknowns are pose-only scene parameters: {@code DSAZ..DSZ} for non-center scenes.</li>
 *   <li>Builds per-pair local normal equations using {@link IntersceneLma} + GPU correlation.</li>
 *   <li>Supports both center-reference pairs and selected non-center parent pairs (FPN mitigation),
 *       using the same pairing logic as legacy code ({@link Interscene#getFPNPairs}).</li>
 *   <li>Projects local equations to global pose-only unknowns via chain rule:
 *       local rates ({@code DSV*, DV*}) are treated as dependent variables from neighboring poses.</li>
 * </ul>
 *
 * <p>Velocity/omega model:
 * <ul>
 *   <li>Rates are recomputed from neighboring scene poses each outer iteration.</li>
 *   <li>Interior scenes use centered differences; boundaries use one-sided differences.</li>
 *   <li>ATR differences are wrapped with {@code +/-pi} safety before division by {@code dt}.</li>
 *   <li>These rates are used in two places:
 *       GPU motion-blur vectors and local Jacobian-to-global chain mapping.</li>
 * </ul>
 *
 * <p>Global matrix data structures (block-sparse, band-limited in scene index):
 * <ul>
 *   <li>{@code nvars = lastScene-firstScene+1}, including center index slot (fixed by constraints).</li>
 *   <li>{@code npars = number of active pose params per scene} (subset of {@code DSAZ..DSZ}).</li>
 *   <li>Linearized system: {@code H * delta = b}.</li>
 *   <li>{@code diag[nvars][npars][npars]} stores block diagonal {@code H(i,i)}.</li>
 *   <li>{@code offDiag1[nvars-1][npars][npars]} stores block {@code H(i,i+1)}.</li>
 *   <li>{@code offDiag2[nvars-2][npars][npars]} stores block {@code H(i,i+2)}.</li>
 *   <li>By symmetry, {@code H(i+1,i)} and {@code H(i+2,i)} are implied/transposed in mat-vec.</li>
 *   <li>{@code rhs[nvars][npars]} stores the block vector {@code b}.</li>
 * </ul>
 *
 * <p>Assembly pipeline:
 * <ul>
 *   <li>{@code buildPairFactors()} creates scene-reference factors (center + optional non-center).</li>
 *   <li>{@code getReferenceGpuData()} prepares/caches GPU reference TD tasks per reference scene.</li>
 *   <li>Before GPU correlation, overlap masks are derived from center-frame offsets
 *       ({@code estimateCenterShiftXY()+maskByOverlap()}) to suppress far-field mapping fold artifacts.
 *       For non-center parent references, masks are intersected for both center->scene and center->ref.</li>
 *   <li>{@code buildObservationCache()} runs GPU pair correlation once per outer iteration and stores
 *       fixed-size observation cache for all pairs:
 *       {@code Y[pair][tile*2 + {0,1}] = {dx,dy}} and
 *       {@code W[pair][tile] = strength}.</li>
 *   <li>{@code accumulateCachedLocalSystem()} reuses cached observations in inner LM iterations.</li>
 *   <li>{@code accumulateMappedNormalEquation()} maps local params to global pose terms and adds:
 *       {@code A^T * H_local * A} and {@code A^T * b_local}.</li>
 *   <li>{@code assembleCurvatureBlocks()} adds 2nd-order inter-scene LPF:
 *       {@code x_i - 0.5*(x_{i-1}+x_{i+1})}.</li>
 *   <li>{@code regularizeDiagonalAndFixInactive()} adds LM-like diagonal and hard-fixes inactive slots.</li>
 *   <li>{@code solvePcg()} solves with multithreaded block-band mat-vec.</li>
 * </ul>
 *
 * <p>Important constraints:
 * <ul>
 *   <li>Reference scene pose is fixed (center pose is hard zero in this mode).</li>
 *   <li>{@code param_select} is interpreted only for scene pose params ({@code DSAZ..DSZ}).</li>
 *   <li>Rate params are auxiliary for local model/chain rule, not global independent unknowns.</li>
 * </ul>
 */
public class IntersceneGlobalRefine {
	// Scene pose parameter indices (DSAZ..DSZ) in ErsCorrection's parameter numbering. Per the class
	// notes these are the only global unknowns, and param_select is interpreted for this subset only.
	// NOTE(review): order looks like azimuth, tilt, roll, X, Y, Z judging by the constant names - confirm.
	private static final int[] SCENE_POSE_PAR_INDICES = {
			ErsCorrection.DP_DSAZ,
			ErsCorrection.DP_DSTL,
			ErsCorrection.DP_DSRL,
			ErsCorrection.DP_DSX,
			ErsCorrection.DP_DSY,
			ErsCorrection.DP_DSZ};
	// Pose parameter indices without the "S" infix (DAZ..DZ), listed in the same order as above.
	// NOTE(review): presumably the reference-side counterparts of the scene pose parameters - confirm.
	private static final int[] REF_POSE_PAR_INDICES = {
			ErsCorrection.DP_DAZ,
			ErsCorrection.DP_DTL,
			ErsCorrection.DP_DRL,
			ErsCorrection.DP_DX,
			ErsCorrection.DP_DY,
			ErsCorrection.DP_DZ};
	// Rate ("V") parameter indices, split into angular and linear triplets. The class notes treat
	// rates (DSV*, DV*) as dependent on neighboring poses rather than as independent unknowns.
	private static final int[] REF_RATE_ANG_PAR_INDICES = {
			ErsCorrection.DP_DVAZ,
			ErsCorrection.DP_DVTL,
			ErsCorrection.DP_DVRL};
	private static final int[] REF_RATE_LIN_PAR_INDICES = {
			ErsCorrection.DP_DVX,
			ErsCorrection.DP_DVY,
			ErsCorrection.DP_DVZ};
	private static final int[] SCENE_RATE_ANG_PAR_INDICES = {
			ErsCorrection.DP_DSVAZ,
			ErsCorrection.DP_DSVTL,
			ErsCorrection.DP_DSVRL};
	private static final int[] SCENE_RATE_LIN_PAR_INDICES = {
			ErsCorrection.DP_DSVX,
			ErsCorrection.DP_DSVY,
			ErsCorrection.DP_DSVZ};
	// Small positive threshold: refineAllToReference() accepts an XYZ normalization scale only when
	// it exceeds this value. NOTE(review): the name suggests it also floors matrix diagonals - confirm.
	private static final double MIN_DIAG = 1.0e-12;
	// NOTE(review): presumably an upper clamp for effective LPF weights; not used in the visible part of the file.
	private static final double MAX_EFFECTIVE_LPF_WEIGHT = 1.0e12;
	// Default value of Options.maxOffset.
	private static final double DEFAULT_MAX_OFFSET = 256.0;
	// NOTE(review): looks like a fallback inter-scene time step (1/60, presumably seconds) - confirm where it is applied.
	private static final double DEFAULT_DT = 1.0 / 60.0;

	/**
	 * Tunable settings for one global refinement run. Plain mutable fields with defaults;
	 * {@code refineAllToReference()} returns an empty result when given {@code null} options.
	 */
	public static class Options {
		// Upper bound of the outer loop; every outer pass rebuilds the pair list and the observation cache.
		public int outerIterations = 3;
		// Inner solve iterations per outer pass, reusing cached observations; values below 1 are treated as 1.
		public int innerIterations = 8;
		// Iteration limit passed to solvePcg().
		public int pcgIterations = 160;
		// Tolerance passed to solvePcg().
		public double pcgTolerance = 1.0e-7;
		// NOTE(review): presumably stop thresholds on the largest parameter step and on RMS change;
		// their use is outside the visible part of the file - confirm.
		public double deltaStop = 1.0e-5;
		public double rmsDiffStop = 0.001;
		// Damping value that lambda is reset to at the start of every outer pass.
		public double lambdaDiag = 0.1;
		// NOTE(review): presumably lambda multipliers after an improving / worsening step and the
		// lambda ceiling (LM-style damping schedule); their use is outside the visible part - confirm.
		public double lambdaScaleGood = 0.5;
		public double lambdaScaleBad = 8.0;
		public double lambdaMax = 100.0;
		// NOTE(review): presumably smoothing weights for angular (ATR) and linear (XYZ) parameters;
		// consumed outside the visible part of the file - confirm.
		public double smoothWeightAtr = 1.0;
		public double smoothWeightXyz = 0.15;
		// Passed to buildPairFactors() and, together with maxOffset, handed to buildObservationCache()
		// as the {min, max, NaN} offset triple.
		public double minOffset = 3.0;
		public double maxOffset = DEFAULT_MAX_OFFSET;
		// Forces the per-outer observation hyperstack dump on (unless saveInitialFinalOnly is set).
		public boolean debugDumpObservationHyperstack = false;
		// Honored only when debugDumpObservationHyperstack is set: forwarded to the dump as its "show" flag.
		public boolean debugShowObservationHyperstack = false;
		// Debug/test mode: keep rate terms fixed during inner loop and disable
		// rate-to-pose chain-rule mapping in global Jacobian assembly.
		public boolean freezeRateChainInInner = false;
		// Debug/test mode: dump per-inner scene columns in CSV (X..R, CURV, RMS for each inner step).
		public boolean testInnerResultsCsv = false;
		// Debug/test mode: after first convergence, run one extra outer pass to verify full recorrelation cycle.
		public boolean runOneMoreOuterAfterConverged = false;
		// Save only final combined initial+final CSV and suppress per-outer TIFF/CSV dumps.
		public boolean saveInitialFinalOnly = false;
		// Temporary test mode:
		// 0 - all pairs weight 1.0
		// 1 - pairs at +/-1 from center (with center as reference) weight 0.0
		// 2 - pairs at +/-1 and +/-2 from center (with center as reference) weight 0.0
		public int centerPairWeightMode = 0;
	}

	/**
	 * Summary returned by {@code refineAllToReference()}. The defaults below are what a caller
	 * sees when the run exits early (null inputs, empty scene range, nothing to optimize).
	 */
	public static class Result {
		// NOTE(review): presumably the number of outer iterations actually performed; set outside the visible part.
		public int outerDone = 0;
		// Counts of scenes with / without a usable solution (reported to the progress record on finish).
		public int solvedScenes = 0;
		public int failedScenes = 0;
		// Largest parameter update magnitude; NaN until a solve has been applied.
		public double maxDelta = Double.NaN;
		// Average pair RMS, and its "pure" variant; NaN until computed.
		// NOTE(review): the exact difference between the two is defined outside the visible part - confirm.
		public double avgPairRms = Double.NaN;
		public double avgPairRmsPure = Double.NaN;
		// PCG iteration count of the last solve (reported as pcgIter in the progress record).
		public int pcgIterationsLast = 0;
	}

	/**
	 * MCP-visible snapshot of global LMA progress.
	 *
	 * <p>This snapshot is intentionally compact. Callers receive a detached copy from
	 * {@link #getProgressSnapshot()}, so modifying it never affects the solver's own state;
	 * MCP polls that method to monitor long-running
	 * {@code Aux Build Series} / Global LMA runs.
	 */
	public static class ProgressSnapshot {
		// True from startProgress() until finishProgress(); selects which record getProgressSnapshot() reports.
		public boolean active = false;
		// "idle" by default, "running" while active, then the status given to finishProgress() ("done" if null).
		public String status = "idle";
		// Coarse phase marker: "setup", "outer-cache", "inner" or "finished" ("" when idle).
		public String stage = "";
		// Wall-clock timestamps from System.currentTimeMillis(); 0 means "not set".
		public long startedMs = 0L;
		public long updatedMs = 0L;
		public long finishedMs = 0L;
		// Scene range and fixed center index of the run; -1 when unknown.
		public int centerIndex = -1;
		public int firstScene = -1;
		public int lastScene = -1;
		// Configured iteration counts copied from Options (0 when options are null).
		public int outerIterations = 0;
		public int innerIterations = 0;
		// Current outer / inner iteration index; inner is reset to -1 when a new outer cache is reported.
		public int outer = -1;
		public int inner = -1;
		// Observation cache statistics reported once per outer pass.
		public int cachedPairs = 0;
		public int correlationSolved = 0;
		public int correlationFailed = 0;
		public int tilesPerPair = 0;
		// Per-inner-iteration solve statistics (scene counts are overwritten from Result on finish).
		public int solvedScenes = 0;
		public int failedScenes = 0;
		public int solvedPairs = 0;
		public int failedPairs = 0;
		public int pcgIter = 0;
		// Numeric metrics; NaN means "not available yet".
		public double avgPairRms = Double.NaN;
		public double avgPairRmsPure = Double.NaN;
		public double maxDelta = Double.NaN;
		public double lambda = Double.NaN;
		public double lpfSqSum = Double.NaN;
		public double lpfWeightSum = Double.NaN;
		// paramLpf entries at DP_DSX, DP_DSY, DP_DSAZ and DP_DSTL respectively (NaN when not provided).
		public double lpfX = Double.NaN;
		public double lpfY = Double.NaN;
		public double lpfA = Double.NaN;
		public double lpfT = Double.NaN;
	}

	// Monitor guarding every read and write of the two progress records below.
	private static final Object MCP_PROGRESS_LOCK = new Object();
	// Live record of the run in progress; cleared back to idle defaults by finishProgress().
	private static final ProgressSnapshot MCP_PROGRESS_ACTIVE = new ProgressSnapshot();
	// Copy of the most recently finished run, reported while no run is active.
	private static final ProgressSnapshot MCP_PROGRESS_LAST = new ProgressSnapshot();

	/**
	 * Returns a detached copy of the current progress state.
	 *
	 * <p>While a run is marked active the live record is reported; otherwise the record
	 * kept from the most recently finished run is reported (idle defaults if there was none).
	 * The copy is made while holding {@code MCP_PROGRESS_LOCK}.
	 *
	 * @return a new {@link ProgressSnapshot} owned by the caller
	 */
	public static ProgressSnapshot getProgressSnapshot() {
		final ProgressSnapshot snapshot = new ProgressSnapshot();
		synchronized (MCP_PROGRESS_LOCK) {
			if (MCP_PROGRESS_ACTIVE.active) {
				copyProgressSnapshot(snapshot, MCP_PROGRESS_ACTIVE);
			} else {
				copyProgressSnapshot(snapshot, MCP_PROGRESS_LAST);
			}
		}
		return snapshot;
	}

	/**
	 * Resets every field of {@code dst} to its idle default (the same values a freshly
	 * constructed {@link ProgressSnapshot} carries).
	 *
	 * @param dst record to reset in place
	 */
	private static void clearProgressSnapshot(final ProgressSnapshot dst) {
		// Run state.
		dst.active = false;
		dst.status = "idle";
		dst.stage = "";
		// Timestamps: 0 means "not set".
		dst.startedMs = dst.updatedMs = dst.finishedMs = 0L;
		// Scene range and iteration position: -1 means "unknown".
		dst.centerIndex = dst.firstScene = dst.lastScene = -1;
		dst.outer = dst.inner = -1;
		// Configured iteration counts and all counters.
		dst.outerIterations = dst.innerIterations = 0;
		dst.cachedPairs = dst.correlationSolved = dst.correlationFailed = dst.tilesPerPair = 0;
		dst.solvedScenes = dst.failedScenes = 0;
		dst.solvedPairs = dst.failedPairs = 0;
		dst.pcgIter = 0;
		// Numeric metrics: NaN means "not available".
		dst.avgPairRms = dst.avgPairRmsPure = Double.NaN;
		dst.maxDelta = dst.lambda = Double.NaN;
		dst.lpfSqSum = dst.lpfWeightSum = Double.NaN;
		dst.lpfX = dst.lpfY = dst.lpfA = dst.lpfT = Double.NaN;
	}

	/**
	 * Copies every field of {@code src} into {@code dst}, field by field.
	 * Callers in this class invoke it while holding {@code MCP_PROGRESS_LOCK}.
	 *
	 * @param dst record that receives the values
	 * @param src record that is read
	 */
	private static void copyProgressSnapshot(
			final ProgressSnapshot dst,
			final ProgressSnapshot src) {
		// Run state and timestamps.
		dst.active = src.active;
		dst.status = src.status;
		dst.stage = src.stage;
		dst.startedMs = src.startedMs;
		dst.updatedMs = src.updatedMs;
		dst.finishedMs = src.finishedMs;

		// Scene range.
		dst.centerIndex = src.centerIndex;
		dst.firstScene = src.firstScene;
		dst.lastScene = src.lastScene;

		// Iteration budget and current position.
		dst.outerIterations = src.outerIterations;
		dst.innerIterations = src.innerIterations;
		dst.outer = src.outer;
		dst.inner = src.inner;

		// Observation cache statistics.
		dst.cachedPairs = src.cachedPairs;
		dst.correlationSolved = src.correlationSolved;
		dst.correlationFailed = src.correlationFailed;
		dst.tilesPerPair = src.tilesPerPair;

		// Solve statistics.
		dst.solvedScenes = src.solvedScenes;
		dst.failedScenes = src.failedScenes;
		dst.solvedPairs = src.solvedPairs;
		dst.failedPairs = src.failedPairs;
		dst.pcgIter = src.pcgIter;

		// Numeric metrics.
		dst.avgPairRms = src.avgPairRms;
		dst.avgPairRmsPure = src.avgPairRmsPure;
		dst.maxDelta = src.maxDelta;
		dst.lambda = src.lambda;

		// LPF sums and controls.
		dst.lpfSqSum = src.lpfSqSum;
		dst.lpfWeightSum = src.lpfWeightSum;
		dst.lpfX = src.lpfX;
		dst.lpfY = src.lpfY;
		dst.lpfA = src.lpfA;
		dst.lpfT = src.lpfT;
	}

	/**
	 * Bounds-checked array read.
	 *
	 * @param params array to read from, may be {@code null}
	 * @param index element index
	 * @return {@code params[index]}, or {@code NaN} when {@code params} is {@code null}
	 *         or {@code index} is outside the array
	 */
	private static double getParamOrNaN(
			final double[] params,
			final int index) {
		final boolean readable = (params != null) && (index >= 0) && (index < params.length);
		return readable ? params[index] : Double.NaN;
	}

	/**
	 * Marks the beginning of a run in the live progress record.
	 *
	 * <p>The live record is reset to idle defaults first, then flagged active with status
	 * {@code "running"} and stage {@code "setup"}; start and update timestamps are set to the
	 * current wall-clock time.
	 *
	 * @param centerIndex index of the fixed center scene
	 * @param firstScene first scene index of the run
	 * @param lastScene last scene index of the run
	 * @param options solver options; iteration counts are recorded as 0 when {@code null}
	 * @param paramLpf LPF controls; entries at DP_DSX, DP_DSY, DP_DSAZ and DP_DSTL are recorded
	 *        (NaN when the array is {@code null} or too short)
	 */
	private static void startProgress(
			final int centerIndex,
			final int firstScene,
			final int lastScene,
			final Options options,
			final double[] paramLpf) {
		final int plannedOuter = (options != null) ? options.outerIterations : 0;
		final int plannedInner = (options != null) ? options.innerIterations : 0;
		final double lpfX = getParamOrNaN(paramLpf, ErsCorrection.DP_DSX);
		final double lpfY = getParamOrNaN(paramLpf, ErsCorrection.DP_DSY);
		final double lpfA = getParamOrNaN(paramLpf, ErsCorrection.DP_DSAZ);
		final double lpfT = getParamOrNaN(paramLpf, ErsCorrection.DP_DSTL);
		synchronized (MCP_PROGRESS_LOCK) {
			final ProgressSnapshot live = MCP_PROGRESS_ACTIVE;
			clearProgressSnapshot(live);
			final long startedAt = System.currentTimeMillis();
			live.active = true;
			live.status = "running";
			live.stage = "setup";
			live.startedMs = startedAt;
			live.updatedMs = startedAt;
			live.centerIndex = centerIndex;
			live.firstScene = firstScene;
			live.lastScene = lastScene;
			live.outerIterations = plannedOuter;
			live.innerIterations = plannedInner;
			live.lpfX = lpfX;
			live.lpfY = lpfY;
			live.lpfA = lpfA;
			live.lpfT = lpfT;
		}
	}

	/**
	 * Records that the observation cache for an outer iteration has been built.
	 * Does nothing unless a run is currently marked active.
	 *
	 * @param outer outer iteration index
	 * @param obsCache cache whose statistics are recorded; all four statistics become 0 when {@code null}
	 */
	private static void markProgressOuterCache(
			final int outer,
			final ObservationCache obsCache) {
		synchronized (MCP_PROGRESS_LOCK) {
			final ProgressSnapshot live = MCP_PROGRESS_ACTIVE;
			if (live.active) {
				live.stage = "outer-cache";
				live.outer = outer;
				// A new outer pass starts: no inner iteration has been reported for it yet.
				live.inner = -1;
				if (obsCache != null) {
					live.cachedPairs = obsCache.observations.size();
					live.correlationSolved = obsCache.solvedPairs;
					live.correlationFailed = obsCache.failedPairs;
					live.tilesPerPair = obsCache.tilesPerPair;
				} else {
					live.cachedPairs = 0;
					live.correlationSolved = 0;
					live.correlationFailed = 0;
					live.tilesPerPair = 0;
				}
				live.updatedMs = System.currentTimeMillis();
			}
		}
	}

	/**
	 * Records the outcome of one inner iteration in the live progress record.
	 * Does nothing unless a run is currently marked active.
	 *
	 * @param outer outer iteration index
	 * @param inner inner iteration index
	 * @param solvedScenes number of scenes counted as solved
	 * @param failedScenes number of scenes counted as failed
	 * @param solvedPairs number of pairs counted as solved
	 * @param failedPairs number of pairs counted as failed
	 * @param avgPairRms average pair RMS to report
	 * @param avgPairRmsPure "pure" average pair RMS to report
	 * @param maxDelta largest parameter update to report
	 * @param pcgIter PCG iteration count to report
	 * @param lambda current damping value
	 * @param lpfSqSum LPF squared-sum statistic
	 * @param lpfWeightSum LPF weight-sum statistic
	 */
	private static void markProgressInner(
			final int outer,
			final int inner,
			final int solvedScenes,
			final int failedScenes,
			final int solvedPairs,
			final int failedPairs,
			final double avgPairRms,
			final double avgPairRmsPure,
			final double maxDelta,
			final int pcgIter,
			final double lambda,
			final double lpfSqSum,
			final double lpfWeightSum) {
		synchronized (MCP_PROGRESS_LOCK) {
			final ProgressSnapshot live = MCP_PROGRESS_ACTIVE;
			if (live.active) {
				// Position within the run.
				live.stage = "inner";
				live.outer = outer;
				live.inner = inner;
				// Counters.
				live.solvedScenes = solvedScenes;
				live.failedScenes = failedScenes;
				live.solvedPairs = solvedPairs;
				live.failedPairs = failedPairs;
				live.pcgIter = pcgIter;
				// Metrics.
				live.avgPairRms = avgPairRms;
				live.avgPairRmsPure = avgPairRmsPure;
				live.maxDelta = maxDelta;
				live.lambda = lambda;
				live.lpfSqSum = lpfSqSum;
				live.lpfWeightSum = lpfWeightSum;
				live.updatedMs = System.currentTimeMillis();
			}
		}
	}

	/**
	 * Closes the active run: stamps the live record as finished, preserves it as the
	 * "last run" record, and resets the live record to idle defaults.
	 * Does nothing unless a run is currently marked active.
	 *
	 * @param status final status text; {@code "done"} is recorded when {@code null}
	 * @param result optional summary whose scene counts, RMS values, max delta and last PCG
	 *        iteration count overwrite the corresponding progress fields
	 */
	private static void finishProgress(
			final String status,
			final Result result) {
		synchronized (MCP_PROGRESS_LOCK) {
			final ProgressSnapshot live = MCP_PROGRESS_ACTIVE;
			if (live.active) {
				final long finishedAt = System.currentTimeMillis();
				live.active = false;
				if (status != null) {
					live.status = status;
				} else {
					live.status = "done";
				}
				live.stage = "finished";
				live.finishedMs = finishedAt;
				live.updatedMs = finishedAt;
				if (result != null) {
					live.solvedScenes = result.solvedScenes;
					live.failedScenes = result.failedScenes;
					live.avgPairRms = result.avgPairRms;
					live.avgPairRmsPure = result.avgPairRmsPure;
					live.maxDelta = result.maxDelta;
					live.pcgIter = result.pcgIterationsLast;
				}
				// Keep the finished state for later queries, then return the live record to idle.
				copyProgressSnapshot(MCP_PROGRESS_LAST, live);
				clearProgressSnapshot(live);
			}
		}
	}

	/**
	 * Outcome of accumulating one pair's local system into the global normal equations.
	 */
	private static class LocalSystem {
		// True when the pair contributed; the inner loop counts the pair as solved only in that case.
		boolean valid = false;
		// RMS values of the local fit, or null. Element 0 is the RMS; element 1, when present,
		// is read as the "pure" RMS (element 0 is used for both when the array has one entry).
		double[] rms = null;
	}

	/**
	 * One correlation factor: match {@code sceneIndex} to {@code refSceneIndex}.
	 * In phase-1 global refinement this is usually center-reference.
	 * For FPN mitigation we add a few non-center parent references.
	 */
	private static class PairFactor {
		// Index of the scene being matched.
		int sceneIndex;
		// Index of the scene it is matched against.
		int refSceneIndex;
		// Flag for factors whose reference is a non-center parent scene rather than the center.
		boolean nonCenterReference;
		// Weight of this factor (see Options.centerPairWeightMode for the temporary weighting modes).
		double pairWeight;

		// Plain field-by-field initialization.
		PairFactor(
				final int sceneIndex,
				final int refSceneIndex,
				final boolean nonCenterReference,
				final double pairWeight) {
			this.sceneIndex = sceneIndex;
			this.refSceneIndex = refSceneIndex;
			this.nonCenterReference = nonCenterReference;
			this.pairWeight = pairWeight;
		}
	}

	/**
	 * Cached per-pair observation entry stored in {@link ObservationCache#observations}.
	 */
	private static class PairObservation {
		// Scene / reference scene indices of the pair; sceneIndex - firstScene selects the global variable slot.
		int sceneIndex;
		int refSceneIndex;
		// Observations whose weight is not strictly positive are skipped by the inner loop.
		double pairWeight = 1.0;
		// NOTE(review): presumably per-tile inputs for the local LMA (tile centers, eigen data,
		// measured vectors, reference reliability mask); they are filled outside the visible part - confirm.
		double[][] centers = null;
		double[][] eigen = null;
		double[][] vectorXys = null;
		boolean[] reliableRef = null;
		int yBase = 0; // base in ObservationCache.yVector (2 values per tile: dx,dy)
		int wBase = 0; // base in ObservationCache.wVector (1 value per tile: strength)
	}

	/**
	 * Observation cache built once per outer iteration and reused by the inner iterations.
	 */
	private static class ObservationCache {
		// One entry per cached pair; an empty list makes refineAllToReference() leave the outer loop.
		ArrayList<PairObservation> observations = new ArrayList<PairObservation>();
		double[] yVector = null; // flat: [pair][2*tile + {0,1}] -> {dx,dy}
		double[] wVector = null; // flat: [pair][tile] -> strength
		// Number of tiles stored per pair (fixed-size layout of the two flat vectors above).
		int tilesPerPair = 0;
		// Pair counts reported as correlationSolved / correlationFailed in logs and progress.
		int solvedPairs = 0;
		int failedPairs = 0;
	}

	/**
	 * Per-reference-scene GPU data; refineAllToReference() allocates one cache slot per scene
	 * index ({@code new RefGpuData[nvars]}) for every outer iteration.
	 */
	private static class RefGpuData {
		// Reference scene this entry belongs to; -1 while unset.
		int refSceneIndex = -1;
		// NOTE(review): presumably per-tile {pX, pY, disparity} for the reference scene - confirm.
		double[][] pXpYD = null;
		// NOTE(review): presumably the GPU tile tasks prepared for the reference scene - confirm.
		TpTask[] tasks = null;
		// NOTE(review): presumably a per-tile reliability mask for this reference - confirm.
		boolean[] reliableMask = null;
	}

	/**
	 * A (scene index, coefficient) pair.
	 * NOTE(review): presumably one term of a rate expressed as a linear combination of
	 * neighboring scene poses (finite differences described in the class notes) - confirm.
	 */
	private static class RateTerm {
		int sceneIndex;
		double coeff;

		// Plain field-by-field initialization.
		RateTerm(
				final int sceneIndex,
				final double coeff) {
			this.sceneIndex = sceneIndex;
			this.coeff = coeff;
		}
	}

	/**
	 * A (variable index, parameter index, coefficient) triple.
	 * NOTE(review): presumably one entry of the local-to-global mapping used when projecting
	 * local equations onto global pose unknowns (chain rule in the class notes) - confirm.
	 */
	private static class GlobalTerm {
		int varIndex;
		int parIndex;
		double coeff;

		// Plain field-by-field initialization.
		GlobalTerm(
				final int varIndex,
				final int parIndex,
				final double coeff) {
			this.varIndex = varIndex;
			this.parIndex = parIndex;
			this.coeff = coeff;
		}
	}

	/**
	 * Run global sparse refinement for scenes in [earliestScene..lastScene], excluding centerIndex.
	 *
	 * @param clt_parameters processing parameters
	 * @param quadCLTs scene array
	 * @param centerCLT fixed reference scene
	 * @param centerIndex index of reference scene in quadCLTs
	 * @param earliestScene earliest processed scene index (inclusive)
	 * @param lastScene last processed scene index (inclusive)
	 * @param scenes_xyzatr per-scene poses [scene][{xyz,atr}][3], updated in place
	 * @param scenes_xyzatr_pull optional per-scene pull targets [scene][{xyz,atr}][3] for regularization
	 * @param paramSelect enabled parameters mask, full DP_NUM_PARS size (scene pose subset is used here)
	 * @param paramRegweights local per-parameter regularization weights, full DP_NUM_PARS size
	 * @param paramLpf inter-scene low-pass curvature controls, full DP_NUM_PARS size
	 *        (converted to weights as {@code w = control^2})
	 * @param centerDisparity reference disparity map ({@code null -> centerCLT current})
	 * @param reliableRef optional tile reliability mask for reference
	 * @param disableErs true to disable ERS-related parameters in local solves
	 * @param mb_max_gain motion blur gain limit
	 * @param options global solver options
	 * @param debugLevel debug level
	 * @return summary result
	 */
	public static Result refineAllToReference(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT,
			final int centerIndex,
			final int earliestScene,
			final int lastScene,
			final double[][][] scenes_xyzatr,
			final double[][][] scenes_xyzatr_pull,
			final boolean[] paramSelect,
			final double[] paramRegweights,
			final double[] paramLpf,
			final double[] centerDisparity,
			final boolean[] reliableRef,
			final boolean disableErs,
			final double mb_max_gain,
			final Options options,
			final int debugLevel) {
		final Result result = new Result();
		if ((quadCLTs == null) || (centerCLT == null) || (scenes_xyzatr == null) || (options == null)) {
			return result;
		}
		// Local debug trigger for cached observation hyperstack dump.
		// Keep this non-final so it can be flipped in debugger after a breakpoint.
		boolean debugDumpObservationHyperstack = (debugLevel < 1000);
		boolean debugShowObservationHyperstack = false;
		if (options.debugDumpObservationHyperstack) {
			debugDumpObservationHyperstack = true;
			debugShowObservationHyperstack = options.debugShowObservationHyperstack;
		}
		if (options.saveInitialFinalOnly) {
			debugDumpObservationHyperstack = false;
		}
		final boolean dumpOuterCsv = !options.saveInitialFinalOnly;
		final int firstScene = Math.max(0, earliestScene);
		final int endScene = Math.min(lastScene, quadCLTs.length - 1);
		if (endScene < firstScene) {
			return result;
		}
		final int nvars = endScene - firstScene + 1;
		final boolean[] activePoseScene = new boolean[nvars];
		int numActive = 0;
		for (int nscene = firstScene; nscene <= endScene; nscene++) {
			final int ivar = nscene - firstScene;
			activePoseScene[ivar] = (nscene != centerIndex) &&
					(quadCLTs[nscene] != null) &&
					(scenes_xyzatr[nscene] != null);
			if (activePoseScene[ivar]) {
				numActive++;
			}
		}
		if (numActive == 0) {
			return result;
		}
		final double[] refDisparity = (centerDisparity != null) ? centerDisparity : centerCLT.getDLS()[clt_parameters.imp.use_lma_dsi ? 1 : 0];
		final double[][] pXpYDCenter = OpticalFlow.transformToScenePxPyD(
				null,
				refDisparity,
				Interscene.ZERO3,
				Interscene.ZERO3,
				centerCLT,
				centerCLT);
		final double[] sceneTimes = getSceneTimes(
				quadCLTs,
				firstScene,
				endScene);

		final boolean[] lmaParamSelect = getSceneParamMask(
				paramSelect,
				disableErs);
		final int[] sceneParIndices = getActiveSceneParameterIndices(lmaParamSelect);
		final int npars = sceneParIndices.length;
		if (npars == 0) {
			if (debugLevel > -2) {
				System.out.println("IntersceneGlobalRefine: no active scene parameters selected, nothing to optimize.");
			}
			return result;
		}
		final double[] lmaParamRegweights = new double[ErsCorrection.DP_NUM_PARS];
		if (paramRegweights != null) {
			System.arraycopy(
					paramRegweights,
					0,
					lmaParamRegweights,
					0,
					Math.min(paramRegweights.length, lmaParamRegweights.length));
		}
		zeroRateRegularization(lmaParamRegweights);
		final double[][][] pullScenePoses = normalizePullScenePoses(
				scenes_xyzatr_pull,
				scenes_xyzatr);
		double lpfXyzScale = Double.NaN;
		if (centerCLT != null) {
			lpfXyzScale = Math.abs(centerCLT.getAverageZ(true));
		}
		if (!(lpfXyzScale > MIN_DIAG)) {
			// Fallback when center altitude is unavailable.
			lpfXyzScale = getAverageAbsZ(
					scenes_xyzatr,
					activePoseScene,
					firstScene);
		}
		final LpfSetup sceneLpfSetup = getSceneLpfSetup(
				sceneParIndices,
				paramLpf,
				options,
				lpfXyzScale);
		if (debugLevel > -4) {
			final double invSq = (lpfXyzScale > MIN_DIAG) ? (1.0 / (lpfXyzScale * lpfXyzScale)) : 1.0;
			System.out.println(
					"IntersceneGlobalRefine: LPF normalization XYZ scale=" + lpfXyzScale +
					", invScaleSq=" + invSq +
					" (ATR scale=1, effective weight w=lpf^2)");
			System.out.println(
					"IntersceneGlobalRefine: LPF base effective weights " +
					formatLpfEffectiveWeights(
							sceneParIndices,
							sceneLpfSetup.baseWeights));
		}
		startProgress(
				centerIndex,
				firstScene,
				endScene,
				options,
				paramLpf);
		final double[] x = new double[nvars * npars];
		for (int nscene = firstScene; nscene <= endScene; nscene++) {
			final int ivar = nscene - firstScene;
			final double[][] pose = getScenePose(
					scenes_xyzatr,
					nscene,
					centerIndex);
			setSceneState(
					x,
					ivar,
					getSceneVector(
							pose,
							sceneParIndices),
					npars);
		}
		final double[][][] scenesInitial = cloneScenePoses(scenes_xyzatr);
		ObservationCache firstObsCache = null;
		ObservationCache lastObsCache = null;
		double[][][] firstScenesYbase = null;
		double[][][] firstScenesRates = null;
		double[][][] lastScenesYbase = null;
		double[][][] lastScenesRates = null;
		boolean firstIncludeRateChain = false;
		boolean lastIncludeRateChain = false;

		boolean extraOuterAfterConvergedUsed = false;
		boolean waitingExtraOuterDecision = false;
		double extraOuterBaselineRmsPure = Double.NaN;
		int extraOuterBaselineIndex = -1;
		for (int outer = 0; outer < options.outerIterations; outer++) {
			final double[][][] scenesBeforeOuter = cloneScenePoses(scenes_xyzatr);
			final double[][][] scenesForInnerRates = options.freezeRateChainInInner ? scenesBeforeOuter : scenes_xyzatr;
			final boolean includeRateChain = !options.freezeRateChainInInner;
			final double[] minMax = {options.minOffset, options.maxOffset, Double.NaN};
			final ArrayList<PairFactor> pairFactors = buildPairFactors(
					clt_parameters,
					quadCLTs,
					scenes_xyzatr,
					activePoseScene,
					firstScene,
					endScene,
					centerIndex,
					options.minOffset,
					debugLevel);
			applyPairWeights(
					pairFactors,
					centerIndex,
					options.centerPairWeightMode,
					debugLevel);
			final RefGpuData[] refGpuCache = new RefGpuData[nvars];
			final ObservationCache obsCache = buildObservationCache(
					clt_parameters,
					quadCLTs,
					centerCLT,
					centerIndex,
					firstScene,
					endScene,
					scenes_xyzatr,
					sceneTimes,
					pairFactors,
					refDisparity,
					pXpYDCenter,
					reliableRef,
					mb_max_gain,
					minMax,
					refGpuCache,
					debugLevel);
			if (obsCache.observations.isEmpty()) {
				break;
			}
			if (firstObsCache == null) {
				firstObsCache = obsCache;
				firstScenesYbase = scenesBeforeOuter;
				firstScenesRates = scenesForInnerRates;
				firstIncludeRateChain = includeRateChain;
			}
			lastObsCache = obsCache;
			lastScenesYbase = scenesBeforeOuter;
			lastScenesRates = scenesForInnerRates;
			lastIncludeRateChain = includeRateChain;
			markProgressOuterCache(
					outer,
					obsCache);
			if (debugLevel > -3) {
				System.out.println(
						"IntersceneGlobalRefine: outer=" + outer +
						", cachedPairs=" + obsCache.observations.size() +
						", correlationSolved=" + obsCache.solvedPairs +
						", correlationFailed=" + obsCache.failedPairs +
						", tilesPerPair=" + obsCache.tilesPerPair);
			}
			if (debugDumpObservationHyperstack) {
				dumpObservationHyperstack(
						obsCache,
						quadCLTs,
						centerCLT,
						outer,
						debugShowObservationHyperstack);
			}
			// Global block-banded normal equation storage:
			// diag[i]     -> H(i,i)
			// offDiag1[i] -> H(i,i+1)
			// offDiag2[i] -> H(i,i+2)
			// rhs[i]      -> b(i)
			final double[][][] diag = new double[nvars][npars][npars];
			final double[][] rhs = new double[nvars][npars];
			final double[][][] offDiag1 = new double[Math.max(0, nvars - 1)][npars][npars];
			final double[][][] offDiag2 = new double[Math.max(0, nvars - 2)][npars][npars];
			final double[] delta = new double[nvars * npars];
			final int innerIterations = Math.max(1, options.innerIterations);
			double maxDeltaOuter = Double.NaN;
			double avgPairRmsOuter = Double.NaN;
			double avgPairRmsPureOuter = Double.NaN;
			int solvedOuter = 0;
			int failedOuter = numActive;
			int pcgIterOuter = 0;
			double avgPairRmsInitial = Double.NaN;
			double avgPairRmsPureInitial = Double.NaN;
			double lambda = options.lambdaDiag;
			boolean outerAborted = false;
			double[] sceneParamLpf = sceneLpfSetup.baseWeights.clone();
			boolean lpfAutoNormalized = false;
			final boolean collectInnerHistory = debugDumpObservationHyperstack || dumpOuterCsv;
			final ArrayList<double[][][]> innerSceneHistory = collectInnerHistory ?
					new ArrayList<double[][][]>() : null;
			final ArrayList<double[]> innerPairRmsHistory = collectInnerHistory ?
					new ArrayList<double[]>() : null;
			if (innerSceneHistory != null) {
				// State before first inner iteration (plain columns without suffix).
				innerSceneHistory.add(cloneScenePoses(scenes_xyzatr));
			}
			boolean outerConverged = false;
			for (int inner = 0; inner < innerIterations; inner++) {
				zeroNormalBlocks(
						diag,
						offDiag1,
						offDiag2,
						rhs);
				Arrays.fill(
						delta,
						0.0);
				final double[] pairRmsInner = debugDumpObservationHyperstack ?
						new double[obsCache.observations.size()] : null;
				if (pairRmsInner != null) {
					Arrays.fill(
							pairRmsInner,
							Double.NaN);
				}
				int solvedPairs = 0;
				int failedPairs = 0;
				double rmsSqSum = 0.0;
				double rmsPureSqSum = 0.0;
				final boolean[] sceneSolved = new boolean[nvars];
				for (int iobs = 0; iobs < obsCache.observations.size(); iobs++) {
					final PairObservation obs = obsCache.observations.get(iobs);
					if (!(obs.pairWeight > 0.0)) {
						continue;
					}
					final int ivar = obs.sceneIndex - firstScene;
					if ((ivar < 0) || (ivar >= nvars) || !activePoseScene[ivar]) {
						continue;
					}
						final LocalSystem ls = accumulateCachedLocalSystem(
								clt_parameters,
								quadCLTs,
								centerIndex,
								obs,
								firstScene,
								endScene,
								scenes_xyzatr,
								pullScenePoses,
								scenesBeforeOuter,
								scenesForInnerRates,
								sceneTimes,
								paramSelect,
							lmaParamRegweights,
							sceneParIndices,
							activePoseScene,
							disableErs,
							includeRateChain,
							true,
							diag,
							offDiag1,
							offDiag2,
							rhs,
							debugLevel);
					if (ls.valid) {
						solvedPairs++;
						if (ls.rms != null) {
							final double rms = ls.rms[0];
							final double rmsPure = (ls.rms.length > 1) ? ls.rms[1] : ls.rms[0];
							rmsSqSum += rms * rms;
							rmsPureSqSum += rmsPure * rmsPure;
						}
						if (pairRmsInner != null) {
							pairRmsInner[iobs] = ((ls.rms != null) && (ls.rms.length > 1)) ? ls.rms[1] :
								((ls.rms != null) ? ls.rms[0] : Double.NaN);
						}
						sceneSolved[ivar] = true;
					} else {
						failedPairs++;
					}
				}
				int solved = 0;
				for (int i = 0; i < nvars; i++) {
					if (activePoseScene[i] && sceneSolved[i]) {
						solved++;
					}
				}
				final int failed = numActive - solved;
				final double avgPairRmsPure = (solvedPairs > 0) ? Math.sqrt(rmsPureSqSum / solvedPairs) : Double.NaN;
				if (!lpfAutoNormalized && (solvedPairs > 0)) {
					final double[] pairPixelSensitivity = estimatePairPixelSensitivity(
							diag,
							activePoseScene,
							npars);
					sceneParamLpf = autoNormalizeSceneLpfWeights(
							x,
							sceneLpfSetup,
							pairPixelSensitivity,
							activePoseScene,
							npars,
							rmsPureSqSum,
							solvedPairs);
					lpfAutoNormalized = true;
					if (debugLevel > -3) {
						System.out.println(
								"IntersceneGlobalRefine: LPF self-normalized from current data " +
								"(uses per-parameter pixel sensitivity, w=lpf^2), pairSens " +
								formatLpfEffectiveWeights(
										sceneParIndices,
										pairPixelSensitivity) +
								", effective weights " +
								formatLpfEffectiveWeights(
										sceneParIndices,
										sceneParamLpf));
					}
				}
				final double[] lpfSqByPar = new double[npars];
				final double[] lpfWeightByPar = new double[npars];
				final double[] lpfStats = getCurvatureStats(
						x,
						sceneParamLpf,
						activePoseScene,
						npars,
						lpfSqByPar,
						lpfWeightByPar);
				final double lpfSqSum = lpfStats[0];
				final double lpfWeightSum = lpfStats[1];
				// Keep decision/log metric in pair-RMS units: LPF contributes to numerator only.
				// Using LPF weight sum in denominator can collapse RMS when LPF weights are large.
				final double avgPairRmsDecision = (solvedPairs > 0) ?
						Math.sqrt((rmsPureSqSum + lpfSqSum) / solvedPairs) :
						Double.NaN;
				final double avgPairRms = (solvedPairs > 0) ?
						Math.sqrt((rmsSqSum + lpfSqSum) / solvedPairs) :
						Double.NaN;
				if (Double.isNaN(avgPairRmsInitial)) {
					avgPairRmsInitial = avgPairRmsDecision;
					avgPairRmsPureInitial = avgPairRmsPure;
				}
				if (solvedPairs == 0) {
					avgPairRmsOuter = avgPairRmsDecision;
					avgPairRmsPureOuter = avgPairRmsPure;
					markProgressInner(
							outer,
							inner,
							solved,
							failed,
							solvedPairs,
							failedPairs,
							avgPairRmsOuter,
							avgPairRmsPureOuter,
							0.0,
							0,
							lambda,
							lpfSqSum,
							lpfWeightSum);
					break;
				}

				assembleCurvatureBlocks(
						x,
						diag,
						offDiag1,
						offDiag2,
						rhs,
						sceneParamLpf,
						activePoseScene,
						npars);
				regularizeDiagonalAndFixInactive(
						diag,
						rhs,
						lambda,
						activePoseScene,
						npars);
				final int pcgIter = solvePcg(
						diag,
						offDiag1,
						offDiag2,
						rhs,
						delta,
						options.pcgIterations,
						options.pcgTolerance,
						npars);
				pcgIterOuter = pcgIter;
				final double maxDelta = applyDeltaToState(
						x,
						scenes_xyzatr,
						delta,
						1.0,
						firstScene,
						endScene,
						activePoseScene,
						npars,
						sceneParIndices);
					final double[] pairRmsTrial = evaluateCachedPairRms(
							clt_parameters,
							quadCLTs,
							centerIndex,
						obsCache,
							firstScene,
							endScene,
							scenes_xyzatr,
							pullScenePoses,
							scenesBeforeOuter,
							scenesForInnerRates,
							sceneTimes,
						paramSelect,
						lmaParamRegweights,
						sceneParIndices,
						activePoseScene,
						disableErs,
						includeRateChain,
						debugLevel);
				double trialRmsSqSum = 0.0;
				int trialSolvedPairs = 0;
				if (pairRmsTrial != null) {
					for (int i = 0; i < pairRmsTrial.length; i++) {
						final double d = pairRmsTrial[i];
						if (Double.isNaN(d)) {
							continue;
						}
						trialSolvedPairs++;
						trialRmsSqSum += d * d;
					}
				}
				final double[] trialLpfSqByPar = new double[npars];
				final double[] trialLpfWeightByPar = new double[npars];
				final double[] lpfStatsTrial = getCurvatureStats(
						x,
						sceneParamLpf,
						activePoseScene,
						npars,
						trialLpfSqByPar,
						trialLpfWeightByPar);
				final double trialLpfSqSum = lpfStatsTrial[0];
				final double trialLpfWeightSum = lpfStatsTrial[1];
				final double trialAvgPairRmsPure = (trialSolvedPairs > 0) ?
						Math.sqrt(trialRmsSqSum / trialSolvedPairs) : Double.NaN;
				final double trialAvgPairRms = (trialSolvedPairs > 0) ?
						Math.sqrt((trialRmsSqSum + trialLpfSqSum) / trialSolvedPairs) :
						Double.NaN;
				final boolean accepted = !Double.isNaN(trialAvgPairRms) &&
						(Double.isNaN(avgPairRmsDecision) || (trialAvgPairRms < avgPairRmsDecision));
				final boolean rmsConverged = accepted &&
						(options.rmsDiffStop > 0.0) &&
						!Double.isNaN(avgPairRmsDecision) &&
						(trialAvgPairRms >= (avgPairRmsDecision * (1.0 - options.rmsDiffStop)));
				final double lpfSqSumLog = accepted ? trialLpfSqSum : lpfSqSum;
				final double lpfWeightSumLog = accepted ? trialLpfWeightSum : lpfWeightSum;
				final String lpfBreakdownLog = formatCurvatureBreakdown(
						sceneParIndices,
						accepted ? trialLpfSqByPar : lpfSqByPar,
						accepted ? trialLpfWeightByPar : lpfWeightByPar);
				if (accepted) {
					lambda *= options.lambdaScaleGood;
				} else {
					applyDeltaToState(
							x,
							scenes_xyzatr,
							delta,
							-1.0,
							firstScene,
							endScene,
							activePoseScene,
							npars,
							sceneParIndices);
					lambda *= options.lambdaScaleBad;
				}
				if (lambda > options.lambdaMax) {
					if (debugLevel > -3) {
						System.out.println(
								"IntersceneGlobalRefine: outer=" + outer +
								" inner=" + inner +
								" lambda exceeded max (" + lambda + " > " + options.lambdaMax + "), aborting outer pass");
					}
					outerAborted = true;
				}
				maxDeltaOuter = accepted ? maxDelta : 0.0;
				avgPairRmsOuter = accepted ? trialAvgPairRms : avgPairRmsDecision;
				avgPairRmsPureOuter = accepted ? trialAvgPairRmsPure : avgPairRmsPure;
				solvedOuter = solved;
				failedOuter = failed;
				markProgressInner(
						outer,
						inner,
						solved,
						failed,
						solvedPairs,
						failedPairs,
						avgPairRmsOuter,
						avgPairRmsPureOuter,
						maxDeltaOuter,
						pcgIter,
						lambda,
						lpfSqSumLog,
						lpfWeightSumLog);
				if (innerPairRmsHistory != null) {
					innerPairRmsHistory.add(accepted ?
							((pairRmsTrial == null) ? null : pairRmsTrial.clone()) :
							((pairRmsInner == null) ? null : pairRmsInner.clone()));
				}
					if (debugLevel > -3) {
						System.out.println(
								"IntersceneGlobalRefine: outer=" + outer +
								" inner=" + inner +
								" solvedScenes=" + solved +
								" failedScenes=" + failed +
								" solvedPairs=" + solvedPairs +
								" failedPairs=" + failedPairs +
								" avgPairRms=" + avgPairRmsOuter + " (" + avgPairRmsInitial + ")" +
								" avgPairRmsPure=" + avgPairRmsPureOuter + " (" + avgPairRmsPureInitial + ")" +
								" lpfSqSum=" + lpfSqSumLog +
								" lpfWeightSum=" + lpfWeightSumLog +
								" " + lpfBreakdownLog +
								" maxDelta=" + maxDeltaOuter +
								" pcgIter=" + pcgIter +
								" lambda=" + lambda);
					}
				if (outerAborted) {
					break;
				}
					if (accepted && ((maxDelta < options.deltaStop) || rmsConverged)) {
						if (innerSceneHistory != null) {
							// State after this inner update (suffix -inner).
							innerSceneHistory.add(cloneScenePoses(scenes_xyzatr));
						}
						outerConverged = true;
						break;
					}
					if (innerSceneHistory != null) {
							// State after this inner update (suffix -inner).
							innerSceneHistory.add(cloneScenePoses(scenes_xyzatr));
					}
				}
				result.maxDelta = maxDeltaOuter;
				result.outerDone = outer + 1;
				result.solvedScenes = solvedOuter;
				result.failedScenes = failedOuter;
				result.avgPairRms = avgPairRmsOuter;
				result.avgPairRmsPure = avgPairRmsPureOuter;
				result.pcgIterationsLast = pcgIterOuter;
				if ((innerSceneHistory != null) && (innerPairRmsHistory != null)) {
					if ((innerSceneHistory != null) && (innerPairRmsHistory != null) &&
							(innerPairRmsHistory.size() < innerSceneHistory.size())) {
						innerPairRmsHistory.add(
									evaluateCachedPairRms(
											clt_parameters,
											quadCLTs,
											centerIndex,
											obsCache,
										firstScene,
										endScene,
										scenes_xyzatr,
										pullScenePoses,
										scenesBeforeOuter,
										scenesForInnerRates,
										sceneTimes,
											paramSelect,
										lmaParamRegweights,
										sceneParIndices,
										activePoseScene,
										disableErs,
										includeRateChain,
										debugLevel));
					}
					dumpObservationCsv(
							obsCache,
							innerSceneHistory,
							innerPairRmsHistory,
							options.testInnerResultsCsv,
							firstScene,
							endScene,
							centerIndex,
							quadCLTs,
							centerCLT,
							outer);
				}
				if (outerAborted) {
					if (debugLevel > -3) {
						System.out.println(
								"IntersceneGlobalRefine: aborting after outer=" + outer +
								", saved available per-outer CSV state.");
					}
					break;
				}
				if (waitingExtraOuterDecision && !outerConverged) {
					waitingExtraOuterDecision = false;
					if (debugLevel > -3) {
						System.out.println(
								"IntersceneGlobalRefine: extra outer pass at outer=" + outer +
								" did not converge yet, continuing normal outer iterations.");
					}
				}
				if (outerConverged) {
				final boolean runExtraOuter =
						options.runOneMoreOuterAfterConverged &&
						!extraOuterAfterConvergedUsed &&
						((outer + 1) < options.outerIterations);
				if (runExtraOuter) {
					extraOuterAfterConvergedUsed = true;
					waitingExtraOuterDecision = true;
					extraOuterBaselineRmsPure = avgPairRmsPureOuter;
					extraOuterBaselineIndex = outer;
					if (debugLevel > -3) {
						System.out.println(
								"IntersceneGlobalRefine: outer converged at outer=" + outer +
								", running one extra outer pass for recorrelation test" +
								" (baseline avgPairRmsPure=" + extraOuterBaselineRmsPure + ")");
					}
					continue;
				}
				if (waitingExtraOuterDecision) {
					waitingExtraOuterDecision = false;
					final double denom = Math.max(
							Math.abs(extraOuterBaselineRmsPure),
							MIN_DIAG);
					final double relImprovement =
							(extraOuterBaselineRmsPure - avgPairRmsPureOuter) / denom;
					final boolean significantImprovement =
							Double.isFinite(relImprovement) &&
							(relImprovement > options.rmsDiffStop);
					if (significantImprovement && ((outer + 1) < options.outerIterations)) {
						if (debugLevel > -3) {
							System.out.println(
									"IntersceneGlobalRefine: extra outer pass improved avgPairRmsPure by " +
									relImprovement + " (baseline outer=" + extraOuterBaselineIndex +
									", threshold=" + options.rmsDiffStop +
									"), continuing outer iterations.");
						}
						continue;
					}
					if (debugLevel > -3) {
						System.out.println(
								"IntersceneGlobalRefine: extra outer pass improvement=" + relImprovement +
								" (baseline outer=" + extraOuterBaselineIndex +
								", threshold=" + options.rmsDiffStop +
								"), stopping outer iterations.");
					}
				}
					break;
				}
			}
			if ((firstObsCache != null) && (lastObsCache != null)) {
				final double[][][] scenesFinal = cloneScenePoses(scenes_xyzatr);
				final double[][][] initialYbase = (firstScenesYbase != null) ? firstScenesYbase : scenesInitial;
				final double[][][] initialRates = (firstScenesRates != null) ? firstScenesRates : scenesInitial;
				final double[][][] finalYbase = (lastScenesYbase != null) ? lastScenesYbase : scenesFinal;
				final double[][][] finalRates = (lastScenesRates != null) ? lastScenesRates : scenesFinal;
				final double[] pairRmsInitial = evaluateCachedPairRms(
						clt_parameters,
						quadCLTs,
						centerIndex,
						firstObsCache,
						firstScene,
						endScene,
						scenesInitial,
						pullScenePoses,
						initialYbase,
						initialRates,
						sceneTimes,
						paramSelect,
						lmaParamRegweights,
						sceneParIndices,
						activePoseScene,
						disableErs,
						firstIncludeRateChain,
						debugLevel);
				final double[] pairRmsFinal = evaluateCachedPairRms(
						clt_parameters,
						quadCLTs,
						centerIndex,
						lastObsCache,
						firstScene,
						endScene,
						scenesFinal,
						pullScenePoses,
						finalYbase,
						finalRates,
						sceneTimes,
						paramSelect,
						lmaParamRegweights,
						sceneParIndices,
						activePoseScene,
						disableErs,
						lastIncludeRateChain,
						debugLevel);
				dumpObservationCsvInitialFinal(
						firstObsCache,
						pairRmsInitial,
						scenesInitial,
						lastObsCache,
						pairRmsFinal,
						scenesFinal,
						firstScene,
						endScene,
						centerIndex,
						quadCLTs,
						centerCLT);
			}
			if (debugLevel > -4) {
				System.out.println(String.format(
						"IntersceneGlobalRefine: finished, outerDone=%d, solved=%d, failed=%d, avgRms=%8.6f, avgRmsPure=%8.6f",
					result.outerDone,
					result.solvedScenes,
					result.failedScenes,
					result.avgPairRms,
					result.avgPairRmsPure));
		}
		finishProgress(
				"done",
				result);
		return result;
	}

	// -----------------------------------------------------------------------------
	// Pair/reference orchestration
	// -----------------------------------------------------------------------------

	/**
	 * Collect the pair factors to be correlated during one outer iteration.
	 *
	 * <p>Stage one adds a scene-to-center factor (weight 1.0) for every active scene in
	 * {@code firstScene..lastScene}. Stage two runs only when
	 * {@code clt_parameters.imp.fmg_initial_en} is set: active scenes for which
	 * {@code estimateAverageShift()} stays below {@code minOffset} (tried with its boolean
	 * flag off, then on) are handed to the legacy {@link Interscene#getFPNPairs}, and every
	 * returned (scene, reference) pair that survives the range/activity/availability filters
	 * is appended as a non-center factor.
	 *
	 * @param clt_parameters  configuration; reads {@code imp.fmg_initial_en},
	 *                        {@code imp.fmg_rectilinear} and {@code imp.fmg_distance}
	 * @param quadCLTs        scene instances indexed by scene number
	 * @param scenes_xyzatr   per-scene poses indexed by scene number
	 * @param activePoseScene activity flags indexed by {@code scene - firstScene}
	 * @param firstScene      first scene index (inclusive)
	 * @param lastScene       last scene index (inclusive)
	 * @param centerIndex     index of the fixed center scene
	 * @param minOffset       shift threshold below which a scene becomes an FPN candidate
	 * @param debugLevel      verbosity; summary is printed above -3, per-pair lines above 1
	 * @return unique pair factors, scene-to-center factors first
	 */
	private static ArrayList<PairFactor> buildPairFactors(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr,
			final boolean[] activePoseScene,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final double minOffset,
			final int debugLevel) {
		final ArrayList<PairFactor> result = new ArrayList<PairFactor>();
		// Keys pack (scene, reference) into one long so a pair is never added twice.
		final HashSet<Long> seenKeys = new HashSet<Long>();

		// Stage 1: one scene-to-center factor per active scene.
		for (int scene = firstScene; scene <= lastScene; scene++) {
			final int slot = scene - firstScene;
			if ((slot < 0) || (slot >= activePoseScene.length) || !activePoseScene[slot]) {
				continue;
			}
			final long pairKey = (((long) scene) << 32) | (centerIndex & 0xffffffffL);
			if (!seenKeys.add(pairKey)) {
				continue;
			}
			result.add(new PairFactor(
					scene,
					centerIndex,
					false,
					1.0));
		}
		if (!clt_parameters.imp.fmg_initial_en) {
			return result;
		}

		// Stage 2a: find active scenes whose estimated shift to the center is too small.
		final double avgZ = getAverageAbsZ(
				scenes_xyzatr,
				activePoseScene,
				firstScene);
		final ArrayList<Integer> candidates = new ArrayList<Integer>();
		for (int scene = firstScene; scene <= lastScene; scene++) {
			final int slot = scene - firstScene;
			final boolean isActive =
					(slot >= 0) && (slot < activePoseScene.length) && activePoseScene[slot];
			if (isActive) {
				double shift = quadCLTs[centerIndex].estimateAverageShift(
						getScenePose(scenes_xyzatr, centerIndex, centerIndex),
						getScenePose(scenes_xyzatr, scene, centerIndex),
						avgZ,
						false,
						clt_parameters.imp.fmg_rectilinear);
				if (shift < minOffset) {
					// Second opinion with the alternative flag before declaring a candidate.
					shift = quadCLTs[centerIndex].estimateAverageShift(
							getScenePose(scenes_xyzatr, centerIndex, centerIndex),
							getScenePose(scenes_xyzatr, scene, centerIndex),
							avgZ,
							true,
							clt_parameters.imp.fmg_rectilinear);
					if (shift < minOffset) {
						candidates.add(scene);
					}
				}
			}
		}
		if (candidates.isEmpty()) {
			return result;
		}

		// Stage 2b: ask the legacy pairing code for non-center references.
		final double lowestDistance = minOffset + 2.0;
		final double configuredDistance = clt_parameters.imp.fmg_distance;
		final double fmgDistance = (configuredDistance < lowestDistance) ? lowestDistance : configuredDistance;
		final int[][] fpnPairs = Interscene.getFPNPairs(
				candidates,
				fmgDistance,
				clt_parameters.imp.fmg_rectilinear,
				quadCLTs,
				scenes_xyzatr,
				avgZ,
				centerIndex,
				firstScene);
		int numNonCenter = 0;
		for (final int[] pair : fpnPairs) {
			final int scene = pair[0];
			final int ref = pair[1];
			final boolean bothInRange =
					(scene >= firstScene) && (scene <= lastScene) &&
					(ref >= firstScene) && (ref <= lastScene);
			if (!bothInRange || (scene == ref) || (ref == centerIndex)) {
				continue;
			}
			final int sceneSlot = scene - firstScene;
			final int refSlot = ref - firstScene;
			final boolean sceneActive =
					(sceneSlot >= 0) && (sceneSlot < activePoseScene.length) && activePoseScene[sceneSlot];
			final boolean refActive =
					(refSlot >= 0) && (refSlot < activePoseScene.length) && activePoseScene[refSlot];
			if (!sceneActive || !refActive) {
				continue;
			}
			if ((quadCLTs[ref] == null) || (scenes_xyzatr[ref] == null)) {
				continue;
			}
			final long pairKey = (((long) scene) << 32) | (ref & 0xffffffffL);
			if (!seenKeys.add(pairKey)) {
				continue;
			}
			result.add(new PairFactor(
					scene,
					ref,
					true,
					1.0));
			numNonCenter++;
			if (debugLevel > 1) {
				System.out.println("IntersceneGlobalRefine: added non-center pair scene=" + scene + " ref=" + ref);
			}
		}
		if (debugLevel > -3) {
			System.out.println("IntersceneGlobalRefine: pair factors total=" + result.size() +
					", nonCenterAdded=" + numNonCenter + ", fpnCandidates=" + candidates.size());
		}
		return result;
	}

	/**
	 * Assign a weight of 1.0 or 0.0 to every pair factor.
	 *
	 * <p>A factor is zero-weighted only when its reference is the center scene and its scene
	 * index lies within {@code centerPairWeightMode} positions of the center (but is not the
	 * center itself). Negative modes are treated as 0, which leaves every weight at 1.0.
	 *
	 * @param pairFactors          factors to update in place; {@code null} or empty is a no-op
	 * @param centerIndex          index of the center scene
	 * @param centerPairWeightMode half-width (in scene indices) of the zero-weighted band
	 * @param debugLevel           verbosity; a summary line is printed above -3
	 */
	private static void applyPairWeights(
			final ArrayList<PairFactor> pairFactors,
			final int centerIndex,
			final int centerPairWeightMode,
			final int debugLevel) {
		if ((pairFactors == null) || pairFactors.isEmpty()) {
			return;
		}
		final int mode = (centerPairWeightMode > 0) ? centerPairWeightMode : 0;
		int zeroCount = 0;
		for (int i = 0; i < pairFactors.size(); i++) {
			final PairFactor factor = pairFactors.get(i);
			final int distance = Math.abs(factor.sceneIndex - centerIndex);
			final boolean suppressed =
					(mode > 0) &&
					(factor.refSceneIndex == centerIndex) &&
					(distance > 0) &&
					(distance <= mode);
			if (suppressed) {
				factor.pairWeight = 0.0;
				zeroCount++;
			} else {
				factor.pairWeight = 1.0;
			}
		}
		if (debugLevel > -3) {
			System.out.println(
					"IntersceneGlobalRefine: pair weights applied, mode=" + mode +
					", zeroWeightedPairs=" + zeroCount + "/" + pairFactors.size());
		}
	}

	/**
	 * Average {@code |scenes_xyzatr[scene][0][2]|} over the active scenes, never below 1.0.
	 *
	 * <p>Scene numbers are {@code firstScene + i} for each index {@code i} of
	 * {@code activePoseScene}. Inactive entries, scene numbers outside {@code scenes_xyzatr}
	 * and {@code null} poses are ignored.
	 *
	 * @param scenes_xyzatr   per-scene poses indexed by scene number
	 * @param activePoseScene activity flags indexed by {@code scene - firstScene}
	 * @param firstScene      scene number that corresponds to flag index 0
	 * @return {@code max(1.0, mean)} of the collected values, or 1.0 when none were collected
	 */
	private static double getAverageAbsZ(
			final double[][][] scenes_xyzatr,
			final boolean[] activePoseScene,
			final int firstScene) {
		double total = 0.0;
		int count = 0;
		int nextScene = firstScene;
		for (final boolean active : activePoseScene) {
			final int scene = nextScene++;
			final boolean usable = active &&
					(scene >= 0) &&
					(scene < scenes_xyzatr.length) &&
					(scenes_xyzatr[scene] != null);
			if (usable) {
				total += Math.abs(scenes_xyzatr[scene][0][2]);
				count++;
			}
		}
		if (count == 0) {
			return 1.0;
		}
		return Math.max(1.0, total / count);
	}

	/**
	 * Return the GPU reference data for {@code refScene}, building and caching it when needed.
	 *
	 * <p>A cached entry is reused only when it belongs to the same scene and already holds both
	 * tasks and {@code pXpYD}. For the center scene the supplied {@code pXpYDCenter} and
	 * {@code reliableRefCenter} are used directly; for any other reference they are derived via
	 * {@code OpticalFlow.transformToScenePxPyD()} and {@code buildPairReliableMask()}.
	 *
	 * @param clt_parameters    configuration; reads {@code imp.mb_en} and {@code imp.mb_tau}
	 * @param quadCLTs          scene instances indexed by scene number
	 * @param scenes_xyzatr     per-scene poses indexed by scene number
	 * @param sceneTimes        per-scene times, forwarded to {@code getSceneRates()}
	 * @param firstScene        first scene index; cache slot is {@code refScene - firstScene}
	 * @param lastScene         last scene index, forwarded to {@code getSceneRates()}
	 * @param centerIndex       index of the center scene
	 * @param refScene          scene to be used as correlation reference
	 * @param refDisparity      reference disparity passed to the GPU setup
	 * @param pXpYDCenter       center-scene {@code pXpYD}
	 * @param reliableRefCenter center-scene reliability mask
	 * @param mb_max_gain       motion-blur gain limit passed to the GPU setup
	 * @param cache             per-scene cache, updated on success when the slot is in range
	 * @param debugLevel        verbosity forwarded to the called helpers
	 * @return reference data, or {@code null} when the scene is unavailable or no tasks were produced
	 */
	private static RefGpuData getReferenceGpuData(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr,
			final double[] sceneTimes,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final int refScene,
			final double[] refDisparity,
			final double[][] pXpYDCenter,
			final boolean[] reliableRefCenter,
			final double mb_max_gain,
			final RefGpuData[] cache,
			final int debugLevel) {
		final int slot = refScene - firstScene;
		final boolean slotInRange = (slot >= 0) && (slot < cache.length);
		if (slotInRange) {
			final RefGpuData cached = cache[slot];
			final boolean reusable = (cached != null) &&
					(cached.refSceneIndex == refScene) &&
					(cached.tasks != null) &&
					(cached.pXpYD != null);
			if (reusable) {
				return cached;
			}
		}
		final boolean sceneAvailable =
				(refScene >= 0) && (refScene < quadCLTs.length) && (quadCLTs[refScene] != null);
		if (!sceneAvailable) {
			return null;
		}
		final boolean refIsCenter = (refScene == centerIndex);
		final RefGpuData data = new RefGpuData();
		data.refSceneIndex = refScene;
		final double[][] pose = getScenePose(
				scenes_xyzatr,
				refScene,
				centerIndex);
		final double[][] rates = getSceneRates(
				refScene,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				firstScene,
				lastScene,
				centerIndex);
		if (refIsCenter) {
			data.pXpYD = pXpYDCenter;
		} else {
			data.pXpYD = OpticalFlow.transformToScenePxPyD(
					null,
					refDisparity,
					pose[0],
					pose[1],
					quadCLTs[refScene],
					quadCLTs[centerIndex]);
		}
		if (refIsCenter) {
			data.reliableMask = reliableRefCenter;
		} else {
			data.reliableMask = buildPairReliableMask(
					clt_parameters,
					quadCLTs,
					centerIndex,
					refScene,
					centerIndex,
					scenes_xyzatr,
					reliableRefCenter,
					debugLevel);
		}
		// Motion-blur vectors are only prepared when motion-blur handling is enabled.
		double[][] mbVectors = null;
		if (clt_parameters.imp.mb_en) {
			final double[][] ratesCopy = {
					rates[0].clone(),
					rates[1].clone()};
			final double[][] dt = QuadCLTCPU.scaleDtFromErs(
					clt_parameters,
					ratesCopy);
			mbVectors = OpticalFlow.getMotionBlur(
					quadCLTs[centerIndex],
					quadCLTs[refScene],
					pXpYDCenter,
					pose[0],
					pose[1],
					dt[0],
					dt[1],
					0,
					debugLevel);
		}
		final TpTask[][] gpuTasks = Interscene.setReferenceGPU(
				clt_parameters,
				quadCLTs[refScene],
				refDisparity,
				data.pXpYD,
				null,
				data.reliableMask,
				0,
				clt_parameters.imp.mb_tau,
				mb_max_gain,
				mbVectors,
				debugLevel);
		final boolean haveTasks = (gpuTasks != null) && (gpuTasks.length != 0) && (gpuTasks[0] != null);
		if (!haveTasks) {
			return null;
		}
		data.tasks = gpuTasks[0];
		if (slotInRange) {
			cache[slot] = data;
		}
		return data;
	}

	/**
	 * Create the observation cache for one outer iteration by correlating every pair factor.
	 *
	 * <p>The cache vectors are allocated once for all factors and addressed by factor ordinal:
	 * <pre>
	 * yVector: 2 * tilesPerPair entries per pair ({dx,dy} per tile)
	 * wVector:     tilesPerPair entries per pair (weight per tile)
	 * </pre>
	 * A factor whose reference GPU data or pair observation cannot be produced is counted in
	 * {@code failedPairs} and its slice of the vectors stays zero; successful factors are appended
	 * to {@code observations} and counted in {@code solvedPairs}.
	 *
	 * @param clt_parameters configuration forwarded to the per-pair helpers
	 * @param quadCLTs       scene instances indexed by scene number
	 * @param centerCLT      center scene; supplies the tile grid dimensions
	 * @param centerIndex    index of the center scene
	 * @param firstScene     first scene index
	 * @param lastScene      last scene index
	 * @param scenes_xyzatr  per-scene poses indexed by scene number
	 * @param sceneTimes     per-scene times
	 * @param pairFactors    factors to correlate; {@code null} or empty yields an empty cache
	 * @param refDisparity   reference disparity
	 * @param pXpYDCenter    center-scene {@code pXpYD}
	 * @param reliableRef    center-scene reliability mask
	 * @param mb_max_gain    motion-blur gain limit
	 * @param minMax         forwarded to the pair correlation
	 * @param refGpuCache    per-reference GPU data cache shared between factors
	 * @param debugLevel     verbosity forwarded to the helpers
	 * @return populated cache (never {@code null})
	 */
	private static ObservationCache buildObservationCache(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT,
			final int centerIndex,
			final int firstScene,
			final int lastScene,
			final double[][][] scenes_xyzatr,
			final double[] sceneTimes,
			final ArrayList<PairFactor> pairFactors,
			final double[] refDisparity,
			final double[][] pXpYDCenter,
			final boolean[] reliableRef,
			final double mb_max_gain,
			final double[] minMax,
			final RefGpuData[] refGpuCache,
			final int debugLevel) {
		final ObservationCache result = new ObservationCache();
		if ((pairFactors == null) || pairFactors.isEmpty()) {
			return result;
		}
		final int numFactors = pairFactors.size();
		final int tilesX = centerCLT.getTileProcessor().getTilesX();
		final int tilesY = centerCLT.getTileProcessor().getTilesY();
		result.tilesPerPair = tilesX * tilesY;
		result.yVector = new double[2 * result.tilesPerPair * numFactors];
		result.wVector = new double[result.tilesPerPair * numFactors];
		int nextOrdinal = 0;
		for (final PairFactor factor : pairFactors) {
			// The ordinal advances for failed factors too, so vector offsets stay factor-aligned.
			final int ordinal = nextOrdinal++;
			final RefGpuData refGpu = getReferenceGpuData(
					clt_parameters,
					quadCLTs,
					scenes_xyzatr,
					sceneTimes,
					firstScene,
					lastScene,
					centerIndex,
					factor.refSceneIndex,
					refDisparity,
					pXpYDCenter,
					reliableRef,
					mb_max_gain,
					refGpuCache,
					debugLevel);
			final PairObservation observation = (refGpu == null) ? null : buildPairObservation(
					clt_parameters,
					quadCLTs,
					centerCLT,
					centerIndex,
					factor.sceneIndex,
					factor.refSceneIndex,
					firstScene,
					lastScene,
					scenes_xyzatr,
					sceneTimes,
					refGpu.pXpYD,
					refGpu.tasks,
					refGpu.reliableMask,
					refDisparity,
					minMax,
					mb_max_gain,
					factor.pairWeight,
					ordinal,
					result,
					debugLevel);
			if (observation != null) {
				result.observations.add(observation);
				result.solvedPairs++;
			} else {
				result.failedPairs++;
			}
		}
		return result;
	}

	/**
	 * Debug helper: render the cached observation vectors of one outer iteration as a hyperstack
	 * and save it in the model directory of {@code centerCLT}.
	 *
	 * <p>Frames are {@code dX}, {@code dY} and {@code W}; each slice is one cached pair titled
	 * {@code "scene->ref"} (image names when the scene is available, {@code scene#N}/{@code ref#N}
	 * otherwise). Only values stored in the cache vectors are shown.
	 *
	 * <p>Nothing is produced when the cache, its observation list or {@code centerCLT} is missing,
	 * when there are no observations, or when {@code tilesPerPair} is not positive. This matches
	 * the guards used by the CSV dump helpers, so a missing debug target never aborts refinement.
	 *
	 * @param cache          observation cache to visualize
	 * @param quadCLTs       scene instances indexed by scene number (used for slice titles)
	 * @param centerCLT      center scene; supplies the image width in tiles, the title prefix and the save location
	 * @param outerIteration outer iteration number appended to the image title
	 * @param show           forwarded to {@code ShowDoubleFloatArrays.showArraysHyperstack()}
	 */
	private static void dumpObservationHyperstack(
			final ObservationCache cache,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT,
			final int outerIteration,
			final boolean show) {
		if ((cache == null) || (centerCLT == null) || (cache.observations == null) ||
				cache.observations.isEmpty() || (cache.tilesPerPair <= 0)) {
			return;
		}
		final int numPairs = cache.observations.size();
		final int tilesPerPair = cache.tilesPerPair;
		final int tilesX = centerCLT.getTileProcessor().getTilesX();
		final double[][][] pixels = new double[3][numPairs][];
		final String[] titles = new String[numPairs];
		for (int ipair = 0; ipair < numPairs; ipair++) {
			final PairObservation obs = cache.observations.get(ipair);
			final boolean haveScene =
					(obs.sceneIndex >= 0) && (obs.sceneIndex < quadCLTs.length) && (quadCLTs[obs.sceneIndex] != null);
			final boolean haveRef =
					(obs.refSceneIndex >= 0) && (obs.refSceneIndex < quadCLTs.length) && (quadCLTs[obs.refSceneIndex] != null);
			final String sceneName = haveScene ? quadCLTs[obs.sceneIndex].getImageName() : ("scene#" + obs.sceneIndex);
			final String refName = haveRef ? quadCLTs[obs.refSceneIndex].getImageName() : ("ref#" + obs.refSceneIndex);
			titles[ipair] = sceneName + "->" + refName;
			final double[] dx = new double[tilesPerPair];
			final double[] dy = new double[tilesPerPair];
			final double[] weights = new double[tilesPerPair];
			// yVector interleaves {dx,dy} per tile; split it into two planes.
			for (int itile = 0; itile < tilesPerPair; itile++) {
				dx[itile] = cache.yVector[obs.yBase + 2 * itile];
				dy[itile] = cache.yVector[obs.yBase + 2 * itile + 1];
			}
			// wVector is already one value per tile, so a block copy is sufficient.
			System.arraycopy(cache.wVector, obs.wBase, weights, 0, tilesPerPair);
			pixels[0][ipair] = dx;
			pixels[1][ipair] = dy;
			pixels[2][ipair] = weights;
		}
		final String[] frameTitles = {"dX","dY","W"};
		final String debugTitle = centerCLT.getImageName() + "-IGR_OBS_OUTER-" + outerIteration;
		final ImagePlus imgDebug = ShowDoubleFloatArrays.showArraysHyperstack(
				pixels,      // double[][][] pixels,
				tilesX,      // int          width,
				debugTitle,  // String       title,
				titles,      // String []    titles,
				frameTitles, // String []    frame_titles,
				show);       // boolean      show
		if (imgDebug != null) {
			centerCLT.saveImagePlusInModelDirectory(imgDebug);
		}
	}

	/**
	 * Correlate one scene against its reference and store the result in the shared cache.
	 *
	 * <p>Per-tile {@code {dx,dy}} values go to {@code cache.yVector} and weights to
	 * {@code cache.wVector}, at offsets derived from {@code pairOrdinal}. A tile weight is the
	 * correlation strength times {@code pairWeight}; it is forced to 0 for tiles outside the
	 * pair reliability mask and for non-positive {@code pairWeight}. The returned observation also
	 * carries {@code vectorXys}, holding {@code {dx,dy,w}} for tiles whose stored weight is not
	 * {@code <= 0} and {@code null} elsewhere.
	 *
	 * @param clt_parameters configuration; reads {@code imp.mb_en}, {@code imp.margin},
	 *                       {@code imp.sensor_mask_inter} and {@code imp.debug_level}
	 * @param quadCLTs       scene instances indexed by scene number
	 * @param centerCLT      center scene
	 * @param centerIndex    index of the center scene
	 * @param nscene         scene to correlate
	 * @param refScene       reference scene of the pair
	 * @param earliestScene  first scene index, forwarded to {@code getSceneRates()}
	 * @param lastScene      last scene index, forwarded to {@code getSceneRates()}
	 * @param scenes_xyzatr  per-scene poses indexed by scene number
	 * @param sceneTimes     per-scene times
	 * @param pXpYDRef       reference-scene {@code pXpYD}
	 * @param tpTaskRef      reference-scene GPU tasks
	 * @param reliableRef    reference reliability mask
	 * @param refDisparity   reference disparity
	 * @param minMax         forwarded to {@code Interscene.interCorrPair()}
	 * @param mb_max_gain    motion-blur gain limit
	 * @param pairWeight     weight multiplier of this pair
	 * @param pairOrdinal    position of this pair within the cache vectors
	 * @param cache          cache whose {@code yVector}/{@code wVector} receive the data
	 * @param debugLevel     verbosity forwarded to the helpers
	 * @return the pair observation, or {@code null} when the scene is unavailable or correlation
	 *         returned no usable data
	 */
	private static PairObservation buildPairObservation(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT,
			final int centerIndex,
			final int nscene,
			final int refScene,
			final int earliestScene,
			final int lastScene,
			final double[][][] scenes_xyzatr,
			final double[] sceneTimes,
			final double[][] pXpYDRef,
			final TpTask[] tpTaskRef,
			final boolean[] reliableRef,
			final double[] refDisparity,
			final double[] minMax,
			final double mb_max_gain,
			final double pairWeight,
			final int pairOrdinal,
			final ObservationCache cache,
			final int debugLevel) {
		if ((quadCLTs[nscene] == null) || (scenes_xyzatr[nscene] == null)) {
			return null;
		}
		final boolean[] pairMask = buildPairReliableMask(
				clt_parameters,
				quadCLTs,
				centerIndex,
				nscene,
				refScene,
				scenes_xyzatr,
				reliableRef,
				debugLevel);
		final double[][] pose = getScenePose(
				scenes_xyzatr,
				nscene,
				centerIndex);
		final double[][] rates = getSceneRates(
				nscene,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				centerIndex);
		// Motion-blur vectors are only prepared when motion-blur handling is enabled.
		double[][] mbVectors = null;
		if (clt_parameters.imp.mb_en) {
			final double[][] ratesCopy = {
					rates[0].clone(),
					rates[1].clone()};
			final double[][] dt = QuadCLTCPU.scaleDtFromErs(
					clt_parameters,
					ratesCopy);
			mbVectors = OpticalFlow.getMotionBlur(
					centerCLT,
					quadCLTs[nscene],
					pXpYDRef,
					pose[0],
					pose[1],
					dt[0],
					dt[1],
					0,
					debugLevel);
		}
		final int[] failReason = {0};
		final double[][][] corr = Interscene.interCorrPair(
				clt_parameters,
				false, // force 2D for global cached refinement
				false,
				mb_max_gain,
				minMax,
				failReason,
				centerCLT,
				refDisparity,
				quadCLTs[refScene],
				pXpYDRef,
				tpTaskRef,
				quadCLTs[nscene],
				pose[0],
				pose[1],
				pairMask,
				clt_parameters.imp.margin,
				clt_parameters.imp.sensor_mask_inter,
				null,
				null,
				false,
				false,
				true,
				mbVectors,
				-1,
				false,
				clt_parameters.imp.debug_level,
				debugLevel);
		final boolean corrUsable =
				(corr != null) && (corr.length >= 2) && (corr[0] != null) && (corr[1] != null);
		if (!corrUsable) {
			return null;
		}
		final double[][] vectors = corr[1];
		final PairObservation result = new PairObservation();
		result.sceneIndex = nscene;
		result.refSceneIndex = refScene;
		result.pairWeight = pairWeight;
		result.centers = corr[0];
		result.eigen = (corr.length > 2) ? corr[2] : null;
		result.reliableRef = pairMask;
		result.yBase = pairOrdinal * 2 * cache.tilesPerPair;
		result.wBase = pairOrdinal * cache.tilesPerPair;
		final int pairTiles = result.centers.length;

		// Transfer correlation vectors into the cache slice of this pair.
		final int numTransfer = Math.min(
				Math.min(cache.tilesPerPair, pairTiles),
				vectors.length);
		for (int tile = 0; tile < numTransfer; tile++) {
			final double[] vec = vectors[tile];
			if ((vec != null) && (vec.length >= 3)) {
				double weight = 0.0;
				if (pairWeight > 0.0) {
					final boolean trusted =
							(pairMask == null) || ((tile < pairMask.length) && pairMask[tile]);
					weight = (trusted ? vec[2] : 0.0) * pairWeight;
				}
				final int yOffset = result.yBase + 2 * tile;
				cache.yVector[yOffset + 0] = vec[0];
				cache.yVector[yOffset + 1] = vec[1];
				cache.wVector[result.wBase + tile] = weight;
			}
		}

		// Per-tile view rebuilt from the cache; entries not assigned below stay null.
		result.vectorXys = new double[pairTiles][];
		final int numViewed = Math.min(pairTiles, cache.tilesPerPair);
		for (int tile = 0; tile < numViewed; tile++) {
			final double weight = cache.wVector[result.wBase + tile];
			if (!(weight <= 0.0)) {
				final int yOffset = result.yBase + 2 * tile;
				result.vectorXys[tile] = new double[] {
						cache.yVector[yOffset + 0],
						cache.yVector[yOffset + 1],
						weight};
			}
		}
		return result;
	}

	/**
	 * Write the per-pair CSV of one outer iteration ({@code -IGR_OBS_OUTER-<outer>.csv}) into the
	 * model directory of {@code centerCLT}.
	 *
	 * <p>The number of available states is the shorter of the two history lists. All of them are
	 * emitted when {@code includeAllInner} is set or when there are at most two; otherwise only
	 * the first and the last. Each selected state contributes one column group
	 * ({@code X,Y,Z,A,T,R}, {@code CURV_X..CURV_R}, {@code RMS}) whose names carry the suffix
	 * returned by {@code getStateSuffix()}.
	 *
	 * <p>Rows are ordered by scene index, then reference index, then cache position. One extra
	 * row for the center scene (pair index -1, weight 1.0) is inserted before the first row whose
	 * scene index exceeds {@code centerIndex}, or appended when there is no such row.
	 *
	 * @param cache               observation cache of this outer iteration
	 * @param innerSceneHistory   scene poses per recorded state
	 * @param innerPairRmsHistory pair RMS values per recorded state
	 * @param includeAllInner     emit every recorded state instead of only first and last
	 * @param firstScene          first scene index
	 * @param lastScene           last scene index
	 * @param centerIndex         index of the center scene
	 * @param quadCLTs            scene instances indexed by scene number (used for names)
	 * @param centerCLT           center scene; receives the CSV
	 * @param outerIteration      outer iteration number used in the file suffix
	 */
	private static void dumpObservationCsv(
			final ObservationCache cache,
			final ArrayList<double[][][]> innerSceneHistory,
			final ArrayList<double[]> innerPairRmsHistory,
			final boolean includeAllInner,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT,
			final int outerIteration) {
		if ((cache == null) || cache.observations.isEmpty() || (centerCLT == null) ||
				(innerSceneHistory == null) || (innerPairRmsHistory == null)) {
			return;
		}
		final int numStates = Math.min(
				innerSceneHistory.size(),
				innerPairRmsHistory.size());
		if (numStates <= 0) {
			return;
		}
		final boolean emitEveryState = includeAllInner || (numStates <= 2);
		final int[] selectedStates = emitEveryState ? new int[numStates] : new int[] {0, numStates - 1};
		if (emitEveryState) {
			for (int i = 0; i < numStates; i++) {
				selectedStates[i] = i;
			}
		}

		// Header: fixed leading columns, then one column group per selected state.
		final String[] groupColumns = {
				"X", "Y", "Z", "A", "T", "R",
				"CURV_X", "CURV_Y", "CURV_Z", "CURV_A", "CURV_T", "CURV_R",
				"RMS"};
		final StringBuilder sb = new StringBuilder();
		sb.append("index,scene,reference,weight");
		for (final int state : selectedStates) {
			final String suffix = getStateSuffix(state);
			for (final String column : groupColumns) {
				sb.append(',').append(column).append(suffix);
			}
		}
		sb.append('\n');

		final String centerTs;
		if ((centerIndex >= 0) && (centerIndex < quadCLTs.length) && (quadCLTs[centerIndex] != null)) {
			centerTs = quadCLTs[centerIndex].getImageName();
		} else {
			centerTs = "center#" + centerIndex;
		}
		// The center row is needed in two places (inside and after the loop).
		final Runnable emitCenterRow = () -> appendCsvRow(
				sb,
				centerIndex,
				centerTs,
				centerTs,
				centerIndex,
				-1,
				1.0,
				selectedStates,
				innerSceneHistory,
				innerPairRmsHistory,
				firstScene,
				lastScene,
				centerIndex);

		// Row order: scene index, then reference index, then original cache position.
		final Integer[] order = new Integer[cache.observations.size()];
		for (int i = 0; i < order.length; i++) {
			order[i] = i;
		}
		Arrays.sort(
				order,
				(left, right) -> {
					final PairObservation obsLeft = cache.observations.get(left.intValue());
					final PairObservation obsRight = cache.observations.get(right.intValue());
					int cmp = Integer.compare(obsLeft.sceneIndex, obsRight.sceneIndex);
					if (cmp == 0) {
						cmp = Integer.compare(obsLeft.refSceneIndex, obsRight.refSceneIndex);
					}
					if (cmp == 0) {
						cmp = Integer.compare(left.intValue(), right.intValue());
					}
					return cmp;
				});

		boolean centerWritten = false;
		for (final Integer boxedPair : order) {
			final int ipair = boxedPair.intValue();
			final PairObservation obs = cache.observations.get(ipair);
			if (!centerWritten && (obs.sceneIndex > centerIndex)) {
				emitCenterRow.run();
				centerWritten = true;
			}
			final String sceneTs;
			if ((obs.sceneIndex >= 0) && (obs.sceneIndex < quadCLTs.length) && (quadCLTs[obs.sceneIndex] != null)) {
				sceneTs = quadCLTs[obs.sceneIndex].getImageName();
			} else {
				sceneTs = "scene#" + obs.sceneIndex;
			}
			final String refTs;
			if ((obs.refSceneIndex >= 0) && (obs.refSceneIndex < quadCLTs.length) && (quadCLTs[obs.refSceneIndex] != null)) {
				refTs = quadCLTs[obs.refSceneIndex].getImageName();
			} else {
				refTs = "ref#" + obs.refSceneIndex;
			}
			appendCsvRow(
					sb,
					obs.sceneIndex,
					sceneTs,
					refTs,
					obs.sceneIndex,
					ipair,
					obs.pairWeight,
					selectedStates,
					innerSceneHistory,
					innerPairRmsHistory,
					firstScene,
					lastScene,
					centerIndex);
		}
		if (!centerWritten) {
			emitCenterRow.run();
		}
		centerCLT.saveStringInModelDirectory(
				sb.toString(),
				"-IGR_OBS_OUTER-" + outerIteration + ".csv",
				false);
	}

	/**
	 * Save one CSV that places the initial and the final state of every observed pair side by side.
	 *
	 * <p>Columns without a suffix hold the initial state, columns ending with an apostrophe hold the
	 * final state. One row is produced per distinct (scene, reference) pair found in either cache, and a
	 * (center, center) row is always present. Rows are ordered by scene index, then by reference index.
	 * For the (center, center) row a missing RMS is reported as 0 and the weight as 1; for all other rows
	 * a missing RMS is reported as NaN and the weight comes from the final cache when available, else from
	 * the initial cache, else defaults to 1.
	 *
	 * <p>Nothing is written when either cache (or its observation list), either pose array, or
	 * {@code centerCLT} is {@code null}.
	 *
	 * @param initialCache   observations the {@code initialPairRms} indices refer to
	 * @param initialPairRms per-observation RMS of the initial state, may be {@code null}
	 * @param scenesInitial  initial scene poses
	 * @param finalCache     observations the {@code finalPairRms} indices refer to
	 * @param finalPairRms   per-observation RMS of the final state, may be {@code null}
	 * @param scenesFinal    final scene poses
	 * @param firstScene     first scene index, used for the curvature columns
	 * @param lastScene      last scene index, used for the curvature columns
	 * @param centerIndex    index of the fixed center scene
	 * @param quadCLTs       scenes, used to obtain the image names shown in the scene/reference columns
	 * @param centerCLT      scene instance used to save the file in its model directory
	 */
	private static void dumpObservationCsvInitialFinal(
			final ObservationCache initialCache,
			final double[] initialPairRms,
			final double[][][] scenesInitial,
			final ObservationCache finalCache,
			final double[] finalPairRms,
			final double[][][] scenesFinal,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT) {
		final boolean incomplete =
				(initialCache == null) || (finalCache == null) || (centerCLT == null) ||
				(initialCache.observations == null) || (finalCache.observations == null) ||
				(scenesInitial == null) || (scenesFinal == null);
		if (incomplete) {
			return;
		}

		// Pair key -> observation index, separately for each cache (last duplicate wins).
		final HashMap<Long, Integer> initialIndexByKey = new HashMap<Long, Integer>();
		for (int iobs = 0; iobs < initialCache.observations.size(); iobs++) {
			final PairObservation observation = initialCache.observations.get(iobs);
			initialIndexByKey.put(makePairKey(observation.sceneIndex, observation.refSceneIndex), iobs);
		}
		final HashMap<Long, Integer> finalIndexByKey = new HashMap<Long, Integer>();
		for (int iobs = 0; iobs < finalCache.observations.size(); iobs++) {
			final PairObservation observation = finalCache.observations.get(iobs);
			finalIndexByKey.put(makePairKey(observation.sceneIndex, observation.refSceneIndex), iobs);
		}

		// Union of both key sets plus the center row, ordered by (scene, reference).
		final HashSet<Long> pairKeys = new HashSet<Long>(initialIndexByKey.keySet());
		pairKeys.addAll(finalIndexByKey.keySet());
		pairKeys.add(Long.valueOf(makePairKey(centerIndex, centerIndex)));
		final ArrayList<Long> orderedKeys = new ArrayList<Long>(pairKeys);
		orderedKeys.sort((left, right) -> {
			final int byScene = Integer.compare(pairScene(left.longValue()), pairScene(right.longValue()));
			return (byScene != 0) ?
					byScene :
					Integer.compare(pairRef(left.longValue()), pairRef(right.longValue()));
		});

		final String fmt = "%.12g";
		final StringBuilder csv = new StringBuilder();
		csv.append("index,scene,reference,weight");
		csv.append(",X,Y,Z,A,T,R,CURV_X,CURV_Y,CURV_Z,CURV_A,CURV_T,CURV_R,RMS");
		csv.append(",X',Y',Z',A',T',R',CURV_X',CURV_Y',CURV_Z',CURV_A',CURV_T',CURV_R',RMS'");
		csv.append('\n');

		for (Long keyObj : orderedKeys) {
			final long key = keyObj.longValue();
			final int sceneIndex = pairScene(key);
			final int refIndex = pairRef(key);
			final boolean hasScene = (sceneIndex >= 0) && (sceneIndex < quadCLTs.length) && (quadCLTs[sceneIndex] != null);
			final String sceneTs = hasScene ? quadCLTs[sceneIndex].getImageName() : ("scene#" + sceneIndex);
			final boolean hasRef = (refIndex >= 0) && (refIndex < quadCLTs.length) && (quadCLTs[refIndex] != null);
			final String refTs = hasRef ? quadCLTs[refIndex].getImageName() : ("ref#" + refIndex);

			// Index 0 is the initial state, index 1 is the final state.
			final double[][][] poses = new double[2][][];
			final double[][] curvatures = new double[2][];
			final double[] rms = {Double.NaN, Double.NaN};
			double pairWeight = 1.0;
			for (int state = 0; state < 2; state++) {
				final boolean isInitial = (state == 0);
				final double[][][] scenes = isInitial ? scenesInitial : scenesFinal;
				final ObservationCache cache = isInitial ? initialCache : finalCache;
				final double[] pairRms = isInitial ? initialPairRms : finalPairRms;
				final Integer obsIndex = (isInitial ? initialIndexByKey : finalIndexByKey).get(keyObj);
				poses[state] = getScenePose(
						scenes,
						sceneIndex,
						centerIndex);
				curvatures[state] = getSceneCurvature(
						sceneIndex,
						firstScene,
						lastScene,
						centerIndex,
						scenes);
				if ((obsIndex == null) || (obsIndex.intValue() < 0)) {
					continue;
				}
				final int iobs = obsIndex.intValue();
				if ((pairRms != null) && (iobs < pairRms.length)) {
					rms[state] = pairRms[iobs];
				}
				// The final state is processed last, so its weight overrides the initial one.
				if (iobs < cache.observations.size()) {
					pairWeight = cache.observations.get(iobs).pairWeight;
				}
			}
			if ((sceneIndex == centerIndex) && (refIndex == centerIndex)) {
				for (int state = 0; state < 2; state++) {
					if (Double.isNaN(rms[state])) {
						rms[state] = 0.0;
					}
				}
				pairWeight = 1.0;
			}

			csv.append(sceneIndex);
			csv.append(',').append(sceneTs);
			csv.append(',').append(refTs);
			csv.append(',').append(String.format(Locale.US, fmt, pairWeight));
			for (int state = 0; state < 2; state++) {
				for (int group = 0; group < 2; group++) { // 0 - xyz, 1 - atr
					for (int comp = 0; comp < 3; comp++) {
						csv.append(',').append(String.format(Locale.US, fmt, poses[state][group][comp]));
					}
				}
				for (int comp = 0; comp < 6; comp++) {
					csv.append(',').append(String.format(Locale.US, fmt, curvatures[state][comp]));
				}
				csv.append(',').append(String.format(Locale.US, fmt, rms[state]));
			}
			csv.append('\n');
		}
		centerCLT.saveStringInModelDirectory(
				csv.toString(),
				"-IGR_OBS_INITIAL_FINAL.csv",
				false);
	}

	/**
	 * Pack a (scene, reference) index pair into a single {@code long}.
	 *
	 * @param sceneIndex scene index, stored in the upper 32 bits
	 * @param refIndex   reference scene index, stored (as unsigned) in the lower 32 bits
	 * @return combined key
	 */
	private static long makePairKey(
			final int sceneIndex,
			final int refIndex) {
		final long upper = ((long) sceneIndex) << 32;
		final long lower = Integer.toUnsignedLong(refIndex);
		return upper | lower;
	}

	/**
	 * Extract the scene index (upper 32 bits) from a packed pair key.
	 *
	 * @param pairKey packed (scene, reference) key
	 * @return scene index
	 */
	private static int pairScene(final long pairKey) {
		final long upper = pairKey >>> 32;
		return (int) upper;
	}

	/**
	 * Extract the reference scene index (lower 32 bits) from a packed pair key.
	 *
	 * @param pairKey packed (scene, reference) key
	 * @return reference scene index
	 */
	private static int pairRef(final long pairKey) {
		final long lower = pairKey & 0xffffffffL;
		return (int) lower;
	}

	/**
	 * Column-name suffix for a history state.
	 *
	 * @param stateIndex index of the state in the history
	 * @return empty string for state 0 (or any non-positive index), otherwise {@code "-"} followed by
	 *         {@code stateIndex - 1}
	 */
	private static String getStateSuffix(final int stateIndex) {
		return (stateIndex > 0) ? ("-" + (stateIndex - 1)) : "";
	}

	/**
	 * Append one CSV row: index, scene name, reference name, weight, and then for every selected
	 * history state the pose (X,Y,Z,A,T,R), the six curvature values and the pair RMS.
	 *
	 * @param sb                  destination buffer
	 * @param csvIndex            value of the first (index) column
	 * @param sceneTs             text of the scene column
	 * @param refTs               text of the reference column
	 * @param sceneIndexForPose   scene whose pose and curvature are reported
	 * @param pairIndexForRms     index into each per-state RMS array; a negative or out-of-range index
	 *                            (or a {@code null} RMS array) yields RMS 0
	 * @param pairWeight          value of the weight column
	 * @param selectedStates      indices into the two history lists, one column group per entry
	 * @param innerSceneHistory   scene poses per history state
	 * @param innerPairRmsHistory per-pair RMS per history state
	 * @param firstScene          first scene index (for curvature)
	 * @param lastScene           last scene index (for curvature)
	 * @param centerIndex         index of the fixed center scene
	 */
	private static void appendCsvRow(
			final StringBuilder sb,
			final int csvIndex,
			final String sceneTs,
			final String refTs,
			final int sceneIndexForPose,
			final int pairIndexForRms,
			final double pairWeight,
			final int[] selectedStates,
			final ArrayList<double[][][]> innerSceneHistory,
			final ArrayList<double[]> innerPairRmsHistory,
			final int firstScene,
			final int lastScene,
			final int centerIndex) {
		final String fmt = "%.12g";
		sb.append(csvIndex);
		sb.append(',').append(sceneTs);
		sb.append(',').append(refTs);
		sb.append(',').append(String.format(Locale.US, fmt, pairWeight));
		for (final int stateIndex : selectedStates) {
			final double[][][] stateScenes = innerSceneHistory.get(stateIndex);
			final double[][] pose = getScenePose(
					stateScenes,
					sceneIndexForPose,
					centerIndex);
			final double[] curvature = getSceneCurvature(
					sceneIndexForPose,
					firstScene,
					lastScene,
					centerIndex,
					stateScenes);
			final double[] statePairRms = innerPairRmsHistory.get(stateIndex);
			double rms = 0.0;
			if ((pairIndexForRms >= 0) && (statePairRms != null) && (pairIndexForRms < statePairRms.length)) {
				rms = statePairRms[pairIndexForRms];
			}
			for (int group = 0; group < 2; group++) { // 0 - xyz, 1 - atr
				for (int comp = 0; comp < 3; comp++) {
					sb.append(',').append(String.format(Locale.US, fmt, pose[group][comp]));
				}
			}
			for (int comp = 0; comp < 6; comp++) {
				sb.append(',').append(String.format(Locale.US, fmt, curvature[comp]));
			}
			sb.append(',').append(String.format(Locale.US, fmt, rms));
		}
		sb.append('\n');
	}

	/**
	 * Discrete curvature of the pose sequence at one scene: the difference between the scene pose and
	 * the midpoint of its immediate neighbors ({@code sceneIndex - 1} and {@code sceneIndex + 1}).
	 *
	 * <p>Linear components are plain differences; angular components use {@code wrapPi()} both for the
	 * neighbor-to-neighbor span and for the final difference.
	 *
	 * @param sceneIndex    scene to evaluate
	 * @param firstScene    first scene of the segment
	 * @param lastScene     last scene of the segment
	 * @param centerIndex   index of the fixed center scene
	 * @param scenes_xyzatr scene poses
	 * @return six values {x, y, z, a, t, r}; all NaN unless {@code firstScene < sceneIndex < lastScene}
	 */
	private static double[] getSceneCurvature(
			final int sceneIndex,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final double[][][] scenes_xyzatr) {
		final double[] curvature = new double[6];
		Arrays.fill(curvature, Double.NaN);
		// Only strictly interior scenes have both neighbors inside [firstScene, lastScene].
		final boolean interior = (sceneIndex > firstScene) && (sceneIndex < lastScene);
		if (!interior) {
			return curvature;
		}
		final double[][] before = getScenePose(scenes_xyzatr, sceneIndex - 1, centerIndex);
		final double[][] here = getScenePose(scenes_xyzatr, sceneIndex, centerIndex);
		final double[][] after = getScenePose(scenes_xyzatr, sceneIndex + 1, centerIndex);
		for (int comp = 0; comp < 3; comp++) {
			curvature[comp] = here[0][comp] - 0.5 * (before[0][comp] + after[0][comp]);
		}
		for (int comp = 0; comp < 3; comp++) {
			final double span = wrapPi(after[1][comp] - before[1][comp]);
			final double midAngle = before[1][comp] + 0.5 * span;
			curvature[3 + comp] = wrapPi(here[1][comp] - midAngle);
		}
		return curvature;
	}

	/**
	 * Evaluate the RMS of every cached pair observation at the given poses without accumulating
	 * normal equations.
	 *
	 * <p>Observations with a non-positive (or NaN) weight, and observations whose scene is outside
	 * {@code activePoseScene} or marked inactive there, are skipped and keep NaN. For the others the
	 * result of {@code accumulateCachedLocalSystem()} is used: element 1 of its RMS array when it has
	 * more than one element, otherwise element 0.
	 *
	 * @return array parallel to {@code cache.observations}, NaN where no RMS was obtained
	 */
	private static double[] evaluateCachedPairRms(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final int centerIndex,
			final ObservationCache cache,
			final int earliestScene,
			final int lastScene,
			final double[][][] scenes_xyzatr,
			final double[][][] scenes_xyzatr_pull,
			final double[][][] scenes_ybase_xyzatr,
			final double[][][] scenes_rates_xyzatr,
			final double[] sceneTimes,
			final boolean[] paramSelect,
			final double[] paramRegweights,
			final int[] sceneParIndices,
			final boolean[] activePoseScene,
			final boolean disableErs,
			final boolean includeRateChain,
			final int debugLevel) {
		final double[] result = new double[cache.observations.size()];
		Arrays.fill(result, Double.NaN);
		for (int iobs = 0; iobs < cache.observations.size(); iobs++) {
			final PairObservation observation = cache.observations.get(iobs);
			final int ivar = observation.sceneIndex - earliestScene;
			final boolean usable =
					(observation.pairWeight > 0.0) &&
					(ivar >= 0) && (ivar < activePoseScene.length) && activePoseScene[ivar];
			if (!usable) {
				continue;
			}
			// Evaluation only: no accumulation, so the normal-equation targets are null.
			final LocalSystem local = accumulateCachedLocalSystem(
					clt_parameters,
					quadCLTs,
					centerIndex,
					observation,
					earliestScene,
					lastScene,
					scenes_xyzatr,
					scenes_xyzatr_pull,
					scenes_ybase_xyzatr,
					scenes_rates_xyzatr,
					sceneTimes,
					paramSelect,
					paramRegweights,
					sceneParIndices,
					activePoseScene,
					disableErs,
					includeRateChain,
					false,
					null,
					null,
					null,
					null,
					debugLevel);
			if (!local.valid || (local.rms == null) || (local.rms.length == 0)) {
				continue;
			}
			final int rmsSlot = (local.rms.length > 1) ? 1 : 0;
			result[iobs] = local.rms[rmsSlot];
		}
		return result;
	}

	/**
	 * Deep-copy a scene pose array.
	 *
	 * @param scenes_xyzatr array to copy, may be {@code null} and may contain {@code null} entries at
	 *                      either of the two outer levels
	 * @return independent copy with {@code null} entries preserved, or {@code null} for {@code null} input
	 */
	private static double[][][] cloneScenePoses(final double[][][] scenes_xyzatr) {
		if (scenes_xyzatr == null) {
			return null;
		}
		final int numScenes = scenes_xyzatr.length;
		final double[][][] result = new double[numScenes][][];
		for (int nscene = 0; nscene < numScenes; nscene++) {
			final double[][] source = scenes_xyzatr[nscene];
			if (source != null) {
				final double[][] target = new double[source.length][];
				for (int k = 0; k < source.length; k++) {
					target[k] = (source[k] == null) ? null : source[k].clone();
				}
				result[nscene] = target;
			}
		}
		return result;
	}

	/**
	 * Make sure the pull-target poses do not alias the mutable current poses.
	 *
	 * @param scenes_xyzatr_pull pull-target poses, may be {@code null}
	 * @param scenes_xyzatr      current poses
	 * @return {@code null} for a {@code null} pull array; a deep copy when the pull array is the very
	 *         same object as {@code scenes_xyzatr} (freezing it at the time of the call); otherwise
	 *         the pull array itself
	 */
	private static double[][][] normalizePullScenePoses(
			final double[][][] scenes_xyzatr_pull,
			final double[][][] scenes_xyzatr) {
		final boolean aliasesCurrent = (scenes_xyzatr_pull != null) && (scenes_xyzatr_pull == scenes_xyzatr);
		return aliasesCurrent ? cloneScenePoses(scenes_xyzatr_pull) : scenes_xyzatr_pull;
	}

	/**
	 * Build an overlap-based reliable-tile mask for one (scene, reference) pair, intended to suppress
	 * far-field fold artifacts in scene mapping.
	 *
	 * <p>Following the convention of {@code scanSfmIMS()}:
	 * 1) the mask is restricted by the center->scene overlap;
	 * 2) when the reference is not the center, it is additionally restricted by the center->ref overlap.
	 *
	 * <p>A {@code null} {@code reliableBase} means "start from an all-true mask". When the center scene
	 * is unavailable, {@code reliableBase} is returned as is.
	 *
	 * @param clt_parameters parameters (provides {@code imp.fmg_rectilinear})
	 * @param quadCLTs       scenes
	 * @param centerIndex    index of the center scene
	 * @param sceneIndex     index of the scene of the pair
	 * @param refSceneIndex  index of the reference scene of the pair
	 * @param scenes_xyzatr  scene poses
	 * @param reliableBase   starting mask or {@code null}
	 * @param debugLevel     values above 1 print the number of selected tiles
	 * @return resulting mask
	 */
	private static boolean[] buildPairReliableMask(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final int centerIndex,
			final int sceneIndex,
			final int refSceneIndex,
			final double[][][] scenes_xyzatr,
			final boolean[] reliableBase,
			final int debugLevel) {
		final boolean centerAvailable =
				(centerIndex >= 0) && (centerIndex < quadCLTs.length) && (quadCLTs[centerIndex] != null);
		if (!centerAvailable) {
			return reliableBase;
		}
		final QuadCLT center = quadCLTs[centerIndex];
		final int tilesX = center.getTileProcessor().getTilesX();
		final int tilesY = center.getTileProcessor().getTilesY();
		boolean[] mask = reliableBase;
		if (mask == null) {
			mask = new boolean[tilesX * tilesY];
			Arrays.fill(mask, true);
		}
		final double[][] centerPose = getScenePose(scenes_xyzatr, centerIndex, centerIndex);
		final double avgZ = center.getAverageZ(true);
		final boolean rectilinear = clt_parameters.imp.fmg_rectilinear;
		// Scene first, then the reference when it is not the center itself.
		final int[] targets = (refSceneIndex != centerIndex) ?
				new int[] {sceneIndex, refSceneIndex} :
				new int[] {sceneIndex};
		for (final int target : targets) {
			final double[][] targetPose = getScenePose(scenes_xyzatr, target, centerIndex);
			final double[] offset = center.estimateCenterShiftXY(
					centerPose,
					targetPose,
					avgZ,
					rectilinear);
			if (offset != null) {
				mask = center.maskByOverlap(mask, offset);
			}
		}
		if (debugLevel > 1) {
			int selected = 0;
			for (final boolean tileOk : mask) {
				if (tileOk) {
					selected++;
				}
			}
			System.out.println(
					"IntersceneGlobalRefine: overlap mask scene=" + sceneIndex +
					", ref=" + refSceneIndex +
					", selected=" + selected + "/" + mask.length);
		}
		return mask;
	}

	/**
	 * Build the local LMA system for one cached pair observation and optionally add it to the
	 * global normal equations.
	 *
	 * <p>Steps, as performed below:
	 * <ol>
	 *   <li>Collect poses for the scene and its reference: the "y-base" poses (from
	 *       {@code scenes_ybase_xyzatr}, or {@code scenes_xyzatr} when that is {@code null}), the current
	 *       poses, and the optional pull pose.</li>
	 *   <li>Temporarily replace {@code ers_watr_center_dt}/{@code ers_wxyz_center_dt} of both
	 *       {@link ErsCorrection} instances with rates from {@code getSceneRates()}; the original arrays
	 *       are restored in a {@code finally} block.</li>
	 *   <li>Call {@code prepareLMA()} with the y-base poses and the cached measurement data of
	 *       {@code obs}, then {@code relinearizeAt()} the current poses, then
	 *       {@code getNormalEquationData(0.0)}.</li>
	 *   <li>When {@code accumulateNormalEq} is set, pass the result to
	 *       {@code accumulateMappedNormalEquation()} together with {@code diag}, {@code offDiag1},
	 *       {@code offDiag2} and {@code rhs}.</li>
	 * </ol>
	 *
	 * @param accumulateNormalEq when {@code false} the accumulation targets are not used
	 *                           (a caller in this file passes {@code null} for them)
	 * @return a {@code LocalSystem} with {@code valid} set to {@code true} and {@code rms} set to a copy
	 *         of the normal-equation RMS (or {@code null}) on success; the freshly constructed, otherwise
	 *         untouched {@code LocalSystem} when the observation or scene data is missing,
	 *         relinearization fails, or the normal-equation data is incomplete
	 */
	private static LocalSystem accumulateCachedLocalSystem(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final int centerIndex,
			final PairObservation obs,
			final int earliestScene,
			final int lastScene,
			final double[][][] scenes_xyzatr,
			final double[][][] scenes_xyzatr_pull,
			final double[][][] scenes_ybase_xyzatr,
			final double[][][] scenes_rates_xyzatr,
			final double[] sceneTimes,
			final boolean[] paramSelect,
			final double[] paramRegweights,
			final int[] sceneParIndices,
			final boolean[] activePoseScene,
			final boolean disableErs,
			final boolean includeRateChain,
			final boolean accumulateNormalEq,
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs,
			final int debugLevel) {
		final LocalSystem ls = new LocalSystem();
		// Nothing to do without cached measurement data.
		if ((obs == null) || (obs.vectorXys == null) || (obs.centers == null)) {
			return ls;
		}
		final int nscene = obs.sceneIndex;
		final int refScene = obs.refSceneIndex;
		if ((quadCLTs[nscene] == null) || (quadCLTs[refScene] == null) || (scenes_xyzatr[nscene] == null)) {
			return ls;
		}
		// Poses used to build the fixed target fall back to the current poses when no y-base is given.
		final double[][][] scenesYbase = (scenes_ybase_xyzatr != null) ? scenes_ybase_xyzatr : scenes_xyzatr;
		final double[][] basePose = getScenePose(
				scenesYbase,
				nscene,
				centerIndex);
		final double[][] baseRefPose = getScenePose(
				scenesYbase,
				refScene,
				centerIndex);
		final double[][] poseNow = getScenePose(
				scenes_xyzatr,
				nscene,
				centerIndex);
		final double[][] pullPose = (scenes_xyzatr_pull == null) ? null : getScenePose(
				scenes_xyzatr_pull,
				nscene,
				centerIndex);
		final double[][] refPose = getScenePose(
				scenes_xyzatr,
				refScene,
				centerIndex);
		// Rates are derived from scenes_rates_xyzatr, not from the poses being optimized.
		final double[][] sceneRates = getSceneRates(
				nscene,
				scenes_rates_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				centerIndex);
		final double[][] refRates = getSceneRates(
				refScene,
				scenes_rates_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				centerIndex);
		final boolean[] localParamSelect = getLocalParamMaskForPair(
				paramSelect,
				nscene,
				refScene,
				centerIndex,
				disableErs);
		final IntersceneLma lma = new IntersceneLma(
				clt_parameters.ilp.ilma_thread_invariant,
				0.0); // force 2D mode for global cached refinement
		final ErsCorrection ersRef = quadCLTs[refScene].getErsCorrection();
		final ErsCorrection ersScene = quadCLTs[nscene].getErsCorrection();
		// Back up the ERS rate arrays so they can be restored after the LMA calls.
		final double[] refWatrBak = ersRef.ers_watr_center_dt.clone();
		final double[] refWxyzBak = ersRef.ers_wxyz_center_dt.clone();
		final double[] sceneWatrBak = ersScene.ers_watr_center_dt.clone();
		final double[] sceneWxyzBak = ersScene.ers_wxyz_center_dt.clone();
		final IntersceneLma.NormalEquationData ne;
		try {
			// Index 1 of the rates is angular (atr), index 0 is linear (xyz).
			ersRef.ers_watr_center_dt = refRates[1].clone();
			ersRef.ers_wxyz_center_dt = refRates[0].clone();
			ersScene.ers_watr_center_dt = sceneRates[1].clone();
			ersScene.ers_wxyz_center_dt = sceneRates[0].clone();
			// Build fixed target y from the outer-start pose, then relinearize at current inner pose.
			lma.prepareLMA(
					basePose,
					pullPose,
					baseRefPose,
					quadCLTs[nscene],
					quadCLTs[refScene],
					localParamSelect,
					paramRegweights,
					clt_parameters.imp.eig_max_sqrt,
					clt_parameters.imp.eig_min_sqrt,
					obs.eigen,
					obs.vectorXys,
					obs.reliableRef,
					obs.centers,
					false,
					false,
					null,
					debugLevel);
			if (!lma.relinearizeAt(
					poseNow,
					refPose,
					quadCLTs[nscene],
					quadCLTs[refScene],
					debugLevel)) {
				return ls;
			}
			ne = lma.getNormalEquationData(0.0);
		} finally {
			// Always restore the ERS rates, including on the early return above and on exceptions.
			ersRef.ers_watr_center_dt = refWatrBak;
			ersRef.ers_wxyz_center_dt = refWxyzBak;
			ersScene.ers_watr_center_dt = sceneWatrBak;
			ersScene.ers_wxyz_center_dt = sceneWxyzBak;
		}
		if ((ne == null) || (ne.h == null) || (ne.b == null) || (ne.parameterIndices == null)) {
			return ls;
		}
		if (accumulateNormalEq) {
			accumulateMappedNormalEquation(
					ne,
					nscene,
					refScene,
					centerIndex,
					sceneParIndices,
					scenes_rates_xyzatr,
					quadCLTs,
					sceneTimes,
					earliestScene,
					lastScene,
					activePoseScene,
					includeRateChain,
					diag,
					offDiag1,
					offDiag2,
					rhs);
		}
		ls.valid = true;
		ls.rms = (ne.rms == null) ? null : ne.rms.clone();
		return ls;
	}

	/**
	 * Measure one (scene, reference) pair with GPU correlation, build its local LMA system at the
	 * current poses and add it to the global normal equations.
	 *
	 * <p>Steps, as performed below:
	 * <ol>
	 *   <li>Get current poses and rates (via {@code getSceneRates()} from {@code scenes_xyzatr}).</li>
	 *   <li>When motion blur is enabled ({@code clt_parameters.imp.mb_en}), compute motion-blur vectors
	 *       for the scene.</li>
	 *   <li>Run {@code Interscene.interCorrPair()}; an unusable result ends the call.</li>
	 *   <li>With the ERS rate arrays of both {@link ErsCorrection} instances temporarily replaced by
	 *       the computed rates, call {@code prepareLMA()}. The original arrays are restored in a
	 *       {@code finally} block so that an exception cannot leave the shared ERS state modified.</li>
	 *   <li>Fetch {@code getNormalEquationData(0.0)} and pass it to
	 *       {@code accumulateMappedNormalEquation()} (rate chain always included).</li>
	 * </ol>
	 *
	 * @return a {@code LocalSystem} with {@code valid} set to {@code true} and {@code rms} set to a copy
	 *         of the normal-equation RMS (or {@code null}) on success; the freshly constructed, otherwise
	 *         untouched {@code LocalSystem} when the scene, the reference scene or the scene pose is
	 *         missing, the correlation result is unusable, or the normal-equation data is incomplete
	 */
	private static LocalSystem buildLocalSystem(
			final CLTParameters clt_parameters,
			final QuadCLT[] quadCLTs,
			final QuadCLT centerCLT,
			final int centerIndex,
			final int nscene,
			final int refScene,
			final int earliestScene,
			final int lastScene,
			final double[][][] scenes_xyzatr,
			final double[] sceneTimes,
			final double[][] pXpYDRef,
			final TpTask[] tpTaskRef,
			final boolean[] reliableRef,
			final double[] refDisparity,
			final double[] minMax,
			final boolean[] paramSelect,
			final double[] paramRegweights,
			final int[] sceneParIndices,
			final boolean[] activePoseScene,
			final boolean disableErs,
			final double mb_max_gain,
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs,
			final int debugLevel) {
		final LocalSystem ls = new LocalSystem();
		// The reference scene is dereferenced below, so it is checked here together with the scene
		// itself (same guard as in accumulateCachedLocalSystem()).
		if ((quadCLTs[nscene] == null) || (quadCLTs[refScene] == null) || (scenes_xyzatr[nscene] == null)) {
			return ls;
		}
		final double[][] poseNow = getScenePose(
				scenes_xyzatr,
				nscene,
				centerIndex);
		final double[][] refPose = getScenePose(
				scenes_xyzatr,
				refScene,
				centerIndex);
		final double[][] sceneRates = getSceneRates(
				nscene,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				centerIndex);
		final double[][] refRates = getSceneRates(
				refScene,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				centerIndex);
		final boolean mb_en = clt_parameters.imp.mb_en;
		double[][] mbVectorsScene = null;
		if (mb_en) {
			final double[][] sceneDt = QuadCLTCPU.scaleDtFromErs(
					clt_parameters,
					new double[][] {
						sceneRates[0].clone(),
						sceneRates[1].clone()});
			mbVectorsScene = OpticalFlow.getMotionBlur(
					centerCLT,
					quadCLTs[nscene],
					pXpYDRef,
					poseNow[0],
					poseNow[1],
					sceneDt[0],
					sceneDt[1],
					0,
					debugLevel);
		}
		final int[] failReason = {0};
		final double[][][] coordMotion = Interscene.interCorrPair(
				clt_parameters,
				clt_parameters.ilp.ilma_3d,
				false,
				mb_max_gain,
				minMax,
				failReason,
				centerCLT,
				refDisparity,
				quadCLTs[refScene],
				pXpYDRef,
				tpTaskRef,
				quadCLTs[nscene],
				poseNow[0],
				poseNow[1],
				reliableRef,
				clt_parameters.imp.margin,
				clt_parameters.imp.sensor_mask_inter,
				null,
				null,
				false,
				false,
				true,
				mbVectorsScene,
				-1,
				false,
				clt_parameters.imp.debug_level,
				debugLevel);
		if ((coordMotion == null) || (coordMotion.length < 2) || (coordMotion[0] == null) || (coordMotion[1] == null)) {
			return ls;
		}
		final double[][] eigen = (coordMotion.length > 2) ? coordMotion[2] : null;
		final boolean[] localParamSelect = getLocalParamMaskForPair(
				paramSelect,
				nscene,
				refScene,
				centerIndex,
				disableErs);
		final IntersceneLma lma = new IntersceneLma(
				clt_parameters.ilp.ilma_thread_invariant,
				clt_parameters.ilp.ilma_3d_lma ? clt_parameters.ilp.ilma_disparity_weight : 0.0);
		final ErsCorrection ersRef = quadCLTs[refScene].getErsCorrection();
		final ErsCorrection ersScene = quadCLTs[nscene].getErsCorrection();
		// Back up the ERS rate arrays so they can be restored after prepareLMA().
		final double[] refWatrBak = ersRef.ers_watr_center_dt.clone();
		final double[] refWxyzBak = ersRef.ers_wxyz_center_dt.clone();
		final double[] sceneWatrBak = ersScene.ers_watr_center_dt.clone();
		final double[] sceneWxyzBak = ersScene.ers_wxyz_center_dt.clone();
		try {
			// Index 1 of the rates is angular (atr), index 0 is linear (xyz).
			ersRef.ers_watr_center_dt = refRates[1].clone();
			ersRef.ers_wxyz_center_dt = refRates[0].clone();
			ersScene.ers_watr_center_dt = sceneRates[1].clone();
			ersScene.ers_wxyz_center_dt = sceneRates[0].clone();
			lma.prepareLMA(
					poseNow,
					null,
					refPose,
					quadCLTs[nscene],
					quadCLTs[refScene],
					localParamSelect,
					paramRegweights,
					clt_parameters.imp.eig_max_sqrt,
					clt_parameters.imp.eig_min_sqrt,
					eigen,
					coordMotion[1],
					reliableRef,
					coordMotion[0],
					false,
					false,
					null,
					debugLevel);
		} finally {
			// Restore the shared ERS state even when prepareLMA() throws.
			ersRef.ers_watr_center_dt = refWatrBak;
			ersRef.ers_wxyz_center_dt = refWxyzBak;
			ersScene.ers_watr_center_dt = sceneWatrBak;
			ersScene.ers_wxyz_center_dt = sceneWxyzBak;
		}
		// Same as before the fix: the normal equations are fetched after the ERS rates are restored.
		final IntersceneLma.NormalEquationData ne = lma.getNormalEquationData(0.0);
		if ((ne == null) || (ne.h == null) || (ne.b == null) || (ne.parameterIndices == null)) {
			return ls;
		}
		accumulateMappedNormalEquation(
				ne,
				nscene,
				refScene,
				centerIndex,
				sceneParIndices,
				scenes_xyzatr,
				quadCLTs,
				sceneTimes,
				earliestScene,
				lastScene,
				activePoseScene,
				true,
				diag,
				offDiag1,
				offDiag2,
				rhs);
		ls.valid = true;
		ls.rms = (ne.rms == null) ? null : ne.rms.clone();
		return ls;
	}

		/**
		 * Build the per-pair parameter selection mask passed to the local LMA.
		 *
		 * <p>Policy:
		 * <ul>
		 *   <li>Scene pose parameters ({@code SCENE_POSE_PAR_INDICES}) follow {@code paramSelect}, or are
		 *       all enabled when {@code paramSelect} is {@code null}; indices beyond the length of
		 *       {@code paramSelect} stay disabled. They are all disabled when the scene is the center.</li>
		 *   <li>Reference pose parameters are never enabled here.</li>
		 *   <li>Rate parameters are enabled as dependent auxiliaries for chain-rule mapping: linear rates
		 *       always, angular rates unless {@code disableErs}. This includes the reference rates even
		 *       when the reference is the center: the center is fixed, but center-rate derivatives
		 *       propagate to neighboring scenes.</li>
		 * </ul>
		 *
		 * @param paramSelect   global selection or {@code null}
		 * @param sceneIndex    scene of the pair
		 * @param refSceneIndex reference of the pair (not used by the current policy)
		 * @param centerIndex   index of the fixed center scene
		 * @param disableErs    disable angular rate terms
		 * @return mask of length {@code ErsCorrection.DP_NUM_PARS}
		 */
		private static boolean[] getLocalParamMaskForPair(
				final boolean[] paramSelect,
				final int sceneIndex,
				final int refSceneIndex,
				final int centerIndex,
				final boolean disableErs) {
			final boolean[] mask = new boolean[ErsCorrection.DP_NUM_PARS];
			final boolean sceneIsCenter = (sceneIndex == centerIndex);
			for (final int pi : SCENE_POSE_PAR_INDICES) {
				final boolean requested = (paramSelect == null) || ((pi < paramSelect.length) && paramSelect[pi]);
				mask[pi] = requested && !sceneIsCenter;
			}
			final boolean angularRates = !disableErs;
			for (int i = 0; i < REF_RATE_ANG_PAR_INDICES.length; i++) {
				mask[REF_RATE_ANG_PAR_INDICES[i]] = angularRates;
				mask[SCENE_RATE_ANG_PAR_INDICES[i]] = angularRates;
				mask[REF_RATE_LIN_PAR_INDICES[i]] = true;
				mask[SCENE_RATE_LIN_PAR_INDICES[i]] = true;
			}
			return mask;
		}

	/**
	 * Set the regularization weights of all rate parameters (reference and scene, angular and
	 * linear) to zero, in place.
	 *
	 * @param paramRegweights regularization weights indexed by parameter index, modified by this call
	 */
	private static void zeroRateRegularization(final double[] paramRegweights) {
		final int[][] rateGroups = {
				REF_RATE_ANG_PAR_INDICES,
				REF_RATE_LIN_PAR_INDICES,
				SCENE_RATE_ANG_PAR_INDICES,
				SCENE_RATE_LIN_PAR_INDICES};
		final int numComponents = REF_RATE_ANG_PAR_INDICES.length;
		for (int comp = 0; comp < numComponents; comp++) {
			for (final int[] group : rateGroups) {
				paramRegweights[group[comp]] = 0.0;
			}
		}
	}

	/**
	 * Get a copy of the pose of one scene.
	 *
	 * @param scenes_xyzatr scene poses
	 * @param sceneIndex    scene to read
	 * @param centerIndex   index of the fixed center scene
	 * @return {@code {xyz, atr}} as fresh arrays; copies of {@code Interscene.ZERO3} for the center
	 *         scene, for an out-of-range index, and for a scene without a pose
	 */
	private static double[][] getScenePose(
			final double[][][] scenes_xyzatr,
			final int sceneIndex,
			final int centerIndex) {
		// The center test comes first so that the pose array is not touched for the center scene.
		final boolean stored =
				(sceneIndex != centerIndex) &&
				(sceneIndex >= 0) && (sceneIndex < scenes_xyzatr.length) &&
				(scenes_xyzatr[sceneIndex] != null);
		final double[] xyz = stored ? scenes_xyzatr[sceneIndex][0].clone() : Interscene.ZERO3.clone();
		final double[] atr = stored ? scenes_xyzatr[sceneIndex][1].clone() : Interscene.ZERO3.clone();
		return new double[][] {xyz, atr};
	}

	/**
	 * Get a time stamp for every scene in {@code [firstScene, lastScene]}.
	 *
	 * <p>The time is parsed from the scene image name. A scene that is missing, or whose name cannot
	 * be parsed, gets the time of the previous scene in the range plus {@code DEFAULT_DT} (0 for the
	 * first scene of the range).
	 *
	 * @param quadCLTs   scenes
	 * @param firstScene first scene index (inclusive)
	 * @param lastScene  last scene index (inclusive)
	 * @return array of the same length as {@code quadCLTs}; entries outside the range are NaN
	 */
	private static double[] getSceneTimes(
			final QuadCLT[] quadCLTs,
			final int firstScene,
			final int lastScene) {
		final double[] sceneTimes = new double[quadCLTs.length];
		Arrays.fill(sceneTimes, Double.NaN);
		double fallback = 0.0;
		for (int nscene = firstScene; nscene <= lastScene; nscene++) {
			final double parsed = (quadCLTs[nscene] == null) ?
					Double.NaN :
					parseSceneTimestamp(quadCLTs[nscene].getImageName());
			final double time = Double.isNaN(parsed) ? fallback : parsed;
			sceneTimes[nscene] = time;
			fallback = time + DEFAULT_DT;
		}
		return sceneTimes;
	}

	/**
	 * Parse a scene name of the form {@code <seconds>_<microseconds>} into seconds.
	 *
	 * @param ts scene name, may be {@code null}; surrounding whitespace is ignored
	 * @return time in seconds, or NaN when the name is {@code null}, does not split into exactly two
	 *         underscore-separated parts, or a part is not an integer
	 */
	private static double parseSceneTimestamp(final String ts) {
		if (ts == null) {
			return Double.NaN;
		}
		final String[] fields = ts.trim().split("_");
		if (fields.length == 2) {
			try {
				final long seconds = Long.parseLong(fields[0]);
				final long micros = Long.parseLong(fields[1]);
				return seconds + (micros * 1.0e-6);
			} catch (NumberFormatException ignored) {
				// Not a numeric time stamp: reported as NaN below.
			}
		}
		return Double.NaN;
	}

	/**
	 * Find the nearest earlier scene that has both a scene instance and a pose.
	 *
	 * @param sceneIndex    scene to start from (exclusive)
	 * @param firstScene    lowest index to consider (inclusive)
	 * @param quadCLTs      scenes
	 * @param scenes_xyzatr scene poses
	 * @return index of the found scene, or -1 when there is none
	 */
	private static int findPrevScene(
			final int sceneIndex,
			final int firstScene,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr) {
		int candidate = sceneIndex - 1;
		while (candidate >= firstScene) {
			if ((quadCLTs[candidate] != null) && (scenes_xyzatr[candidate] != null)) {
				return candidate;
			}
			candidate--;
		}
		return -1;
	}

	/**
	 * Find the nearest later scene that has both a scene instance and a pose.
	 *
	 * @param sceneIndex    scene to start from (exclusive)
	 * @param lastScene     highest index to consider (inclusive)
	 * @param quadCLTs      scenes
	 * @param scenes_xyzatr scene poses
	 * @return index of the found scene, or -1 when there is none
	 */
	private static int findNextScene(
			final int sceneIndex,
			final int lastScene,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr) {
		int candidate = sceneIndex + 1;
		while (candidate <= lastScene) {
			if ((quadCLTs[candidate] != null) && (scenes_xyzatr[candidate] != null)) {
				return candidate;
			}
			candidate++;
		}
		return -1;
	}

	/**
	 * Read a single pose component of one scene.
	 *
	 * @param scenes_xyzatr scene poses
	 * @param sceneIndex    scene to read
	 * @param centerIndex   index of the fixed center scene
	 * @param angular       {@code true} to read from the angular triplet (index 1), {@code false} for
	 *                      the linear triplet (index 0)
	 * @param comp          component index within the triplet
	 * @return the component; 0 for the center scene, for an out-of-range index, and for a scene
	 *         without a pose
	 */
	private static double getPoseComponent(
			final double[][][] scenes_xyzatr,
			final int sceneIndex,
			final int centerIndex,
			final boolean angular,
			final int comp) {
		// The center test comes first so that the pose array is not touched for the center scene.
		final boolean stored =
				(sceneIndex != centerIndex) &&
				(sceneIndex >= 0) && (sceneIndex < scenes_xyzatr.length) &&
				(scenes_xyzatr[sceneIndex] != null);
		if (!stored) {
			return 0.0;
		}
		final int group = angular ? 1 : 0;
		return scenes_xyzatr[sceneIndex][group][comp];
	}

	/**
	 * Absolute time difference between two scenes, with a fallback.
	 *
	 * @param i0         first scene index
	 * @param i1         second scene index
	 * @param sceneTimes scene times
	 * @return {@code |sceneTimes[i1] - sceneTimes[i0]|} when both indices are in range and the
	 *         difference exceeds {@code MIN_DIAG}; otherwise {@code DEFAULT_DT}
	 */
	private static double safeDt(
			final int i0,
			final int i1,
			final double[] sceneTimes) {
		final boolean inRange =
				(i0 >= 0) && (i1 >= 0) && (i0 < sceneTimes.length) && (i1 < sceneTimes.length);
		if (inRange) {
			final double dt = Math.abs(sceneTimes[i1] - sceneTimes[i0]);
			// A NaN difference fails this comparison and falls through to the default.
			if (dt > MIN_DIAG) {
				return dt;
			}
		}
		return DEFAULT_DT;
	}

	/**
	 * Wrap an angle into the range [-pi, pi].
	 *
	 * <p>Both end points are left as they are. Non-finite input (NaN or infinity) yields NaN: an
	 * infinite angle has no meaningful wrapped value, and subtracting 2*pi from it in a loop would
	 * never terminate. Very large finite angles are first reduced with
	 * {@link Math#IEEEremainder(double, double)}, because for them step-by-step reduction is either
	 * extremely slow or (when 2*pi is below the value's floating-point resolution) never finishes.
	 * Angles of moderate magnitude are reduced step by step exactly as before.
	 *
	 * @param a angle in radians
	 * @return equivalent angle in [-pi, pi], or NaN for non-finite input
	 */
	private static double wrapPi(double a) {
		if (!Double.isFinite(a)) {
			return Double.NaN;
		}
		final double twoPi = 2.0 * Math.PI;
		// Beyond 64 turns use the exact remainder; its result is already within [-pi, pi].
		if (Math.abs(a) > (64.0 * twoPi)) {
			a = Math.IEEEremainder(a, twoPi);
		}
		while (a > Math.PI) {
			a -= twoPi;
		}
		while (a < -Math.PI) {
			a += twoPi;
		}
		return a;
	}

	/**
	 * Describe the finite-difference rate at one scene as a linear combination of scene poses.
	 *
	 * <p>The rate is the difference between two scenes divided by their time difference
	 * ({@code safeDt()}): previous and next available scenes when both exist (central difference),
	 * otherwise this scene and its only available neighbor (one-sided difference).
	 *
	 * @param sceneIndex    scene at which the rate is taken
	 * @param firstScene    first scene of the segment
	 * @param lastScene     last scene of the segment
	 * @param quadCLTs      scenes
	 * @param scenes_xyzatr scene poses, used to decide which neighbors are available
	 * @param sceneTimes    scene times
	 * @return two terms (negative coefficient first), or an empty list when no neighbor is available
	 */
	private static ArrayList<RateTerm> getRateTerms(
			final int sceneIndex,
			final int firstScene,
			final int lastScene,
			final QuadCLT[] quadCLTs,
			final double[][][] scenes_xyzatr,
			final double[] sceneTimes) {
		final int prev = findPrevScene(sceneIndex, firstScene, quadCLTs, scenes_xyzatr);
		final int next = findNextScene(sceneIndex, lastScene, quadCLTs, scenes_xyzatr);
		final ArrayList<RateTerm> terms = new ArrayList<RateTerm>(2);
		if ((prev < 0) && (next < 0)) {
			return terms;
		}
		// A missing neighbor is replaced by the scene itself, giving a one-sided difference.
		final int from = (prev < 0) ? sceneIndex : prev;
		final int to = (next < 0) ? sceneIndex : next;
		final double dt = safeDt(from, to, sceneTimes);
		terms.add(new RateTerm(from, -1.0 / dt));
		terms.add(new RateTerm(to, 1.0 / dt));
		return terms;
	}

	/**
	 * Finite-difference rate of one pose component at one scene.
	 *
	 * <p>Uses the previous and next available scenes when both exist (central difference), otherwise
	 * this scene and its only available neighbor (one-sided difference). Angular differences are
	 * wrapped with {@code wrapPi()} before division by the time difference ({@code safeDt()}).
	 *
	 * @param sceneIndex    scene at which the rate is taken
	 * @param angular       {@code true} for an angular component, {@code false} for a linear one
	 * @param comp          component index within the triplet
	 * @param firstScene    first scene of the segment
	 * @param lastScene     last scene of the segment
	 * @param centerIndex   index of the fixed center scene
	 * @param scenes_xyzatr scene poses
	 * @param quadCLTs      scenes
	 * @param sceneTimes    scene times
	 * @return rate, or 0 when no neighbor is available
	 */
	private static double getRateValue(
			final int sceneIndex,
			final boolean angular,
			final int comp,
			final int firstScene,
			final int lastScene,
			final int centerIndex,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes) {
		final int prev = findPrevScene(sceneIndex, firstScene, quadCLTs, scenes_xyzatr);
		final int next = findNextScene(sceneIndex, lastScene, quadCLTs, scenes_xyzatr);
		if ((prev < 0) && (next < 0)) {
			return 0.0;
		}
		// A missing neighbor is replaced by the scene itself, giving a one-sided difference.
		final int from = (prev < 0) ? sceneIndex : prev;
		final int to = (next < 0) ? sceneIndex : next;
		final double dt = safeDt(from, to, sceneTimes);
		final double valueTo = getPoseComponent(scenes_xyzatr, to, centerIndex, angular, comp);
		final double valueFrom = getPoseComponent(scenes_xyzatr, from, centerIndex, angular, comp);
		final double delta = valueTo - valueFrom;
		return (angular ? wrapPi(delta) : delta) / dt;
	}

	/**
	 * Collects all six finite-difference rates of a scene.
	 *
	 * @param sceneIndex scene whose rates are evaluated
	 * @param scenes_xyzatr per-scene poses
	 * @param quadCLTs scene instances, forwarded to {@code getRateValue}
	 * @param sceneTimes per-scene time values
	 * @param firstScene forwarded to {@code getRateValue}
	 * @param lastScene forwarded to {@code getRateValue}
	 * @param centerIndex forwarded to {@code getRateValue}
	 * @return {@code [2][3]} array: row 0 holds the linear (xyz) rates, row 1 the angular (atr) rates
	 */
	private static double[][] getSceneRates(
			final int sceneIndex,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes,
			final int firstScene,
			final int lastScene,
			final int centerIndex) {
		final double[][] rates = new double[2][3]; // [xyz,atr]
		for (int group = 0; group < rates.length; group++) {
			final boolean angular = (group == 1);
			for (int comp = 0; comp < rates[group].length; comp++) {
				rates[group][comp] = getRateValue(
						sceneIndex,
						angular,
						comp,
						firstScene,
						lastScene,
						centerIndex,
						scenes_xyzatr,
						quadCLTs,
						sceneTimes);
			}
		}
		return rates;
	}

	// -----------------------------------------------------------------------------
	// Chain-rule projection: local pair normal equations -> global pose-only system
	// -----------------------------------------------------------------------------

	/**
	 * Maps one local (per-pair) LMA parameter onto the global pose-only unknowns.
	 *
	 * <ul>
	 *   <li>Scene pose parameters ({@code DP_DSAZ..DP_DSZ}) map one-to-one, with coefficient 1,
	 *       onto the same pose slot of {@code sceneIndex}.</li>
	 *   <li>{@code DP_DSV*} parameters (presumably scene rates; confirm against
	 *       {@code ErsCorrection}) map through the rate terms of {@code sceneIndex} onto the
	 *       matching pose slot of its neighbors.</li>
	 *   <li>{@code DP_DV*} parameters map the same way, using the rate terms of
	 *       {@code refSceneIndex}.</li>
	 *   <li>Any other parameter maps to nothing.</li>
	 * </ul>
	 * Rate mappings are produced only when {@code includeRateChain} is set. Terms that land on
	 * an inactive scene are dropped; rate terms that land on {@code centerIndex} are dropped too.
	 *
	 * @param parIndex local parameter index (an {@code ErsCorrection.DP_*} constant)
	 * @param sceneIndex scene of the pair being mapped
	 * @param refSceneIndex reference scene of the pair
	 * @param centerIndex fixed center scene; never receives rate-chain terms
	 * @param sceneParIndices active global pose parameter indices (defines the local slot order)
	 * @param firstScene scene index that corresponds to global variable 0
	 * @param lastScene forwarded to {@code getRateTerms}
	 * @param activePoseScene per-variable flags selecting scenes that are global unknowns
	 * @param scenes_xyzatr per-scene poses, forwarded to {@code getRateTerms}
	 * @param quadCLTs scene instances, forwarded to {@code getRateTerms}
	 * @param sceneTimes per-scene time values, forwarded to {@code getRateTerms}
	 * @param includeRateChain whether rate parameters are chained onto neighbor poses
	 * @return the (possibly empty) list of global terms for this local parameter
	 */
	private static ArrayList<GlobalTerm> mapLocalParameterToGlobalTerms(
			final int parIndex,
			final int sceneIndex,
			final int refSceneIndex,
			final int centerIndex,
			final int[] sceneParIndices,
			final int firstScene,
			final int lastScene,
			final boolean[] activePoseScene,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes,
			final boolean includeRateChain) {
		final ArrayList<GlobalTerm> rslt = new ArrayList<GlobalTerm>(2);
		switch (parIndex) {
		case ErsCorrection.DP_DSAZ:
		case ErsCorrection.DP_DSTL:
		case ErsCorrection.DP_DSRL:
		case ErsCorrection.DP_DSX:
		case ErsCorrection.DP_DSY:
		case ErsCorrection.DP_DSZ:
		{
			final int pLocal = mapSceneParameterIndex(
					parIndex,
					sceneParIndices);
			if (pLocal >= 0) {
				addGlobalTermIfActive(rslt, sceneIndex, firstScene, activePoseScene, pLocal, 1.0);
			}
			break;
		}
		case ErsCorrection.DP_DSVAZ:
		case ErsCorrection.DP_DSVTL:
		case ErsCorrection.DP_DSVRL:
			if (includeRateChain) {
				appendRateChainTerms(
						rslt,
						sceneIndex,
						ErsCorrection.DP_DSAZ + (parIndex - ErsCorrection.DP_DSVAZ),
						centerIndex,
						sceneParIndices,
						firstScene,
						lastScene,
						activePoseScene,
						scenes_xyzatr,
						quadCLTs,
						sceneTimes);
			}
			break;
		case ErsCorrection.DP_DSVX:
		case ErsCorrection.DP_DSVY:
		case ErsCorrection.DP_DSVZ:
			if (includeRateChain) {
				appendRateChainTerms(
						rslt,
						sceneIndex,
						ErsCorrection.DP_DSX + (parIndex - ErsCorrection.DP_DSVX),
						centerIndex,
						sceneParIndices,
						firstScene,
						lastScene,
						activePoseScene,
						scenes_xyzatr,
						quadCLTs,
						sceneTimes);
			}
			break;
		case ErsCorrection.DP_DVAZ:
		case ErsCorrection.DP_DVTL:
		case ErsCorrection.DP_DVRL:
			if (includeRateChain) {
				appendRateChainTerms(
						rslt,
						refSceneIndex,
						ErsCorrection.DP_DSAZ + (parIndex - ErsCorrection.DP_DVAZ),
						centerIndex,
						sceneParIndices,
						firstScene,
						lastScene,
						activePoseScene,
						scenes_xyzatr,
						quadCLTs,
						sceneTimes);
			}
			break;
		case ErsCorrection.DP_DVX:
		case ErsCorrection.DP_DVY:
		case ErsCorrection.DP_DVZ:
			if (includeRateChain) {
				appendRateChainTerms(
						rslt,
						refSceneIndex,
						ErsCorrection.DP_DSX + (parIndex - ErsCorrection.DP_DVX),
						centerIndex,
						sceneParIndices,
						firstScene,
						lastScene,
						activePoseScene,
						scenes_xyzatr,
						quadCLTs,
						sceneTimes);
			}
			break;
		default:
			break;
		}
		return rslt;
	}

	/** Appends a term for {@code sceneIndex} when it corresponds to an active global variable. */
	private static void addGlobalTermIfActive(
			final ArrayList<GlobalTerm> rslt,
			final int sceneIndex,
			final int firstScene,
			final boolean[] activePoseScene,
			final int pLocal,
			final double coeff) {
		final int ivar = sceneIndex - firstScene;
		if ((ivar >= 0) && (ivar < activePoseScene.length) && activePoseScene[ivar]) {
			rslt.add(new GlobalTerm(ivar, pLocal, coeff));
		}
	}

	/**
	 * Chains a rate parameter of {@code rateSceneIndex} onto pose slot {@code poseParIndex} of
	 * the scenes returned by {@code getRateTerms}, skipping the fixed center scene.
	 */
	private static void appendRateChainTerms(
			final ArrayList<GlobalTerm> rslt,
			final int rateSceneIndex,
			final int poseParIndex,
			final int centerIndex,
			final int[] sceneParIndices,
			final int firstScene,
			final int lastScene,
			final boolean[] activePoseScene,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes) {
		final int pLocal = mapSceneParameterIndex(
				poseParIndex,
				sceneParIndices);
		if (pLocal < 0) {
			return; // the matching pose parameter is not a global unknown
		}
		final ArrayList<RateTerm> rts = getRateTerms(
				rateSceneIndex,
				firstScene,
				lastScene,
				quadCLTs,
				scenes_xyzatr,
				sceneTimes);
		for (RateTerm rt : rts) {
			if (rt.sceneIndex == centerIndex) {
				continue;
			}
			addGlobalTermIfActive(rslt, rt.sceneIndex, firstScene, activePoseScene, pLocal, rt.coeff);
		}
	}

	/**
	 * Adds {@code v} to element {@code (ri, cj)} of block {@code (i, j)} of a symmetric
	 * block-banded matrix stored as diagonal blocks plus the first and second upper
	 * off-diagonal blocks.
	 *
	 * <p>Entries below the diagonal ({@code j < i}) are folded into the transposed position of
	 * the stored upper block. Entries more than two blocks away from the diagonal are
	 * silently ignored.
	 *
	 * @param diag diagonal blocks, indexed by block row
	 * @param offDiag1 blocks coupling block {@code n} with block {@code n + 1}, indexed by {@code n}
	 * @param offDiag2 blocks coupling block {@code n} with block {@code n + 2}, indexed by {@code n}
	 * @param i block row
	 * @param j block column
	 * @param ri row inside the block
	 * @param cj column inside the block
	 * @param v value to accumulate
	 */
	private static void addToBlockBand(
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final int i,
			final int j,
			final int ri,
			final int cj,
			final double v) {
		if (i == j) {
			diag[i][ri][cj] += v;
			return;
		}
		final double[][][] band;
		switch (Math.abs(j - i)) {
		case 1:
			band = offDiag1;
			break;
		case 2:
			band = offDiag2;
			break;
		default:
			return; // outside the stored band
		}
		if (j > i) {
			band[i][ri][cj] += v;
		} else {
			// lower-triangle entry: store transposed in the upper block
			band[j][cj][ri] += v;
		}
	}

	// -----------------------------------------------------------------------------
	// Global system assembly / solve
	// -----------------------------------------------------------------------------

	/**
	 * Projects one pair's local normal equations onto the global pose-only system and
	 * accumulates the result into the block-banded storage.
	 *
	 * <p>Each local parameter {@code i} is first mapped to a list of global terms
	 * (variable, pose slot, coefficient). The right-hand side receives {@code coeff * ne.b[i]};
	 * the matrix receives {@code coeff_i * ne.h[i][j] * coeff_j} for every pair of terms.
	 *
	 * <p>The double loop over {@code (i, j)} visits every coupling in both orders.
	 * {@code addToBlockBand} keeps only the upper off-diagonal blocks and folds a
	 * below-diagonal entry into the transposed slot of the same stored block, so feeding it
	 * both orders would add each off-diagonal contribution twice. Visits whose row variable
	 * comes after the column variable are therefore skipped here. Diagonal blocks still receive
	 * both {@code (p, q)} and {@code (q, p)}, as a full block requires. This relies on
	 * {@code ne.h} being a full symmetric matrix. NOTE(review): confirm against
	 * {@code IntersceneLma.NormalEquationData}.
	 *
	 * @param ne local normal equations (parameter indices, matrix {@code h}, vector {@code b})
	 * @param sceneIndex scene of the pair
	 * @param refSceneIndex reference scene of the pair
	 * @param centerIndex fixed center scene
	 * @param sceneParIndices active global pose parameter indices
	 * @param scenes_xyzatr per-scene poses
	 * @param quadCLTs scene instances
	 * @param sceneTimes per-scene time values
	 * @param firstScene scene index that corresponds to global variable 0
	 * @param lastScene last scene of the segment
	 * @param activePoseScene per-variable flags selecting scenes that are global unknowns
	 * @param includeRateChain whether rate parameters are chained onto neighbor poses
	 * @param diag diagonal blocks (accumulated in place)
	 * @param offDiag1 first upper off-diagonal blocks (accumulated in place)
	 * @param offDiag2 second upper off-diagonal blocks (accumulated in place)
	 * @param rhs right-hand side, indexed {@code [variable][pose slot]} (accumulated in place)
	 */
	private static void accumulateMappedNormalEquation(
			final IntersceneLma.NormalEquationData ne,
			final int sceneIndex,
			final int refSceneIndex,
			final int centerIndex,
			final int[] sceneParIndices,
			final double[][][] scenes_xyzatr,
			final QuadCLT[] quadCLTs,
			final double[] sceneTimes,
			final int firstScene,
			final int lastScene,
			final boolean[] activePoseScene,
			final boolean includeRateChain,
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs) {
		// Generic array creation is unchecked; every slot is filled with ArrayList<GlobalTerm> below.
		@SuppressWarnings("unchecked")
		final ArrayList<GlobalTerm>[] maps = new ArrayList[ne.parameterIndices.length];
		for (int i = 0; i < maps.length; i++) {
			maps[i] = mapLocalParameterToGlobalTerms(
					ne.parameterIndices[i],
					sceneIndex,
					refSceneIndex,
					centerIndex,
					sceneParIndices,
					firstScene,
					lastScene,
					activePoseScene,
					scenes_xyzatr,
					quadCLTs,
					sceneTimes,
					includeRateChain);
		}
		for (int i = 0; i < ne.parameterIndices.length; i++) {
			if (maps[i].isEmpty()) {
				continue;
			}
			for (GlobalTerm gi : maps[i]) {
				rhs[gi.varIndex][gi.parIndex] += gi.coeff * ne.b[i];
			}
			for (int j = 0; j < ne.parameterIndices.length; j++) {
				if (maps[j].isEmpty()) {
					continue;
				}
				for (GlobalTerm gi : maps[i]) {
					for (GlobalTerm gj : maps[j]) {
						if (gi.varIndex > gj.varIndex) {
							// Mirror of an upper-block entry that is added when the roles of
							// i and j are swapped; adding it too would double the stored block.
							continue;
						}
						addToBlockBand(
								diag,
								offDiag1,
								offDiag2,
								gi.varIndex,
								gj.varIndex,
								gi.parIndex,
								gj.parIndex,
								gi.coeff * ne.h[i][j] * gj.coeff);
					}
				}
			}
		}
	}

	/**
	 * Builds the per-parameter selection mask restricted to the scene pose parameters.
	 *
	 * @param paramSelect caller's parameter selection, or {@code null} to select every pose parameter
	 * @param disableErs intentionally ignored: global unknowns are pose-only
	 * @return mask of length {@code ErsCorrection.DP_NUM_PARS}; only entries listed in
	 *         {@code SCENE_POSE_PAR_INDICES} can be {@code true}
	 */
	private static boolean[] getSceneParamMask(
			final boolean[] paramSelect,
			final boolean disableErs) {
		final boolean[] mask = new boolean[ErsCorrection.DP_NUM_PARS];
		for (final int par : SCENE_POSE_PAR_INDICES) {
			if (paramSelect == null) {
				mask[par] = true;
			} else if (par < paramSelect.length) {
				// pose parameters beyond the end of paramSelect stay unselected
				mask[par] = paramSelect[par];
			}
		}
		return mask;
	}

	/**
	 * Lists the selected scene pose parameters in {@code SCENE_POSE_PAR_INDICES} order.
	 *
	 * @param sceneMask selection mask indexed by parameter index
	 * @return the selected pose parameter indices (an empty array when none is selected)
	 */
	private static int[] getActiveSceneParameterIndices(final boolean[] sceneMask) {
		final int[] picked = new int[SCENE_POSE_PAR_INDICES.length];
		int count = 0;
		for (final int par : SCENE_POSE_PAR_INDICES) {
			if (sceneMask[par]) {
				picked[count++] = par;
			}
		}
		// trim the scratch array to the number of selected parameters
		return Arrays.copyOf(picked, count);
	}

	/**
	 * Per-parameter low-pass (smoothing) settings; all three arrays share one length and are
	 * indexed by the position of a parameter in the active scene parameter list.
	 */
	private static final class LpfSetup {
		/** Squared magnitude of the smoothing control value. */
		final double[] controlSq;
		/** Squared normalization scale applied in weight space. */
		final double[] normScaleSq;
		/** Starting weights, before any automatic normalization. */
		final double[] baseWeights;

		/** Allocates zero-filled arrays for {@code npars} parameters. */
		LpfSetup(final int npars) {
			controlSq = new double[npars];
			normScaleSq = new double[npars];
			baseWeights = new double[npars];
		}
	}

	/**
	 * Prepares the smoothing (LPF) controls, normalization scales and base weights for the
	 * active scene parameters.
	 *
	 * <p>The control value of a parameter comes from {@code paramLpf} when that array covers
	 * its index; otherwise from {@code options.smoothWeightAtr} (angular pose parameters) or
	 * {@code options.smoothWeightXyz} (linear pose parameters), and is zero for anything else.
	 * Linear parameters are normalized by {@code 1/xyzScale^2}, all others by 1.
	 *
	 * @param sceneParIndices active scene parameter indices
	 * @param paramLpf optional per-parameter control values indexed by parameter index (may be {@code null})
	 * @param options supplies the fallback smoothing weights
	 * @param xyzScale length scale for the linear parameters; values not above {@code MIN_DIAG} are replaced by 1
	 * @return the filled setup
	 */
	private static LpfSetup getSceneLpfSetup(
			final int[] sceneParIndices,
			final double[] paramLpf,
			final Options options,
			final double xyzScale) {
		final LpfSetup setup = new LpfSetup(sceneParIndices.length);
		final double scale = (xyzScale > MIN_DIAG) ? xyzScale : 1.0;
		final double xyzInvScaleSq = 1.0 / (scale * scale);
		for (int slot = 0; slot < sceneParIndices.length; slot++) {
			final int par = sceneParIndices[slot];
			final boolean isAtr =
					(par == ErsCorrection.DP_DSAZ) ||
					(par == ErsCorrection.DP_DSTL) ||
					(par == ErsCorrection.DP_DSRL);
			final boolean isXyz =
					(par == ErsCorrection.DP_DSX) ||
					(par == ErsCorrection.DP_DSY) ||
					(par == ErsCorrection.DP_DSZ);
			final double control;
			if ((paramLpf != null) && (par < paramLpf.length)) {
				control = paramLpf[par];
			} else if (isAtr) {
				control = options.smoothWeightAtr;
			} else if (isXyz) {
				control = options.smoothWeightXyz;
			} else {
				control = 0.0;
			}
			final double magnitude = Math.abs(control);
			setup.controlSq[slot] = magnitude * magnitude;
			// Curvature is in meters for XYZ and radians for ATR.
			// Use unitless XYZ curvature (divide by avg altitude), implemented as 1/scale^2 in weight space.
			setup.normScaleSq[slot] = isXyz ? xyzInvScaleSq : 1.0;
			setup.baseWeights[slot] = sanitizeLpfWeight(setup.controlSq[slot] * setup.normScaleSq[slot]);
		}
		return setup;
	}

	/**
	 * Rescales the smoothing weights so that the curvature penalty of each parameter is
	 * budgeted against the average per-pair residual energy.
	 *
	 * <p>The base weights are returned unchanged when any input needed for normalization is
	 * missing, when fewer than three scenes exist, or when no parameter is eligible. A parameter
	 * is eligible when its control is positive, its scaled squared curvature exceeds
	 * {@code MIN_DIAG} and its pixel sensitivity is positive. Once normalization proceeds,
	 * ineligible parameters get weight 0.
	 *
	 * @param x global state vector, {@code npars} values per scene
	 * @param lpfSetup smoothing setup; {@code null} yields an all-zero result
	 * @param pairPixelSensitivity per-parameter sensitivity estimate
	 * @param activePoseScene per-scene flags selecting the global unknowns
	 * @param npars number of parameters per scene
	 * @param pairSqSum sum of squared pair residuals
	 * @param solvedPairs number of pairs contributing to {@code pairSqSum}
	 * @return a new array of {@code npars} effective weights
	 */
	private static double[] autoNormalizeSceneLpfWeights(
			final double[] x,
			final LpfSetup lpfSetup,
			final double[] pairPixelSensitivity,
			final boolean[] activePoseScene,
			final int npars,
			final double pairSqSum,
			final int solvedPairs) {
		if (lpfSetup == null) {
			return new double[npars];
		}
		final double[] weights = lpfSetup.baseWeights.clone();
		final boolean haveInputs =
				(x != null) && (pairPixelSensitivity != null) && (pairSqSum > 0.0) && (solvedPairs > 0);
		if (!haveInputs) {
			return weights;
		}
		final double[] rawCurvSq = new double[npars];
		if (activePoseScene.length < 3) {
			return weights;
		}
		// Sum of squared second differences per parameter, over every interior scene whose
		// three-scene stencil touches at least one active scene.
		final int lastInterior = activePoseScene.length - 1;
		for (int mid = 1; mid < lastInterior; mid++) {
			if (!activePoseScene[mid - 1] && !activePoseScene[mid] && !activePoseScene[mid + 1]) {
				continue;
			}
			final int base = mid * npars;
			for (int k = 0; k < npars; k++) {
				if (lpfSetup.controlSq[k] > 0.0) {
					final double e = x[base + k] - 0.5 * (x[base - npars + k] + x[base + npars + k]);
					rawCurvSq[k] += e * e;
				}
			}
		}
		// Evaluate eligibility once and gather the mean sensitivity of the eligible parameters.
		final boolean[] eligible = new boolean[npars];
		final double[] scaledCurvSq = new double[npars];
		int activePars = 0;
		double meanSensitivity = 0.0;
		for (int k = 0; k < npars; k++) {
			scaledCurvSq[k] = lpfSetup.normScaleSq[k] * rawCurvSq[k];
			eligible[k] =
					(lpfSetup.controlSq[k] > 0.0) &&
					(scaledCurvSq[k] > MIN_DIAG) &&
					(pairPixelSensitivity[k] > 0.0);
			if (eligible[k]) {
				activePars++;
				meanSensitivity += pairPixelSensitivity[k];
			}
		}
		if (activePars == 0) {
			return weights;
		}
		meanSensitivity /= activePars;
		if (!(meanSensitivity > 0.0)) {
			return weights;
		}
		// Dataset-size independent target: use per-pair average residual energy.
		final double pairMeanSq = pairSqSum / solvedPairs;
		final double perParamBudget = pairMeanSq / activePars;
		for (int k = 0; k < npars; k++) {
			if (!eligible[k]) {
				weights[k] = 0.0;
				continue;
			}
			final double sensitivityScale = pairPixelSensitivity[k] / meanSensitivity;
			weights[k] = sanitizeLpfWeight(
					perParamBudget *
					lpfSetup.controlSq[k] *
					sensitivityScale *
					lpfSetup.normScaleSq[k] / scaledCurvSq[k]);
		}
		return weights;
	}

	/**
	 * Estimates a per-parameter sensitivity as the square root of the mean positive
	 * diagonal element over the active scenes.
	 *
	 * @param diag diagonal blocks of the global normal matrix, one per scene
	 * @param activePoseScene per-scene flags; inactive scenes are skipped
	 * @param npars number of parameters per scene
	 * @return {@code npars} values; 0 for a parameter with no positive diagonal entry
	 */
	private static double[] estimatePairPixelSensitivity(
			final double[][][] diag,
			final boolean[] activePoseScene,
			final int npars) {
		final double[] total = new double[npars];
		final int[] hits = new int[npars];
		for (int ivar = 0; ivar < diag.length; ivar++) {
			if (activePoseScene[ivar]) {
				final double[][] block = diag[ivar];
				for (int k = 0; k < npars; k++) {
					final double d = block[k][k];
					// non-positive and NaN entries do not contribute
					if (d > 0.0) {
						total[k] += d;
						hits[k]++;
					}
				}
			}
		}
		final double[] sensitivity = new double[npars];
		for (int k = 0; k < npars; k++) {
			if (hits[k] > 0) {
				sensitivity[k] = Math.sqrt(total[k] / hits[k]);
			}
		}
		return sensitivity;
	}

	/**
	 * Clamps a smoothing weight to a usable value.
	 *
	 * @param w candidate weight
	 * @return {@code w} limited to {@code MAX_EFFECTIVE_LPF_WEIGHT} when it is positive and
	 *         finite; {@code 0.0} for non-positive, NaN or infinite input
	 */
	private static double sanitizeLpfWeight(final double w) {
		final boolean usable = (w > 0.0) && Double.isFinite(w);
		return usable ? Math.min(w, MAX_EFFECTIVE_LPF_WEIGHT) : 0.0;
	}

	/**
	 * Formats the effective smoothing weights of the active pose parameters for logging.
	 *
	 * @param sceneParIndices active scene parameter indices
	 * @param sceneParamLpf weights, indexed by position in {@code sceneParIndices}
	 * @return text such as {@code [X=..., Y=..., A=...]}, listing only active parameters in
	 *         X, Y, Z, A, T, R order with six significant digits
	 */
	private static String formatLpfEffectiveWeights(
			final int[] sceneParIndices,
			final double[] sceneParamLpf) {
		final int[] order = {
				ErsCorrection.DP_DSX,
				ErsCorrection.DP_DSY,
				ErsCorrection.DP_DSZ,
				ErsCorrection.DP_DSAZ,
				ErsCorrection.DP_DSTL,
				ErsCorrection.DP_DSRL};
		final String[] names = {"X", "Y", "Z", "A", "T", "R"};
		final ArrayList<String> parts = new ArrayList<String>(order.length);
		for (int i = 0; i < order.length; i++) {
			final int slot = mapSceneParameterIndex(
					order[i],
					sceneParIndices);
			if (slot >= 0) {
				parts.add(names[i] + '=' + String.format(Locale.US, "%.6g", sceneParamLpf[slot]));
			}
		}
		return '[' + String.join(", ", parts) + ']';
	}

	/**
	 * Formats the per-parameter curvature statistics for logging.
	 *
	 * <p>The {@code lpfE(...)} group prints the weighted squared sums as given. The
	 * {@code lpfR(...)} group prints {@code sqrt(sumSq / sumW)} per parameter, {@code off} when
	 * the weight sum is not positive, and {@code nan} when the squared sum is negative or not finite.
	 *
	 * @param sceneParIndices active scene parameter indices
	 * @param sumSqByPar weighted squared curvature sums, indexed by position in {@code sceneParIndices}
	 * @param sumWByPar weight sums, indexed the same way
	 * @return text of the form {@code lpfE(X=...,Y=...) lpfR(X=...,Y=...)} in X, Y, Z, A, T, R order
	 */
	private static String formatCurvatureBreakdown(
			final int[] sceneParIndices,
			final double[] sumSqByPar,
			final double[] sumWByPar) {
		final int[] order = {
				ErsCorrection.DP_DSX,
				ErsCorrection.DP_DSY,
				ErsCorrection.DP_DSZ,
				ErsCorrection.DP_DSAZ,
				ErsCorrection.DP_DSTL,
				ErsCorrection.DP_DSRL};
		final String[] names = {"X", "Y", "Z", "A", "T", "R"};
		final ArrayList<String> energies = new ArrayList<String>(order.length);
		final ArrayList<String> rmsValues = new ArrayList<String>(order.length);
		for (int i = 0; i < order.length; i++) {
			final int slot = mapSceneParameterIndex(
					order[i],
					sceneParIndices);
			if (slot < 0) {
				continue; // parameter is not active
			}
			final double sq = sumSqByPar[slot];
			energies.add(names[i] + '=' + String.format(Locale.US, "%.6g", sq));
			final double w = sumWByPar[slot];
			final String shown;
			if (!(w > 0.0)) {
				shown = "off";
			} else if (!(sq >= 0.0) || !Double.isFinite(sq)) {
				shown = "nan";
			} else {
				shown = String.format(Locale.US, "%.6g", Math.sqrt(sq / w));
			}
			rmsValues.add(names[i] + '=' + shown);
		}
		return "lpfE(" + String.join(",", energies) + ") lpfR(" + String.join(",", rmsValues) + ')';
	}

	/**
	 * Finds the position of a parameter index within the active scene parameter list.
	 *
	 * @param parIndex parameter index to look up
	 * @param sceneParIndices active scene parameter indices
	 * @return position of the first match, or {@code -1} when {@code parIndex} is not listed
	 */
	private static int mapSceneParameterIndex(
			final int parIndex,
			final int[] sceneParIndices) {
		int found = -1;
		for (int pos = 0; (found < 0) && (pos < sceneParIndices.length); pos++) {
			if (sceneParIndices[pos] == parIndex) {
				found = pos;
			}
		}
		return found;
	}

	/**
	 * Reads one pose parameter from an {@code [xyz, atr]} pose.
	 *
	 * @param xyzatr pose: row 0 holds X, Y, Z and row 1 holds the three angles
	 * @param parIndex scene pose parameter index (an {@code ErsCorrection.DP_DS*} constant)
	 * @return the matching pose element, or {@code 0.0} for any other parameter index
	 */
	private static double getSceneParameterValue(
			final double[][] xyzatr,
			final int parIndex) {
		final int group; // 0 - xyz, 1 - atr
		final int comp;
		switch (parIndex) {
		case ErsCorrection.DP_DSAZ: group = 1; comp = 0; break;
		case ErsCorrection.DP_DSTL: group = 1; comp = 1; break;
		case ErsCorrection.DP_DSRL: group = 1; comp = 2; break;
		case ErsCorrection.DP_DSX:  group = 0; comp = 0; break;
		case ErsCorrection.DP_DSY:  group = 0; comp = 1; break;
		case ErsCorrection.DP_DSZ:  group = 0; comp = 2; break;
		default:
			return 0.0;
		}
		return xyzatr[group][comp];
	}

	/**
	 * Writes one pose parameter into an {@code [xyz, atr]} pose.
	 *
	 * @param xyzatr pose to modify: row 0 holds X, Y, Z and row 1 holds the three angles
	 * @param parIndex scene pose parameter index (an {@code ErsCorrection.DP_DS*} constant);
	 *        any other index leaves the pose untouched
	 * @param value new value
	 */
	private static void setSceneParameterValue(
			final double[][] xyzatr,
			final int parIndex,
			final double value) {
		final int group; // 0 - xyz, 1 - atr
		final int comp;
		switch (parIndex) {
		case ErsCorrection.DP_DSAZ: group = 1; comp = 0; break;
		case ErsCorrection.DP_DSTL: group = 1; comp = 1; break;
		case ErsCorrection.DP_DSRL: group = 1; comp = 2; break;
		case ErsCorrection.DP_DSX:  group = 0; comp = 0; break;
		case ErsCorrection.DP_DSY:  group = 0; comp = 1; break;
		case ErsCorrection.DP_DSZ:  group = 0; comp = 2; break;
		default:
			return;
		}
		xyzatr[group][comp] = value;
	}

	/**
	 * Extracts the active pose parameters of one scene as a flat vector.
	 *
	 * @param xyzatr scene pose as {@code [xyz, atr]}
	 * @param sceneParIndices active scene parameter indices (defines the output order)
	 * @return a new vector with one value per entry of {@code sceneParIndices}
	 */
	private static double[] getSceneVector(
			final double[][] xyzatr,
			final int[] sceneParIndices) {
		final double[] vec = new double[sceneParIndices.length];
		int slot = 0;
		for (final int par : sceneParIndices) {
			vec[slot++] = getSceneParameterValue(xyzatr, par);
		}
		return vec;
	}

	/**
	 * Writes a flat parameter vector back into a scene pose.
	 *
	 * @param xyzatr scene pose to modify, as {@code [xyz, atr]}
	 * @param vec values, one per entry of {@code sceneParIndices}
	 * @param sceneParIndices active scene parameter indices (defines the order of {@code vec})
	 */
	private static void setSceneVector(
			final double[][] xyzatr,
			final double[] vec,
			final int[] sceneParIndices) {
		int slot = 0;
		for (final int par : sceneParIndices) {
			setSceneParameterValue(xyzatr, par, vec[slot]);
			slot++;
		}
	}

	/**
	 * Stores the parameters of one scene into the flat global state vector.
	 *
	 * @param x global state vector, {@code npars} consecutive values per scene
	 * @param ivar scene (variable) index within {@code x}
	 * @param state source values; the first {@code npars} elements are copied
	 * @param npars number of parameters per scene
	 */
	private static void setSceneState(
			final double[] x,
			final int ivar,
			final double[] state,
			final int npars) {
		final int offset = ivar * npars; // start of this scene's slice
		System.arraycopy(state, 0, x, offset, npars);
	}

	/**
	 * Copies the parameters of one scene out of the flat global state vector.
	 *
	 * @param x global state vector, {@code npars} consecutive values per scene
	 * @param ivar scene (variable) index within {@code x}
	 * @param npars number of parameters per scene
	 * @return a new array holding the scene's {@code npars} values
	 */
	private static double[] getSceneState(
			final double[] x,
			final int ivar,
			final int npars) {
		final int offset = ivar * npars; // start of this scene's slice
		final double[] state = new double[npars];
		System.arraycopy(x, offset, state, 0, npars);
		return state;
	}

	/**
	 * Resets the global normal-equation storage to zero before a new accumulation pass.
	 *
	 * @param diag diagonal blocks
	 * @param offDiag1 first off-diagonal blocks
	 * @param offDiag2 second off-diagonal blocks
	 * @param rhs right-hand side, one row per diagonal block
	 */
	private static void zeroNormalBlocks(
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs) {
		for (int i = 0; i < diag.length; i++) {
			for (final double[] row : diag[i]) {
				Arrays.fill(row, 0.0);
			}
			Arrays.fill(rhs[i], 0.0);
		}
		for (final double[][] block : offDiag1) {
			for (final double[] row : block) {
				Arrays.fill(row, 0.0);
			}
		}
		for (final double[][] block : offDiag2) {
			for (final double[] row : block) {
				Arrays.fill(row, 0.0);
			}
		}
	}

	/**
	 * Adds the curvature (second-difference) smoothing penalty to the global normal equations.
	 *
	 * <p>For every interior scene and parameter the residual is
	 * {@code e = x[cur] - 0.5 * (x[prev] + x[next])}. Each active scene of the stencil
	 * receives {@code -w * coeff * e} on the right-hand side and {@code w * coeff_p * coeff_q}
	 * in the matrix. Only the diagonal and upper-triangle stencil pairs are passed to
	 * {@code addToBlockBand}. Stencils with no active scene are skipped, as are parameters with
	 * a non-positive weight.
	 *
	 * @param x global state vector, {@code npars} values per scene
	 * @param diag diagonal blocks (accumulated in place)
	 * @param offDiag1 first upper off-diagonal blocks (accumulated in place)
	 * @param offDiag2 second upper off-diagonal blocks (accumulated in place)
	 * @param rhs right-hand side (accumulated in place)
	 * @param paramLpfScene per-parameter smoothing weights
	 * @param activePoseScene per-scene flags selecting the global unknowns
	 * @param npars number of parameters per scene
	 */
	private static void assembleCurvatureBlocks(
			final double[] x,
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs,
			final double[] paramLpfScene,
			final boolean[] activePoseScene,
			final int npars) {
		final int nscenes = diag.length;
		if (nscenes < 3) {
			return;
		}
		final double[] stencil = {-0.5, 1.0, -0.5};
		final int[] scene = new int[stencil.length];
		for (int mid = 1; mid < (nscenes - 1); mid++) {
			scene[0] = mid - 1;
			scene[1] = mid;
			scene[2] = mid + 1;
			if (!(activePoseScene[scene[0]] || activePoseScene[scene[1]] || activePoseScene[scene[2]])) {
				continue;
			}
			for (int k = 0; k < npars; k++) {
				final double w = paramLpfScene[k];
				if (w <= 0.0) {
					continue;
				}
				final double e =
						stencil[0] * x[scene[0] * npars + k] +
						stencil[1] * x[scene[1] * npars + k] +
						stencil[2] * x[scene[2] * npars + k];
				for (int p = 0; p < stencil.length; p++) {
					if (!activePoseScene[scene[p]]) {
						continue;
					}
					rhs[scene[p]][k] += -w * stencil[p] * e;
					// q starts at p: only the upper triangle of the stencil is accumulated
					for (int q = p; q < stencil.length; q++) {
						if (activePoseScene[scene[q]]) {
							addToBlockBand(
									diag,
									offDiag1,
									offDiag2,
									scene[p],
									scene[q],
									k,
									k,
									w * stencil[p] * stencil[q]);
						}
					}
				}
			}
		}
	}

	/**
	 * Curvature statistics without the per-parameter breakdown.
	 *
	 * @param x global state vector, {@code npars} values per scene
	 * @param paramLpfScene per-parameter smoothing weights
	 * @param activePoseScene per-scene flags selecting the global unknowns
	 * @param npars number of parameters per scene
	 * @return {@code {weighted squared curvature sum, weight sum}}
	 */
	private static double[] getCurvatureStats(
			final double[] x,
			final double[] paramLpfScene,
			final boolean[] activePoseScene,
			final int npars) {
		final double[] noBreakdown = null; // per-parameter outputs are not requested
		return getCurvatureStats(x, paramLpfScene, activePoseScene, npars, noBreakdown, noBreakdown);
	}

	/**
	 * Accumulates the weighted curvature (second-difference) energy of the state vector.
	 *
	 * <p>For every interior scene whose three-scene stencil touches an active scene, and for
	 * every parameter with a positive weight {@code w}, the residual
	 * {@code e = x[cur] - 0.5 * (x[prev] + x[next])} contributes {@code w * e * e} to the
	 * squared sum and {@code w} to the weight sum.
	 *
	 * @param x global state vector, {@code npars} values per scene
	 * @param paramLpfScene per-parameter smoothing weights
	 * @param activePoseScene per-scene flags selecting the global unknowns
	 * @param npars number of parameters per scene
	 * @param sumSqByPar optional output: per-parameter weighted squared sums (cleared first; may be {@code null})
	 * @param sumWByPar optional output: per-parameter weight sums (cleared first; may be {@code null})
	 * @return {@code {total weighted squared sum, total weight sum}}
	 */
	private static double[] getCurvatureStats(
			final double[] x,
			final double[] paramLpfScene,
			final boolean[] activePoseScene,
			final int npars,
			final double[] sumSqByPar,
			final double[] sumWByPar) {
		final boolean wantSq = (sumSqByPar != null);
		final boolean wantW = (sumWByPar != null);
		if (wantSq) {
			Arrays.fill(sumSqByPar, 0.0);
		}
		if (wantW) {
			Arrays.fill(sumWByPar, 0.0);
		}
		double totalSq = 0.0;
		double totalW = 0.0;
		// With fewer than three scenes there is no interior scene and the loop does not run.
		final int lastInterior = activePoseScene.length - 1;
		for (int mid = 1; mid < lastInterior; mid++) {
			if (!activePoseScene[mid - 1] && !activePoseScene[mid] && !activePoseScene[mid + 1]) {
				continue;
			}
			final int base = mid * npars;
			for (int k = 0; k < npars; k++) {
				final double w = paramLpfScene[k];
				if (w <= 0.0) {
					continue;
				}
				final double e = x[base + k] - 0.5 * (x[base - npars + k] + x[base + npars + k]);
				final double wsq = w * e * e;
				totalSq += wsq;
				totalW += w;
				if (wantSq) {
					sumSqByPar[k] += wsq;
				}
				if (wantW) {
					sumWByPar[k] += w;
				}
			}
		}
		return new double[] {totalSq, totalW};
	}

	/**
	 * Applies a scaled solver step to the state vector and mirrors it into the scene poses.
	 *
	 * @param x global state vector, {@code npars} values per scene (updated in place)
	 * @param scenes_xyzatr per-scene poses; non-null entries of active scenes are updated
	 * @param delta solver step, laid out like {@code x}
	 * @param scale factor applied to {@code delta}
	 * @param firstScene scene index that corresponds to global variable 0
	 * @param endScene last scene index to process (inclusive)
	 * @param activePoseScene per-variable flags; inactive scenes are left untouched
	 * @param npars number of parameters per scene
	 * @param sceneParIndices active scene parameter indices
	 * @return the largest absolute parameter change that was applied
	 */
	private static double applyDeltaToState(
			final double[] x,
			final double[][][] scenes_xyzatr,
			final double[] delta,
			final double scale,
			final int firstScene,
			final int endScene,
			final boolean[] activePoseScene,
			final int npars,
			final int[] sceneParIndices) {
		double largestStep = 0.0;
		for (int nscene = firstScene; nscene <= endScene; nscene++) {
			final int ivar = nscene - firstScene;
			if (activePoseScene[ivar]) {
				final int base = ivar * npars;
				final double[] updated = new double[npars];
				for (int k = 0; k < npars; k++) {
					final double step = scale * delta[base + k];
					updated[k] = x[base + k] + step;
					largestStep = Math.max(largestStep, Math.abs(step));
				}
				System.arraycopy(updated, 0, x, base, npars);
				final double[][] pose = scenes_xyzatr[nscene];
				if (pose != null) {
					setSceneVector(pose, updated, sceneParIndices);
				}
			}
		}
		return largestStep;
	}

	/**
	 * Damps the diagonal of active scenes and pins inactive scenes.
	 *
	 * <p>Active scenes: each diagonal element grows by
	 * {@code lambdaDiag * max(MIN_DIAG, |element|)}. Inactive scenes: the leading
	 * {@code npars x npars} part of the diagonal block becomes the identity and the right-hand
	 * side becomes zero.
	 *
	 * @param diag diagonal blocks (modified in place)
	 * @param rhs right-hand side (modified in place for inactive scenes)
	 * @param lambdaDiag relative damping factor
	 * @param activePoseScene per-scene flags selecting the global unknowns
	 * @param npars number of parameters per scene
	 */
	private static void regularizeDiagonalAndFixInactive(
			final double[][][] diag,
			final double[][] rhs,
			final double lambdaDiag,
			final boolean[] activePoseScene,
			final int npars) {
		for (int i = 0; i < diag.length; i++) {
			final double[][] block = diag[i];
			if (activePoseScene[i]) {
				for (int k = 0; k < npars; k++) {
					final double magnitude = Math.max(MIN_DIAG, Math.abs(block[k][k]));
					block[k][k] += lambdaDiag * magnitude;
				}
			} else {
				for (int k = 0; k < npars; k++) {
					Arrays.fill(block[k], 0, npars, 0.0);
					block[k][k] = 1.0;
					rhs[i][k] = 0.0;
				}
			}
		}
	}

	/**
	 * Solves the block-banded global system for the step {@code delta} with the
	 * conjugate-gradient method, preconditioned by the inverse of the matrix diagonal.
	 *
	 * <p>The iteration starts from {@code delta = 0}. It stops when the infinity norm of the
	 * residual drops below {@code tol}, when {@code maxIter} iterations have run, or when one
	 * of the inner products ({@code r.z} or {@code p.Ap}) falls below {@code MIN_DIAG} in
	 * magnitude.
	 *
	 * @param diag diagonal blocks
	 * @param offDiag1 first upper off-diagonal blocks
	 * @param offDiag2 second upper off-diagonal blocks
	 * @param rhs right-hand side, indexed {@code [variable][parameter]}
	 * @param delta output: solution vector of length {@code diag.length * npars} (overwritten)
	 * @param maxIter maximum number of iterations
	 * @param tol convergence threshold on the infinity norm of the residual
	 * @param npars number of parameters per variable
	 * @return number of iterations counted when the loop ended; 0 when the initial residual is negligible
	 */
	private static int solvePcg(
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[][] rhs,
			final double[] delta,
			final int maxIter,
			final double tol,
			final int npars) {
		final int nvars = diag.length;
		final int n = nvars * npars;
		Arrays.fill(delta, 0.0);
		final double[] r = new double[n];       // residual (equals rhs for the zero initial guess)
		final double[] z = new double[n];       // preconditioned residual
		final double[] p = new double[n];       // search direction
		final double[] ap = new double[n];      // matrix times search direction
		final double[] invDiag = new double[n]; // diagonal preconditioner
		for (int i = 0; i < nvars; i++) {
			for (int k = 0; k < npars; k++) {
				final int idx = i * npars + k;
				r[idx] = rhs[i][k];
				invDiag[idx] = 1.0 / Math.max(MIN_DIAG, diag[i][k][k]);
				z[idx] = r[idx] * invDiag[idx];
				p[idx] = z[idx];
			}
		}
		double rz = dot(r, z);
		if (Math.abs(rz) < MIN_DIAG) {
			return 0;
		}
		int iter;
		for (iter = 0; iter < maxIter; iter++) {
			matVec(
					diag,
					offDiag1,
					offDiag2,
					p,
					ap,
					npars);
			final double pAp = dot(p, ap);
			if (Math.abs(pAp) < MIN_DIAG) {
				break;
			}
			final double alpha = rz / pAp;
			axpy(alpha, p, delta);
			axpy(-alpha, ap, r);
			if (normInf(r) < tol) {
				iter++; // count the iteration that reached convergence
				break;
			}
			for (int i = 0; i < n; i++) {
				z[i] = r[i] * invDiag[i];
			}
			final double rzNext = dot(r, z);
			if (Math.abs(rzNext) < MIN_DIAG) {
				break;
			}
			final double beta = rzNext / rz;
			for (int i = 0; i < n; i++) {
				p[i] = z[i] + beta * p[i];
			}
			rz = rzNext;
		}
		return iter;
	}

	/**
	 * Multiplies the symmetric block-banded matrix by a vector: {@code out = A * v}.
	 *
	 * <p>The matrix is given by its diagonal blocks and the first and second upper
	 * off-diagonal blocks: {@code offDiag1[i]} couples block {@code i} with {@code i + 1},
	 * {@code offDiag2[i]} couples block {@code i} with {@code i + 2}. Blocks below the diagonal
	 * are taken as transposes of the stored upper ones.
	 *
	 * <p>Block rows are handed out to worker threads through a shared counter; each block row
	 * writes only its own {@code npars} elements of {@code out}.
	 *
	 * @param diag diagonal blocks
	 * @param offDiag1 first upper off-diagonal blocks
	 * @param offDiag2 second upper off-diagonal blocks
	 * @param v input vector, {@code npars} values per block
	 * @param out output vector, same layout as {@code v} (overwritten)
	 * @param npars block size
	 */
	private static void matVec(
			final double[][][] diag,
			final double[][][] offDiag1,
			final double[][][] offDiag2,
			final double[] v,
			final double[] out,
			final int npars) {
		Arrays.fill(out, 0.0);
		final int nvars = diag.length;
		final Thread[] threads = ImageDtt.newThreadArray(Math.min(QuadCLT.THREADS_MAX, Math.max(1, nvars)));
		final AtomicInteger ai = new AtomicInteger(0);
		for (int ithread = 0; ithread < threads.length; ithread++) {
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					// each worker repeatedly claims the next unprocessed block row
					for (int i = ai.getAndIncrement(); i < nvars; i = ai.getAndIncrement()) {
						final int i0 = i * npars;
						for (int r = 0; r < npars; r++) {
							double y = 0.0;
							// diagonal block
							for (int c = 0; c < npars; c++) {
								y += diag[i][r][c] * v[i0 + c];
							}
							// block (i, i-1): transpose of the stored block (i-1, i)
							if (i > 0) {
								for (int c = 0; c < npars; c++) {
									y += offDiag1[i - 1][c][r] * v[(i - 1) * npars + c];
								}
							}
							// block (i, i+1): stored as is
							if (i < (nvars - 1)) {
								for (int c = 0; c < npars; c++) {
									y += offDiag1[i][r][c] * v[(i + 1) * npars + c];
								}
							}
							// block (i, i-2): transpose of the stored block (i-2, i)
							if (i > 1) {
								for (int c = 0; c < npars; c++) {
									y += offDiag2[i - 2][c][r] * v[(i - 2) * npars + c];
								}
							}
							// block (i, i+2): stored as is
							if (i < (nvars - 2)) {
								for (int c = 0; c < npars; c++) {
									y += offDiag2[i][r][c] * v[(i + 2) * npars + c];
								}
							}
							out[i0 + r] = y;
						}
					}
				}
			};
		}
		ImageDtt.startAndJoin(threads);
	}

	/**
	 * Dot product of two vectors, summed over the length of {@code a}.
	 *
	 * <p>Work is split into contiguous, statically assigned chunks (one per thread) and the
	 * partial sums are combined in thread order, so the rounded result is the same on every
	 * run with the same thread count. Handing out single elements through a shared counter
	 * would make the summation order, and therefore the last bits of the result, depend on
	 * thread scheduling.
	 *
	 * <p>All per-thread bookkeeping is sized by the length of the array actually returned by
	 * {@code ImageDtt.newThreadArray}, as {@code matVec} does, rather than by the requested
	 * count. NOTE(review): the returned array is presumably capped by the number of available
	 * processors; confirm against {@code ImageDtt}.
	 *
	 * @param a first vector
	 * @param b second vector, at least as long as {@code a}
	 * @return sum of {@code a[i] * b[i]} over all indices of {@code a}
	 */
	private static double dot(final double[] a, final double[] b) {
		final int n = a.length;
		final Thread[] threads = ImageDtt.newThreadArray(Math.min(QuadCLT.THREADS_MAX, Math.max(1, n / 256)));
		final int nthreads = threads.length;
		if (nthreads <= 1) {
			// Short vectors: plain in-order sum, no thread start-up cost.
			double s = 0.0;
			for (int i = 0; i < n; i++) {
				s += a[i] * b[i];
			}
			return s;
		}
		final int chunk = (n + nthreads - 1) / nthreads; // ceil(n / nthreads)
		final double[] sums = new double[nthreads];
		for (int ithread = 0; ithread < nthreads; ithread++) {
			final int tid = ithread;
			final int from = Math.min(n, tid * chunk);
			final int to = Math.min(n, from + chunk);
			threads[ithread] = new Thread() {
				@Override
				public void run() {
					double s = 0.0;
					for (int i = from; i < to; i++) {
						s += a[i] * b[i];
					}
					sums[tid] = s; // each thread writes only its own slot
				}
			};
		}
		ImageDtt.startAndJoin(threads);
		double sum = 0.0;
		for (double s : sums) {
			sum += s;
		}
		return sum;
	}

	/**
	 * In-place scaled vector addition: {@code y += alpha * x}.
	 *
	 * @param alpha scale factor applied to {@code x}
	 * @param x source vector, at least as long as {@code y}
	 * @param y destination vector (every element is updated)
	 */
	private static void axpy(final double alpha, final double[] x, final double[] y) {
		final int len = y.length;
		int i = 0;
		while (i < len) {
			y[i] += alpha * x[i];
			i++;
		}
	}

	/**
	 * Infinity norm of a vector.
	 *
	 * @param v input vector
	 * @return the largest absolute element value; {@code 0.0} for an empty vector
	 */
	private static double normInf(final double[] v) {
		double largest = 0.0;
		for (final double element : v) {
			largest = Math.max(largest, Math.abs(element));
		}
		return largest;
	}
}
