CLAUDE: CUAS RT mode-0 path + L2 age/noise/flight-log (pre-JNA-migration checkpoint)

Checkpoint of the CUAS real-time work before the JCuda->JNA GPU-layer migration:
- OpticalFlow.buildSeries mode-0 curt_en fork: generate the merged-CUAS stack via CuasRanging.prepareFpixels() (GPU, explicit) then run the CUDA-free CuasDetectRT; coexists with the oracle (oracle gated off when curt_en).
- CuasDetectRT: file + in-memory(ImagePlus) entries via shared ingest(); -OFFSET gains an L2 "age" slice (5->6 ch), per-level noise scale, -LEV0 uniform naming, -OFFSET-<model> suffix.
- infer_server.py: L2 track-age (masked 5x5 max-pool, AGE_THR=0.2/AGE_K=0.5), per-level noise normalization (sqrt(2)^(L-3) default, Java-sent scale), nch + noise_scale + CMD_STATUS protocol additions; auto model-switch in CuasDnnRemote.ensureServer.
- cuasSynth + cuasNoise list SET keys (shared synth dir / inline per-level scales).
- CuasRanging.saveUasFlightLogCsv: per-frame UAS truth -> <name>-UAS_DATA.tsv (mode-0 only).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

CLAUDE: CUAS RT mode-0 path + L2 age/noise/flight-log (pre-JNA-migration checkpoint)
Checkpoint of the CUAS real-time work before the JCuda->JNA GPU-layer migration:
- OpticalFlow.buildSeries mode-0 curt_en fork: generate the merged-CUAS stack via CuasRanging.prepareFpixels() (GPU, explicit) then run the CUDA-free CuasDetectRT; coexists with the oracle (oracle gated off when curt_en).
- CuasDetectRT: file + in-memory(ImagePlus) entries via shared ingest(); -OFFSET gains an L2 "age" slice (5->6 ch), per-level noise scale, -LEV0 uniform naming, -OFFSET-<model> suffix.
- infer_server.py: L2 track-age (masked 5x5 max-pool, AGE_THR=0.2/AGE_K=0.5), per-level noise normalization (sqrt(2)^(L-3) default, Java-sent scale), nch + noise_scale + CMD_STATUS protocol additions; auto model-switch in CuasDnnRemote.ensureServer.
- cuasSynth + cuasNoise list SET keys (shared synth dir / inline per-level scales).
- CuasRanging.saveUasFlightLogCsv: per-frame UAS truth -> <name>-UAS_DATA.tsv (mode-0 only).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
3dfe70ad · Andrey Filippov · 95e25fcc · 3dfe70ad · 3dfe70ad · 3dfe70ad
Commit 3dfe70ad authored Jun 25, 2026 by Andrey Filippov
10 changed files
--- a/src/main/java/com/elphel/imagej/cameras/EyesisCorrectionParameters.java
+++ b/src/main/java/com/elphel/imagej/cameras/EyesisCorrectionParameters.java
@@ -68,7 +68,9 @@ public class EyesisCorrectionParameters {
    			"resultsDirectory", // 6
    			"cuasSeed",         // 7
    			"uasLogs",          // 8
-    			"skyMask"};         // 9  
+    			"skyMask",          // 9
+    			"cuasSynth",        // 10 shared synthetic-grid dir (curt_synth_src), valid for all sequences. By Claude on 06/24/2026
+    			"cuasNoise"};       // 11 INLINE per-level L2 noise-scale numbers (NOT a path); empty -> sqrt default. By Claude on 06/24/2026
    	public static final int KEY_INDEX_ROOT_DIRECTORY =    0;
    	public static final int KEY_INDEX_SOURCE_DIRECTORY =  1;
    	public static final int KEY_INDEX_LINKED_MODELS =     2;
@@ -79,6 +81,8 @@ public class EyesisCorrectionParameters {
    	public static final int KEY_INDEX_CUAS_SEED =         7;
    	public static final int KEY_INDEX_UAS_LOGS =          8;
    	public static final int KEY_INDEX_SKY_MASK =          9;
+    	public static final int KEY_INDEX_CUAS_SYNTH =       10; // By Claude on 06/24/2026
+    	public static final int KEY_INDEX_CUAS_NOISE =       11; // inline per-level noise scales. By Claude on 06/24/2026
    	
    	public static final String AUX_PREFIX = "AUX-";
    	public boolean swapSubchannels01=      true; // false; // (false: 0-1-2, true - 1-0-2)
@@ -129,6 +133,8 @@ public class EyesisCorrectionParameters {
    	public String cuasSeedDir=             "";
    	public boolean useCuasSeedDir=         false;
    	public String cuasSkyMask =            "";  // TIFF image 640x512 where 1.0 - sky, 0.0 - ground, blurred with GB (now sigma==2.0)
+    	public String cuasSynth =              "";  // shared dir holding the synthetic-grid TIFF (curt_synth_src) - valid for ALL sequences, resolved from the list SET key "cuasSynth" (relative to rootDirectory), like cuasSkyMask; empty -> per-sequence model dir (old behavior). By Claude on 06/24/2026
+    	public String cuasNoise =              "";  // INLINE per-level L2 noise-scale numbers from the list SET key "cuasNoise" (e.g. "0.354,0.5,0.707,1.0,1.414,2.0"); NOT a path. Empty -> theoretical sqrt(2)^(L-3) default (computed in CuasDetectRT). By Claude on 06/24/2026
    	public String cuasUasLogs =            "";  // json file path containing UAS logs
    	public double cuasUasTimeStamp =       0.0; // timestamp corresponding to the UAS time 0.0
    	public double [] cuasCameraATR =       {0, 0, 0};
@@ -322,6 +328,8 @@ public class EyesisCorrectionParameters {
  			cp.useCuasSeedDir=    	    this.useCuasSeedDir;
  			
  			cp.cuasSkyMask =            this.cuasSkyMask;
+  			cp.cuasSynth =              this.cuasSynth;
+  			cp.cuasNoise =              this.cuasNoise;
  			cp.cuasUasLogs =            this.cuasUasLogs;
  			cp.cuasUasTimeStamp =       this.cuasUasTimeStamp;
  			cp.cuasCameraATR =          this.cuasCameraATR.clone();
@@ -1833,7 +1841,7 @@ public class EyesisCorrectionParameters {
            		if (dir_map.get(KEY_DIRS[i]).length() > 0){
            			Path dir_path=base_path.resolve(Paths.get(dir_map.get(KEY_DIRS[i])));
            			File dir_file = new File(dir_path.toString());
-            			if ((i != KEY_INDEX_UAS_LOGS) && (i != KEY_INDEX_SKY_MASK)) { // cuasUasLogs, cuasSkyMask are files, not directories
+            			if ((i != KEY_INDEX_UAS_LOGS) && (i != KEY_INDEX_SKY_MASK) && (i != KEY_INDEX_CUAS_SYNTH) && (i != KEY_INDEX_CUAS_NOISE)) { // cuasUasLogs/cuasSkyMask=files; cuasSynth=input dir; cuasNoise=inline numbers (not a path) - don't auto-create. By Claude on 06/24/2026
            				if (!dir_file.exists()) {
            					if (MKDIRS_ALLOW) {
            						dir_file.mkdirs();
@@ -1901,7 +1909,20 @@ public class EyesisCorrectionParameters {
            		case KEY_INDEX_SKY_MASK: // 9:  // cuasSeed
            			this.cuasSkyMask =     dir_string; // dir_path.toString();
            			System.out.println("this.cuasSkyMask=" + this.cuasSkyMask);
-            			
+
+            			break;
+            		case KEY_INDEX_CUAS_SYNTH: // 10: shared synthetic-grid dir (curt_synth_src), all sequences. By Claude on 06/24/2026
+            			this.cuasSynth =       dir_string;
+            			System.out.println("this.cuasSynth=" + this.cuasSynth);
+            			break;
+            		case KEY_INDEX_CUAS_NOISE: // 11: INLINE per-level noise scales (numbers, NOT a path). By Claude on 06/24/2026
+            			{
+            				StringBuilder nsb = new StringBuilder(dir_map.get(KEY_DIRS[i])); // first number
+            				ArrayList<String> nx = extra_map.get(KEY_DIRS[i]);              // remaining numbers
+            				if (nx != null) for (String v : nx) nsb.append(",").append(v);
+            				this.cuasNoise = nsb.toString();
+            			}
+            			System.out.println("this.cuasNoise=" + this.cuasNoise);
            			break;
            		}
            	}

--- a/src/main/java/com/elphel/imagej/cuas/CuasRanging.java
+++ b/src/main/java/com/elphel/imagej/cuas/CuasRanging.java
@@ -2880,8 +2880,66 @@ public class CuasRanging {
 			getCenter_CLT().saveStringInModelDirectory(tsv.toString(), UAS_DATA_SUFFIX, false);
 		}
 	}
-	
-	// relies on calcMatchingTargetsLengths(.., true,...) called from recalcOmegas() to set [RSLT_GLOBAL] 
+
+	/** Standalone UAS flight-log extraction for the mode-0 RT path (no CuasMotion / targets array): queries
+	 *  uasLogReader for px, py, range and LLA/NED at every timestamped slice of imp_targets and saves one TSV
+	 *  row per slice through getCenter_CLT().saveStringInModelDirectory(.., UAS_DATA_SUFFIX, false). Leading
+	 *  slices whose label does not start with a digit ("average" slices) are skipped. Returns without
+	 *  writing anything when uasLogReader, imp_targets or center_CLT is missing. By Claude on 06/24/2026 */
+	public void saveUasFlightLogCsv(UasLogReader uasLogReader, ImagePlus imp_targets) {
+		if (uasLogReader == null) {
+			System.out.println("saveUasFlightLogCsv(): no UAS log reader - skipping flight-log CSV");
+			return;
+		}
+		if ((imp_targets == null) || (imp_targets.getStackSize() < 1)) {
+			System.out.println("saveUasFlightLogCsv(): no imp_targets - skipping flight-log CSV");
+			return;
+		}
+		if (getCenter_CLT() == null) { // the tile grid and the output directory both come from center_CLT
+			System.out.println("saveUasFlightLogCsv(): no center_CLT - skipping flight-log CSV");
+			return;
+		}
+		double [] cameraLla = getCenter_CLT().hasIns() ? getCenter_CLT().getLla() : null; // camera reference LLA
+		if ((cameraLla != null) && ((cameraLla[0] != 0.0) || (cameraLla[1] != 0.0))) uasLogReader.setCameraLLA(cameraLla);
+		double [] cam_atr = uasLogReader.getCameraATR();
+		int tilesX = getCenter_CLT().getTileProcessor().getTilesX();
+		int tilesY = getCenter_CLT().getTileProcessor().getTilesY();
+		ij.ImageStack stack = imp_targets.getStack();
+		int num_slices = stack.getSize();
+		int first_slice = 1;
+		for (; first_slice <= num_slices; first_slice++) { // skip leading non-timestamp slices
+			String lbl = stack.getSliceLabel(first_slice);  // a null/empty label counts as non-timestamp
+			if ((lbl != null) && !lbl.isEmpty() && Character.isDigit(lbl.charAt(0))) break;
+		}
+		// Slice labels can carry a suffix (e.g. "1773135457.547099-0"): extract the bare timestamp and keep it a
+		// STRING (a double round-trip would lose microsecond precision). Pattern compiled once, not per slice.
+		java.util.regex.Pattern ts_pattern = java.util.regex.Pattern.compile("\\d{5,10}\\.\\d{6}");
+		StringBuilder sb = new StringBuilder("seq\tts\tstatus\tpx\tpy\ttile_x\ttile_y\trange\tlat\tlon\talt\tnorth\teast\tdown\tcam_az\tcam_tilt\tcam_roll\n");
+		int nseq = 0;
+		for (int slice = first_slice; slice <= num_slices; slice++, nseq++) {
+			String norm = stack.getSliceLabel(slice).replace("_", ".");
+			java.util.regex.Matcher tm = ts_pattern.matcher(norm);
+			String timestamp = tm.find() ? tm.group() : norm; // no match -> use the whole normalized label
+			double [] uas =    uasLogReader.getUasPxPyDRange(timestamp); // px, py, disparity, range
+			double [] llaned = uasLogReader.getUasLlaNed(timestamp);     // lat, lon, alt, N, E, D
+			String lla = (llaned != null) ? (llaned[0]+"\t"+llaned[1]+"\t"+llaned[2]) : "\t\t"; // 3 columns, empty if no LLA
+			String atr = cam_atr[0]+"\t"+cam_atr[1]+"\t"+cam_atr[2]+"\n";                       // last 3 columns + EOL
+			if (uas != null) {
+				double px = uas[0], py = uas[1], range = uas[3];
+				int tileX = (int) Math.floor(px / GPUTileProcessor.DTT_SIZE); // floor, not truncation: px just below 0 is tile -1
+				int tileY = (int) Math.floor(py / GPUTileProcessor.DTT_SIZE);
+				String status = ((tileX >= 0) && (tileY >= 0) && (tileX < tilesX) && (tileY < tilesY)) ? "IN FoV" : "OUT OF FoV";
+				String ned = (llaned != null) ? (llaned[3]+"\t"+llaned[4]+"\t"+llaned[5]) : "\t\t"; // llaned may be null here too
+				sb.append(nseq+"\t"+timestamp+"\t"+status+"\t"+px+"\t"+py+"\t"+tileX+"\t"+tileY+"\t"+range+"\t"+lla+"\t"+ned+"\t"+atr);
+			} else {
+				sb.append(nseq+"\t"+timestamp+"\tno entry\t\t\t\t\t\t"+lla+"\t\t\t\t"+atr);
+			}
+		}
+		getCenter_CLT().saveStringInModelDirectory(sb.toString(), UAS_DATA_SUFFIX, false);
+		System.out.println("saveUasFlightLogCsv(): wrote UAS flight log ("+nseq+" scenes) -> "+getCenter_CLT().getImageName()+UAS_DATA_SUFFIX);
+	}
+
+	// relies on calcMatchingTargetsLengths(.., true,...) called from recalcOmegas() to set [RSLT_GLOBAL]
 	public void saveTargetStats(
 			final double [][][] targets_single) {
 		UasLogReader  uasLogReader = cuasMotion.getUasLogReader();

--- a/src/main/java/com/elphel/imagej/cuas/rt/CuasDetectRT.java
+++ b/src/main/java/com/elphel/imagej/cuas/rt/CuasDetectRT.java
@@ -42,6 +42,9 @@ public class CuasDetectRT {
 //	String []       model_names;
 	QuadCLT         master_CLT;
 	String          fpixels_file = null;
+	String          cuas_synth_dir = null; // shared synth-grid dir (list SET "cuasSynth"); null/empty -> scan model_directory (old per-sequence behavior). By Claude on 06/24/2026
+	String          cuas_noise = "";       // inline per-level L2 noise scales (list SET "cuasNoise"); empty -> theoretical sqrt default. By Claude on 06/24/2026
+	public static final int NOISE_REF_LEVEL = 3;   // reference pyramid level (scale 1.0) the net is calibrated to. By Claude on 06/24/2026
 	String          base_name = null; // add suffix and 
 	float [][]      fpixels; 
 	double [][]     dpixels; // same as fpixels for faster calculations on 64-bit processors 
@@ -51,93 +54,147 @@ public class CuasDetectRT {
 	float [][]      synth_pixels = null; // synthetic reference grid (scaled, NOT LoG'd), injected per pyramid level; null = real-only // By Claude on 06/14/2026
 	double          infinity;
 	
+	// mode=3 (current, fast, bypasses OpticalFlow extras): read the merged-CUAS stack from a file in
+	// model_directory. No GPU code here - the file was produced elsewhere. By Claude on 06/11/2026
 	public CuasDetectRT(
 			CLTParameters     clt_parameters,
 			UasLogReader      uasLogReader,
 			String            model_directory,
+			String            cuas_synth_dir,  // shared synth-grid dir (list SET "cuasSynth"); "" -> per-sequence model_directory. By Claude on 06/24/2026
+			String            cuas_noise,      // inline per-level noise scales (list SET "cuasNoise"); "" -> sqrt default. By Claude on 06/24/2026
 			int               debugLevel) {
 		this.uasLogReader =   uasLogReader;
 		this.clt_parameters = clt_parameters;
 		this.model_directory = model_directory;
+		this.cuas_synth_dir = cuas_synth_dir;
+		this.cuas_noise = (cuas_noise == null) ? "" : cuas_noise;
 		this.infinity = clt_parameters.imp.cuas_infinity;
 		// The REAL stack is always the pyramid base (gives the full-length levels); the synthetic // By Claude on 06/14/2026
-		// reference grid (if curt_synth_src) is loaded separately below and injected per-level.
+		// reference grid (if curt_synth_src) is loaded separately (inside ingest()) and injected per-level.
 		final String fpixels_suffix = SUFFIX_FPIXELS_TIFF; // By Claude on 06/14/2026
 		String [] fpixels_paths = CorrectionParameters.getFilesByExtensionAsArray(model_directory, fpixels_suffix); // By Claude on 06/11/2026
-		
-		String [] dbg_slices=null;
-		float [][] dbg_pixels=null;
-		boolean save_copy = false; // true;
 		if (fpixels_paths.length > 0) {
 			fpixels_file = newestFile(fpixels_paths); // pick up the newest matching file // By Claude on 06/14/2026
 			if (fpixels_paths.length > 1) {
 				System.out.println((fpixels_paths.length)+" files ending with \""+fpixels_suffix+"\" found in "+model_directory+", using the newest: "+fpixels_file); // By Claude on 06/14/2026
 			}
 			ImagePlus imp = new ImagePlus(fpixels_file);
-			width =  imp.getWidth();
-			height = imp.getHeight();
-			if (width > 0) {
-					ImageStack stack = imp.getStack();
-					// Skip non-ts ("average") slices;
-					int first_slice = 1;
-					int num_slices = stack.getSize();
-					for (; !Character.isDigit(stack.getSliceLabel(first_slice).charAt(0)); first_slice++);
-					time_stamps = new String [num_slices - first_slice + 1];
-					if (save_copy) {
-						dbg_slices = new String [num_slices];
-						dbg_pixels = new float [num_slices][];
-						for (int i = 0; i < dbg_slices.length; i++) {
-							dbg_slices[i] = stack.getSliceLabel(1 + i);
-							dbg_pixels[i] = (float[]) stack.getPixels (1 + i);
-						}
-					}
-					fpixels = new float [time_stamps.length][width*height];
-					for (int i = 0; i < time_stamps.length; i++) {
-						time_stamps[i] = stack.getSliceLabel(first_slice + i);
-						fpixels[i] = (float[]) stack.getPixels (first_slice + i);
-					}
-					String fpixels_name = Path.of(fpixels_file).getFileName().toString();
-					base_name = fpixels_name.substring(0,fpixels_name.length() - fpixels_suffix.length()); // By Claude on 06/11/2026
-					if (clt_parameters.imp.curt_subtract_avg) base_name += "-SUBAVG"; // distinct output filenames for average-subtracted runs (don't overwrite) // By Claude on 06/14/2026
-					if (clt_parameters.imp.curt_synth_src) { // load the synthetic reference grid separately (NOT mixed here) // By Claude on 06/14/2026
-						// Real (loaded above) is the pyramid base. The synthetic file holds NORMALIZED
-						// (peak 1) clean targets / a velocity-reference grid: load + scale it, keep it
-						// aside (NO LoG - it has no slowly-varying background), and inject it TILED into
-						// each pyramid level just before conv5d/DNN, so it is never averaged across levels
-						// and looks the same at every level while the real noise is averaged (SNR up ~sqrt2/level).
-						double synth_scale = clt_parameters.imp.curt_synth_scale;
-						String [] synth_paths = CorrectionParameters.getFilesByExtensionAsArray(model_directory, SUFFIX_SYNTH_TIFF);
-						if (synth_paths.length > 0) {
-							String synth_file = newestFile(synth_paths); // pick up the newest synthetic // By Claude on 06/14/2026
-							ImagePlus imp_s = new ImagePlus(synth_file);
-							ImageStack ss = imp_s.getStack();
-							int s_first = 1;
-							for (; !Character.isDigit(ss.getSliceLabel(s_first).charAt(0)); s_first++);
-							int n_synth = ss.getSize() - s_first + 1;
-							synth_pixels = new float [n_synth][];
-							for (int i = 0; i < n_synth; i++) {
-								float [] sp = (float[]) ss.getPixels(s_first + i);
-								float [] sc = new float [sp.length];
-								for (int k = 0; k < sp.length; k++) sc[k] = (float)(sp[k] * synth_scale);
-								synth_pixels[i] = sc;
-							}
-							System.out.println("Synthetic grid: loaded "+n_synth+" frames (scale "+synth_scale+", no LoG) from "+synth_file+
-									" - injected per pyramid level, tiled (synth[j % "+n_synth+"])");
-						} else {
-							System.out.println("curt_synth_src set but no \""+SUFFIX_SYNTH_TIFF+"\" found in "+model_directory+" - real only");
-						}
-						base_name += "-SYNTH"+d2s(synth_scale)+"B"; // real is always the averaged background base // By Claude on 06/14/2026
-					}
-					System.out.println("Read image data from "+fpixels_file);
+			String fpixels_name = Path.of(fpixels_file).getFileName().toString();
+			String core_base = fpixels_name.substring(0, fpixels_name.length() - fpixels_suffix.length()); // By Claude on 06/11/2026
+			ingest(imp, core_base, debugLevel);
+			if (fpixels != null) {
+				System.out.println("Read image data from "+fpixels_file);
 			} else {
 				System.out.println("Failed to read image data from "+fpixels_file);
 				fpixels_file = null;
-				fpixels = null;
 			}
 		} else {
 			System.out.println("No files ending with \""+fpixels_suffix+"\" found in "+model_directory); // By Claude on 06/11/2026
 			fpixels_file = null;
 			fpixels = null;
+			dpixels = null;
+		}
+		return;
+	}
+
+	/** mode=0 (OpticalFlow curt_en path) entry: takes a merged-CUAS stack that the caller has ALREADY generated
+	 *  (ImagePlus) instead of reading it from a file. Nothing is generated here - per the original note the
+	 *  GPU generator (CuasRanging.prepareFpixels()) is invoked by the caller - the stack is only handed to
+	 *  ingest(). A null or zero-width stack leaves fpixels/dpixels null. By Claude on 06/24/2026 */
+	public CuasDetectRT(
+			CLTParameters     clt_parameters,
+			UasLogReader      uasLogReader,
+			ImagePlus         imp_targets,     // pre-generated merged-CUAS stack (no UM); caller owns GPU generation
+			String            model_directory, // where outputs are saved (e.g. center_CLT.getX3dDirectory())
+			String            core_base_name,  // base for output filenames (e.g. center_CLT.getImageName())
+			String            cuas_synth_dir,  // shared synth-grid dir (list SET "cuasSynth"); "" -> per-sequence model_directory. By Claude on 06/24/2026
+			String            cuas_noise,      // inline per-level noise scales (list SET "cuasNoise"); "" -> sqrt default. By Claude on 06/24/2026
+			int               debugLevel) {
+		this.clt_parameters = clt_parameters;
+		this.uasLogReader =   uasLogReader;
+		this.infinity = clt_parameters.imp.cuas_infinity;
+		this.model_directory = model_directory;
+		this.cuas_synth_dir = cuas_synth_dir;
+		this.cuas_noise = (cuas_noise != null) ? cuas_noise : ""; // null is normalized to "" (no inline scales)
+		final boolean have_stack = (imp_targets != null) && (imp_targets.getWidth() > 0);
+		if (have_stack) {
+			final String stack_info = "\""+imp_targets.getTitle()+"\" ("+imp_targets.getStackSize()+" slices)";
+			System.out.println("CuasDetectRT(in-memory): ingesting generated merged stack "+stack_info+" - outputs -> "+model_directory);
+			ingest(imp_targets, core_base_name, debugLevel);
+		} else {
+			System.out.println("CuasDetectRT(in-memory): null/empty imp_targets - nothing to do");
+			dpixels = null;
+			fpixels = null;
+		}
+	}
+
+	/** Populate fpixels/dpixels/width/height/time_stamps/base_name from a merged-CUAS ImagePlus
+	 *  (file-loaded for mode=3 or generated in memory for mode=0). Shared by both constructors;
+	 *  contains NO GPU code. By Claude on 06/24/2026 */
+	private void ingest(ImagePlus imp, String core_base_name, int debugLevel) {
+		String [] dbg_slices=null;
+		float [][] dbg_pixels=null;
+		boolean save_copy = false; // true;
+		width =  (imp != null) ? imp.getWidth()  : 0;
+		height = (imp != null) ? imp.getHeight() : 0;
+		if (width > 0) {
+				ImageStack stack = imp.getStack();
+				// Skip non-ts ("average") slices;
+				int first_slice = 1;
+				int num_slices = stack.getSize();
+				for (; !Character.isDigit(stack.getSliceLabel(first_slice).charAt(0)); first_slice++);
+				time_stamps = new String [num_slices - first_slice + 1];
+				if (save_copy) {
+					dbg_slices = new String [num_slices];
+					dbg_pixels = new float [num_slices][];
+					for (int i = 0; i < dbg_slices.length; i++) {
+						dbg_slices[i] = stack.getSliceLabel(1 + i);
+						dbg_pixels[i] = (float[]) stack.getPixels (1 + i);
+					}
+				}
+				fpixels = new float [time_stamps.length][width*height];
+				for (int i = 0; i < time_stamps.length; i++) {
+					time_stamps[i] = stack.getSliceLabel(first_slice + i);
+					fpixels[i] = (float[]) stack.getPixels (first_slice + i);
+				}
+				base_name = core_base_name;
+				if (clt_parameters.imp.curt_subtract_avg) base_name += "-SUBAVG"; // distinct output filenames for average-subtracted runs (don't overwrite) // By Claude on 06/14/2026
+				if (clt_parameters.imp.curt_synth_src) { // load the synthetic reference grid separately (NOT mixed here) // By Claude on 06/14/2026
+					// Real (loaded above) is the pyramid base. The synthetic file holds NORMALIZED
+					// (peak 1) clean targets / a velocity-reference grid: load + scale it, keep it
+					// aside (NO LoG - it has no slowly-varying background), and inject it TILED into
+					// each pyramid level just before conv5d/DNN, so it is never averaged across levels
+					// and looks the same at every level while the real noise is averaged (SNR up ~sqrt2/level).
+					double synth_scale = clt_parameters.imp.curt_synth_scale;
+					// Synthetic grid is sequence-INDEPENDENT: read it from the shared dir (list SET "cuasSynth")
+					// when set; otherwise fall back to the per-sequence model_directory (old behavior / single-
+					// sequence mode where the SET key is absent). By Claude on 06/24/2026
+					String synth_dir = ((cuas_synth_dir != null) && !cuas_synth_dir.isEmpty()) ? cuas_synth_dir : model_directory;
+					String [] synth_paths = CorrectionParameters.getFilesByExtensionAsArray(synth_dir, SUFFIX_SYNTH_TIFF);
+					if (synth_paths.length > 0) {
+						String synth_file = newestFile(synth_paths); // pick up the newest synthetic // By Claude on 06/14/2026
+						ImagePlus imp_s = new ImagePlus(synth_file);
+						ImageStack ss = imp_s.getStack();
+						int s_first = 1;
+						for (; !Character.isDigit(ss.getSliceLabel(s_first).charAt(0)); s_first++);
+						int n_synth = ss.getSize() - s_first + 1;
+						synth_pixels = new float [n_synth][];
+						for (int i = 0; i < n_synth; i++) {
+							float [] sp = (float[]) ss.getPixels(s_first + i);
+							float [] sc = new float [sp.length];
+							for (int k = 0; k < sp.length; k++) sc[k] = (float)(sp[k] * synth_scale);
+							synth_pixels[i] = sc;
+						}
+						System.out.println("Synthetic grid: loaded "+n_synth+" frames (scale "+synth_scale+", no LoG) from "+synth_file+
+								" - injected per pyramid level, tiled (synth[j % "+n_synth+"])");
+					} else {
+						System.out.println("curt_synth_src set but no \""+SUFFIX_SYNTH_TIFF+"\" found in "+model_directory+" - real only");
+					}
+					base_name += "-SYNTH"+d2s(synth_scale)+"B"; // real is always the averaged background base // By Claude on 06/14/2026
+				}
+		} else {
+			System.out.println("CuasDetectRT.ingest(): empty/invalid merged stack - no pixel data");
+			fpixels = null;
 		}
 		if (fpixels != null) {
 			dpixels = new double [fpixels.length][width*height];
@@ -679,6 +736,11 @@ public class CuasDetectRT {
 			Rectangle     roi,
 			String        title_conv5d) {
 		final int dnn_stride = Math.max(1, clt_parameters.imp.curt_dnn_stride);
+		final boolean use_l2 = clt_parameters.imp.curt_dnn_l2;                       // run Layer-2 on the DGX: -OFFSET carries L2 det+vel instead of L1. By Claude 06/22/2026
+		final String  l2model = use_l2 ? clt_parameters.imp.curt_dnn_l2_model : "";  // L2 run dir on the DGX; empty = L1-only (old way)
+		// L2 model id appended as the LAST filename suffix on -OFFSET so runs of different L2 models don't
+		// collide (and Dolphin's begin...end.tiff truncation still shows the model). By Claude on 06/24/2026
+		final String  l2tag = use_l2 ? ("-" + l2model.substring(l2model.lastIndexOf('/') + 1)) : "";
 		final int W = getWidth(), H = getHeight();
 		final boolean save_rect  = clt_parameters.imp.curt_save_c5rect;
 		final boolean save_hyper = clt_parameters.imp.curt_save_c5hyper;
@@ -686,7 +748,7 @@ public class CuasDetectRT {
 		final boolean synth_bg = clt_parameters.imp.curt_synth_bg;
 		final int n_synth = synth ? synth_pixels.length : 0;
 		try { // auto-launch the DGX server if not already running (deploy bundled/override scripts, ssh-start, poll) // By Claude on 06/20/2026
-			CuasDnnRemote.ensureServer(clt_parameters.imp.curt_dnn_remote_host, clt_parameters.imp.curt_dnn_remote_model, clt_parameters.imp.curt_dnn_remote_srcdir);
+			CuasDnnRemote.ensureServer(clt_parameters.imp.curt_dnn_remote_host, clt_parameters.imp.curt_dnn_remote_model, l2model, clt_parameters.imp.curt_dnn_remote_srcdir);
 		} catch (Exception e) { System.out.println("runDnnRemote(): server auto-launch failed: "+e); }
 		try (CuasDnnRemote remote = new CuasDnnRemote(clt_parameters.imp.curt_dnn_remote_host)) {
 			// Build the upload array: LoG-conditioned real (optionally synth_bg_avg-decimated upstream), optionally
@@ -713,6 +775,26 @@ public class CuasDetectRT {
 					+ java.util.Arrays.toString(levCounts) + " N="+N_dnn
 					+ "  ("+(System.currentTimeMillis()-tup)+" ms, "+(dpixels_log.length*(long)H*W*4/1000000)+" MB)");
 			int nlevels = Math.min(pyramid_levels, levCounts.length);
+			// Per-level L1-input NOISE SCALES (single source of truth = here in Java; the server applies exactly what
+			// we send). Parse the inline cuasNoise SET; validate (>= nlevels positive numbers) else WARN + fall back to
+			// the theoretical sqrt(2)^(L-NOISE_REF_LEVEL). Recorded in every -OFFSET's metadata + printed near the end.
+			// These are critical/sensitive params - never silently misapply. By Claude on 06/24/2026
+			double [] noise_scales = new double [nlevels];
+			boolean noise_default = true;
+			if ((cuas_noise != null) && !cuas_noise.trim().isEmpty()) {
+				String [] ntok = cuas_noise.trim().split("[\\s,;]+");
+				boolean nok = (ntok.length >= nlevels);
+				double [] np = new double [ntok.length];
+				for (int i = 0; nok && (i < ntok.length); i++) {
+					try { np[i] = Double.parseDouble(ntok[i]); if (np[i] <= 0) nok = false; }
+					catch (NumberFormatException e) { nok = false; }
+				}
+				if (nok) { for (int L = 0; L < nlevels; L++) noise_scales[L] = np[L]; noise_default = false; }
+				else System.out.println("runDnnRemote(): WARNING cuasNoise=\""+cuas_noise+"\" invalid (need >= "+nlevels+" positive numbers) - falling back to theoretical sqrt scales");
+			}
+			if (noise_default) for (int L = 0; L < nlevels; L++) noise_scales[L] = Math.pow(2.0, (L - NOISE_REF_LEVEL) / 2.0);
+			final String noise_src = noise_default ? ("theoretical sqrt(2)^(L-"+NOISE_REF_LEVEL+")") : "cuasNoise SET";
+			final String noise_meta = java.util.Arrays.toString(noise_scales)+" ["+noise_src+", ref LEV"+NOISE_REF_LEVEL+"]";
 			for (int nlev = 0; nlev < nlevels; nlev++) {
 				if (!c5LevelSelected(c5_levels, nlev) || (ts_pyramid[nlev] == null)) continue;
 				int levLen = levCounts[nlev];
@@ -725,7 +807,8 @@ public class CuasDetectRT {
 				if (num <= 0) continue;
 				int rnp = roi.width * roi.height;
 				double [][][][] dnn_roi = new double [num][][][];     // [scene][roi_pix][1][nvel] -> -RECT/-HYPER-RECT
-				double [][][]   off5    = new double [5][num][H*W];    // {dx,dy,s,Vx,Vy} full-frame -> -OFFSET
+				final int n_off = use_l2 ? 6 : 5;                       // {dx,dy,s,Vx,Vy} (+L2 age when L2) full-frame -> -OFFSET. By Claude 06/24/2026
+				double [][][]   off5    = new double [n_off][num][H*W];
 				String [] ts_dnn = new String [num];
 				System.out.println(now()+" runDnnRemote(): LEV"+nlev+" "+num+" of "+num_all+" scenes, stride "+dnn_stride+", ROI "+roi.width+"x"+roi.height);
 				// batched: request the level's in-window scenes in chunks; the DGX runs them continuously
@@ -738,14 +821,14 @@ public class CuasDetectRT {
 					int cnt = Math.min(REQ, num - j0);
 					int startNewest = (w0 + j0)*dnn_stride + N_dnn - 1;   // absolute newest of the chunk's first scene
 					long t0 = System.currentTimeMillis();
-					CuasDnnRemote.BatchResult br = remote.inferBatch(nlev, startNewest, cnt, dnn_stride, roi, rmax);
+					CuasDnnRemote.BatchResult br = remote.inferBatch(nlev, startNewest, cnt, dnn_stride, roi, rmax, use_l2, use_l2 && (j0 == 0), noise_scales[nlev]);
 					long t1 = System.currentTimeMillis();
 					for (int jj = 0; jj < cnt; jj++) {
 						int j = j0 + jj;
 						double [][][] fld = new double [rnp][1][br.nvel];
 						for (int p = 0; p < rnp; p++) { float [] rv = br.roiField[jj][p]; double [] dv = fld[p][0]; for (int v = 0; v < br.nvel; v++) dv[v] = rv[v]; }
 						dnn_roi[j] = fld;
-						for (int c = 0; c < 5; c++) { float [] sc = br.offset5[jj][c]; double [] dc = off5[c][j]; for (int p = 0; p < H*W; p++) dc[p] = sc[p]; }
+						for (int c = 0; c < Math.min(br.nch, off5.length); c++) { float [] sc = br.offset5[jj][c]; double [] dc = off5[c][j]; for (int p = 0; p < H*W; p++) dc[p] = sc[p]; }
 						int newest = (w0 + j)*dnn_stride + N_dnn - 1;
 						ts_dnn[j] = ts_pyramid[nlev][newest] + " f"+newest;
 					}
@@ -753,35 +836,42 @@ public class CuasDetectRT {
 							+": gpu="+d2s(br.gpuMs)+"ms ("+d2s(br.gpuMs/cnt)+"ms/scene) roundtrip="+(t1-t0)+"ms");
 				}
 				String roiTag = "-ROI"+roi.x+"_"+roi.y+"_"+roi.width+"_"+roi.height;
-				String title = title_conv5d+"-DNN"+((nlev>0)?("-LEV"+nlev):"")+roiTag;
+				String title = title_conv5d+"-DNN"+(use_l2?"-L2":"")+"-LEV"+nlev+roiTag; // -LEV0 too, for uniform level indexing (was: no tag for level 0). By Claude on 06/24/2026
 				int [] win_dnn = timeWindow(ts_dnn);
 				double [][][][] dnn_w = win4(dnn_roi, win_dnn); String [] ts_w = winS(ts_dnn, win_dnn);
 				if (save_rect)  QuadCLTCPU.saveImagePlusInDirectory(tagCuasImp(cuasRTUtils.showConvKernel5d(         dnn_w, roi, ts_w, title+"-RECT"),       clt_parameters.imp), getModelDirectory());
 				if (save_hyper) QuadCLTCPU.saveImagePlusInDirectory(tagCuasImp(cuasRTUtils.showConvKernel5dHyperRect(dnn_w, roi, ts_w, title+"-HYPER-RECT"), clt_parameters.imp), getModelDirectory());
 				int nsc = win_dnn[1]-win_dnn[0];
-				// -OFFSET reordered to {s,Vx,Vy,dx,dy}: s first so ImageJ auto-ranges on it (s shows the targets best). off5 is [dx,dy,s,Vx,Vy]. By Claude on 06/20/2026
-				final int [] ord = {2, 3, 4, 0, 1};   // [dx,dy,s,Vx,Vy] -> [s,Vx,Vy,dx,dy]
-				// NaN Vx,Vy,dx,dy where s < curt_dnn_thresh so velocity shows only at detections (ImageJ ignores NaN); keep s full. By Claude on 06/20/2026
+				// -OFFSET reordered to {s,Vx,Vy,dx,dy}(+age when L2): s first so ImageJ auto-ranges on it. off5 is
+				// [dx,dy,s,Vx,Vy(,age)]. By Claude on 06/20/2026, +L2 age 06/24/2026
+				final int []    ord        = use_l2 ? new int[]{2, 3, 4, 0, 1, 5} : new int[]{2, 3, 4, 0, 1};
+				final String [] off_labels = use_l2 ? new String[]{"s","Vx","Vy","dx","dy","age"} : new String[]{"s","Vx","Vy","dx","dy"};
+				// NaN Vx,Vy,dx,dy where s < curt_dnn_thresh so velocity shows only at detections (ImageJ ignores NaN); keep s + age full. By Claude on 06/20/2026
 				final double off_thr = clt_parameters.imp.curt_dnn_thresh;
 				final double velScale = 1.0 / Math.max(1, clt_parameters.imp.curt_vel_decimate);   // Vx,Vy: cells -> px/level-frame (1/vel_decimate) // By Claude on 06/20/2026
-				double [][][] off5_w = new double [5][nsc][];          // window the full-frame offset for -OFFSET
+				double [][][] off5_w = new double [ord.length][nsc][];   // window the full-frame offset for -OFFSET
 				for (int k = 0; k < nsc; k++) {
 					double [] sCh = off5[2][win_dnn[0]+k];             // s channel (off5 index 2)
-					for (int c = 0; c < 5; c++) {
+					for (int c = 0; c < ord.length; c++) {
 						double [] src = off5[ord[c]][win_dnn[0]+k]; double [] dst = new double [src.length];
-						double scl = ((c == 1) || (c == 2)) ? velScale : 1.0;   // c1=Vx, c2=Vy cells->px; s/dx/dy unscaled
-						if (c == 0) System.arraycopy(src, 0, dst, 0, src.length);   // c0 = s: keep all, no scale/NaN
+						double scl = ((c == 1) || (c == 2)) ? velScale : 1.0;   // c1=Vx, c2=Vy cells->px; s/dx/dy/age unscaled
+						boolean keepFull = (c == 0) || (use_l2 && (c == 5));    // s and age shown full (not NaN-gated by s) // By Claude 06/24/2026
+						if (keepFull) System.arraycopy(src, 0, dst, 0, src.length);
 						else for (int p = 0; p < src.length; p++) dst[p] = (sCh[p] >= off_thr) ? src[p] * scl : Double.NaN;
 						off5_w[c][k] = dst;
 					}
 				}
-				ImagePlus impOff = ShowDoubleFloatArrays.showArraysHyperstack(off5_w, W, title+"-OFFSET", ts_w, new String[]{"s","Vx","Vy","dx","dy"}, false);
+				ImagePlus impOff = ShowDoubleFloatArrays.showArraysHyperstack(off5_w, W, title+"-OFFSET"+l2tag, ts_w, off_labels, false);
 				tagCuasImp(impOff, clt_parameters.imp);
 				impOff.setProperty("curt_save_select", new Rectangle(0,0,W,H).toString());   // -OFFSET is full-frame: record its real extent, not the 70x20 ROI // By Claude on 06/20/2026
+				impOff.setProperty("curt_dnn_noise_scales", noise_meta);                     // ALL per-level scales (provenance: recover what each layer used). By Claude on 06/24/2026
 				com.elphel.imagej.readers.EyesisTiff.encodeProperiesToInfo(impOff);           // re-encode Info with the override
 				QuadCLTCPU.saveImagePlusInDirectory(impOff, getModelDirectory());
-				System.out.println(now()+" runDnnRemote(): LEV"+nlev+" saved -RECT/-HYPER-RECT (ROI) + -OFFSET (full "+W+"x"+H+", {s,Vx,Vy,dx,dy})");
+				System.out.println(now()+" runDnnRemote(): LEV"+nlev+" saved -RECT/-HYPER-RECT (ROI) + -OFFSET (full "+W+"x"+H+", {"+String.join(",", off_labels)+"})");
 			}
+			// Per-level noise scales used this run (printed near the END so it is easy to locate; also in each -OFFSET
+			// metadata as curt_dnn_noise_scales). Critical/sensitive - keep visible. By Claude on 06/24/2026
+			System.out.println(now()+" runDnnRemote(): PER-LEVEL NOISE SCALES = "+noise_meta);
 		} catch (Exception e) {
 			System.out.println("runDnnRemote() failed: "+e); e.printStackTrace();
 		}

--- a/src/main/java/com/elphel/imagej/cuas/rt/CuasDnnRemote.java
+++ b/src/main/java/com/elphel/imagej/cuas/rt/CuasDnnRemote.java
@@ -29,7 +29,7 @@ import java.nio.file.Files;
 *   BYE     : cmd=0
 */
 public class CuasDnnRemote implements AutoCloseable {
-	private static final int CMD_BYE = 0, CMD_UPLOAD = 1, CMD_INFER = 2, CMD_READBACK = 3;
+	private static final int CMD_BYE = 0, CMD_UPLOAD = 1, CMD_INFER = 2, CMD_READBACK = 3, CMD_STATUS = 4;
 	private final Socket sock;
 	private final DataInputStream  in;
 	private final DataOutputStream out;
@@ -73,27 +73,30 @@ public class CuasDnnRemote implements AutoCloseable {
 	 *  the GPU) + the total pure-GPU compute ms (continuous = production throughput). */
 	public static class BatchResult {
 		public double gpuMs;
-		public int H, W, count, nvel, rh, rw;
-		public float [][][] offset5;    // [count][5][H*W]
+		public int H, W, count, nvel, rh, rw, nch;   // nch = offset channels: 5 {dx,dy,s,Vx,Vy} or 6 (+L2 age). By Claude 06/24/2026
+		public float [][][] offset5;    // [count][nch][H*W]
 		public float [][][] roiField;   // [count][rh*rw][nvel]
 	}

 	/** Infer `count` scenes of a level in one round-trip (newest_s = start + s*stride). rmaxCells>0
 	 *  enables the on-GPU ghostbuster (== CuasDetectRT.dnnGhostbust). Keep `count` modest so the
 	 *  reply byte[] stays < 2GB (count*5*H*W*4): count<=64 is ~419MB at 640x512. */
-	public BatchResult inferBatch(int level, int start, int count, int stride, Rectangle roi, double rmaxCells) throws Exception {
+	public BatchResult inferBatch(int level, int start, int count, int stride, Rectangle roi, double rmaxCells,
+			boolean l2Enable, boolean l2Reset, double noiseScale) throws Exception {
 		out.writeInt(CMD_INFER); out.writeInt(level); out.writeInt(start); out.writeInt(count); out.writeInt(stride);
 		out.writeInt(roi.x); out.writeInt(roi.y); out.writeInt(roi.width); out.writeInt(roi.height);
 		out.writeDouble(rmaxCells);
+		out.writeInt(l2Enable ? 1 : 0); out.writeInt(l2Reset ? 1 : 0);   // run Layer-2 on the DGX; reset hidden state at a level's first chunk. By Claude 06/22/2026
+		out.writeDouble(noiseScale);   // per-level L1-input noise scale (Java is the source of truth; <=0 -> server sqrt fallback). By Claude 06/24/2026
 		out.flush();
 		BatchResult r = new BatchResult();
 		r.gpuMs = in.readDouble(); r.H = in.readInt(); r.W = in.readInt();
-		r.count = in.readInt(); r.nvel = in.readInt(); r.rh = in.readInt(); r.rw = in.readInt();
+		r.count = in.readInt(); r.nch = in.readInt(); r.nvel = in.readInt(); r.rh = in.readInt(); r.rw = in.readInt();   // +nch. By Claude 06/24/2026
 		int hw = r.H * r.W, rn = r.rh * r.rw;
-		byte [] ob = new byte [r.count * 5 * hw * 4]; in.readFully(ob);
+		byte [] ob = new byte [r.count * r.nch * hw * 4]; in.readFully(ob);
 		ByteBuffer obb = ByteBuffer.wrap(ob);
-		r.offset5 = new float [r.count][5][hw];
-		for (int s = 0; s < r.count; s++) for (int c = 0; c < 5; c++) for (int p = 0; p < hw; p++) r.offset5[s][c][p] = obb.getFloat();
+		r.offset5 = new float [r.count][r.nch][hw];
+		for (int s = 0; s < r.count; s++) for (int c = 0; c < r.nch; c++) for (int p = 0; p < hw; p++) r.offset5[s][c][p] = obb.getFloat();
 		byte [] rb = new byte [r.count * rn * r.nvel * 4]; in.readFully(rb);
 		ByteBuffer rbb = ByteBuffer.wrap(rb);
 		r.roiField = new float [r.count][rn][r.nvel];
@@ -148,21 +151,70 @@ public class CuasDnnRemote implements AutoCloseable {
 		if (p.waitFor() != 0) throw new Exception("deploy " + name + " to " + sshTarget + ":" + dest + " failed");
 	}

-	/** Ensure the DGX server is reachable at hostPort; if not, deploy the (bundled/override) scripts and
-	 *  ssh-launch run_infer_server.sh with RUN=model, then poll until it accepts connections. No manual step. */
-	public static void ensureServer(String hostPort, String model, String srcdir) throws Exception {
+	/** Query the running server for its loaded {L1 model, L2 model} paths (L2="" if L1-only), or null
+	 *  if unreachable, too old to answer CMD_STATUS (it ignores the opcode and never replies, so the short
+	 *  read timeout fires), or replying with an implausible path length (not our server). By Claude 06/24/2026 */
+	private static String [] queryModels(String host, int port) {
+		final int maxLen = 4096;   // cap the untrusted length: a garbage reply could otherwise request a ~2GB array -> OutOfMemoryError (an Error, NOT caught below)
+		try (Socket s = new Socket()) {
+			s.connect(new InetSocketAddress(host, port), 1500); s.setSoTimeout(2500);   // connect / read timeouts, ms
+			DataOutputStream o = new DataOutputStream(s.getOutputStream());
+			DataInputStream  i = new DataInputStream (s.getInputStream());
+			o.writeInt(CMD_STATUS); o.flush();
+			int n1 = i.readInt(); if ((n1 < 0) || (n1 > maxLen)) return null; byte [] b1 = new byte [n1]; i.readFully(b1);   // L1 path, len-prefixed UTF-8
+			int n2 = i.readInt(); if ((n2 < 0) || (n2 > maxLen)) return null; byte [] b2 = new byte [n2]; i.readFully(b2);   // L2 path ("" = L1-only)
+			try { o.writeInt(CMD_BYE); o.flush(); } catch (Exception e) { /* ignore: the reply is already in hand */ }
+			return new String [] { new String(b1, java.nio.charset.StandardCharsets.UTF_8),
+			                       new String(b2, java.nio.charset.StandardCharsets.UTF_8) };
+		} catch (Exception e) {
+			return null;                                                          // unreachable, or old server (status read timed out)
+		}
+	}
+
+	/** Run "run_infer_server.sh stop" (docker rm -f) in `code` on sshTarget over ssh and wait for it to finish; the exit status is not checked. Lets the next launch load a different model. By Claude 06/24/2026 */
+	private static void stopServer(String sshTarget, String code, int port) throws Exception {
+		String remoteCmd = "cd " + code + " && PORT=" + port + " ./run_infer_server.sh stop";
+		ProcessBuilder stopper = new ProcessBuilder("ssh", sshTarget, remoteCmd);
+		stopper.inheritIO();   // ssh output goes to this process' stdout/stderr
+		stopper.start().waitFor();
+	}
+
+	/** Ensure the DGX server is up at hostPort WITH the requested L1+L2 models. A listening server that reports
+	 *  matching models is reused as-is; one that reports different models (or is an old build that can't report)
+	 *  is stopped first. Otherwise deploy the (bundled/override) scripts and ssh-launch run_infer_server.sh with
+	 *  RUN=model (+ RUN2=l2model when set), polling until it accepts connections. No manual step. All decisions logged to System.out (-> ImageJ log file). By Claude 06/24/2026 */
+	public static void ensureServer(String hostPort, String model, String l2model, String srcdir) throws Exception {
 		String [] hp = hostPort.split(":");
 		String host = hp[0].trim();
 		int port = (hp.length > 1) ? Integer.parseInt(hp[1].trim()) : 5577;
-		if (canConnect(host, port, 1500)) return;                                 // already up
 		String sshTarget = "elphel@" + host;                                      // DGX login user
 		String code = "/home/elphel/c5p_dnn";                                     // DGX dir (model.py + runs/ live here)
-		System.out.println("CuasDnnRemote.ensureServer(): no server at " + host + ":" + port
-				+ " - deploying scripts + launching on " + sshTarget + " (model=" + model + ")");
+		String wantL2 = (l2model == null) ? "" : l2model;                         // normalize (server reports "" for L1-only)
+		if (canConnect(host, port, 1500)) {                                       // a server is listening - is it the right one?
+			String [] cur = queryModels(host, port);
+			if ((cur != null) && cur[0].equals(model) && cur[1].equals(wantL2)) {
+				System.out.println("CuasDnnRemote.ensureServer(): server at " + host + ":" + port
+						+ " already loaded with matching models (L1=" + model + ", L2=" + (wantL2.isEmpty()?"off":wantL2) + ") - reusing");
+				return;
+			}
+			if (cur == null) {
+				System.out.println("CuasDnnRemote.ensureServer(): server at " + host + ":" + port
+						+ " up but did not report models (old build) - restarting for L1=" + model + ", L2=" + (wantL2.isEmpty()?"off":wantL2));
+			} else {
+				System.out.println("CuasDnnRemote.ensureServer(): server model MISMATCH at " + host + ":" + port
+						+ " (has L1=" + cur[0] + ", L2=" + (cur[1].isEmpty()?"off":cur[1])
+						+ "; want L1=" + model + ", L2=" + (wantL2.isEmpty()?"off":wantL2) + ") - restarting");
+			}
+			stopServer(sshTarget, code, port);                                    // free the port so the relaunch loads the new model
+		}
+		String run2 = !wantL2.isEmpty() ? (" RUN2=" + wantL2) : "";               // optional Layer-2. By Claude 06/22/2026
+		System.out.println("CuasDnnRemote.ensureServer(): launching server on " + sshTarget
+				+ " (model=" + model + ", l2=" + (run2.isEmpty()?"off":wantL2) + ")");
 		deployScript("infer_server.py",     srcdir, sshTarget, code + "/infer_server.py");
+		deployScript("layer2.py",            srcdir, sshTarget, code + "/layer2.py");   // Layer-2 model module (Layer2Net). By Claude 06/22/2026
 		deployScript("run_infer_server.sh",  srcdir, sshTarget, code + "/run_infer_server.sh");
 		ProcessBuilder pb = new ProcessBuilder("ssh", sshTarget,
-				"cd " + code + " && chmod +x run_infer_server.sh && RUN=" + model + " PORT=" + port + " ./run_infer_server.sh start");
+				"cd " + code + " && chmod +x run_infer_server.sh && RUN=" + model + run2 + " PORT=" + port + " ./run_infer_server.sh start");
 		pb.inheritIO();
 		pb.start().waitFor();
 		long deadline = System.currentTimeMillis() + 90000;                       // model load + warm-up can take a bit

--- a/src/main/java/com/elphel/imagej/tileprocessor/IntersceneMatchParameters.java
+++ b/src/main/java/com/elphel/imagej/tileprocessor/IntersceneMatchParameters.java
@@ -1168,6 +1168,8 @@ min_str_neib_fpn	0.35
 	public String   curt_dnn_remote_host = "192.168.0.62:5577"; // DGX inference server host:port for curt_dnn_remote (see attic/imagej-elphel-internal/c5p_dnn/infer_server.py) // By Claude on 06/20/2026
 	public String   curt_dnn_remote_model = "runs/weighted9_pm_s"; // DGX-side run dir (with model.pt) the auto-launched server loads (passed as RUN=) // By Claude on 06/20/2026
 	public String   curt_dnn_remote_srcdir = ""; // server-scripts override dir: empty = bundled jar resource (cuas_dnn/), set = local dir - same default-vs-override scheme as the GPU kernels (cuda_project_directory) // By Claude on 06/20/2026
+	public boolean  curt_dnn_l2 =          false; // run the trained Layer-2 (track-before-detect ConvGRU) on the DGX after L1: -OFFSET then carries L2 {det,Vx,Vy} (L1's full-res, NON-ghostbusted field fed in, recurrence over the scene/time axis). Off = L1 offset5 as before (re-run with this off to inspect L1). Requires curt_dnn_remote. // By Claude on 06/22/2026
+	public String   curt_dnn_l2_model =    "runs/l2_v1"; // DGX-side Layer-2 run dir (with model.pt) the auto-launched server loads (passed as RUN2=) // By Claude on 06/22/2026
 	public boolean  curt_dnn_recur_splat = false; // when feeding the DNN field to the recurrent layer: false = feed per-pixel field as-is; true = splat each pixel's velocity vector to its fractional offset (px+dx,py+dy) so neighbours reinforce in one sub-pixel bin // By Claude on 06/14/2026
 	public double   curt_dnn_recur_scale = 10.0;  // multiply the DNN field (softmax*s, peaks ~0.1) by this before the recurrent feed, to reach the recurrent's tuned scale (rs_min=1.0); ~10 -> peak ~1.0. Alternative to lowering curt_recur_rs_min // By Claude on 06/14/2026
 	public boolean  curt_synth_src =       true;  // default set for the synthetic B-measurement experiment (set false for real-data runs); reads *-CUAS-SYNTHETIC-CUAS.tiff, output titles get -SYNTH // By Claude on 06/12/2026
@@ -2702,10 +2704,10 @@ min_str_neib_fpn	0.35
 				"Maximal gain for motion blur correction (if needed more for 1 pixel, increase offset). Will be forced fro the last adjustment");
 		gd.addNumericField("Maximal gain pose",                      this.mb_max_gain_inter, 5,7,"x",
 				"Maximal gain for motion blur correction during interscene correlation. Will be used for all but the last adjustment.");
-		gd.addTab("CUAS","CUAS Parameters");
-		gd.addCheckbox ("Enable targets processing",                 this.cuas_targets_en,
+		gd.addTab("CUAS Oracle","CUAS OracleParameters");
+		gd.addCheckbox ("Enable Oracle targets processing",          this.cuas_targets_en,
 				"Enable extraction and processing targets.");
-		gd.addCheckbox ("Re-calculate center CLT",                    this.cuas_update_existing,
+		gd.addCheckbox ("Re-calculate center CLT",                   this.cuas_update_existing,
 				"Re-create center_CLT if it exists (FIXME: accumulates errors - need fixing).");
 		gd.addNumericField("Discard margins",                        this.cuas_discard_border, 0,3,"pix",
 				"Discards this number of pixels from each side when merging images.");
@@ -3478,6 +3480,10 @@ min_str_neib_fpn	0.35
 				"DGX-side run directory (containing model.pt) the auto-launched server loads, e.g. runs/weighted9_pm_s.");
 		gd.addStringField ("DNN remote server src (empty=bundled)",  this.curt_dnn_remote_srcdir, 40, // By Claude on 06/20/2026
 				"Override dir for the DGX server scripts (infer_server.py / run_infer_server.sh): empty = bundled jar resource (cuas_dnn/); set = local dir. Same default-vs-override (bundled resource vs local repo) scheme as the GPU kernels - bundled is the working version, refresh it after server-script dev.");
+		gd.addCheckbox    ("DNN Layer-2 (run on DGX)",               this.curt_dnn_l2, // By Claude on 06/22/2026
+				"Run the trained Layer-2 track-before-detect ConvGRU on the DGX after L1 (requires 'DNN remote'). -OFFSET then shows L2 {det,Vx,Vy} (L1's full-res non-ghostbusted field fed in, recurrence over time), titled -DNN-L2-. Uncheck to re-run the old L1 way."); // By Claude on 06/22/2026
+		gd.addStringField ("DNN Layer-2 model (DGX run dir)",        this.curt_dnn_l2_model, 24, // By Claude on 06/22/2026
+				"DGX-side Layer-2 run directory (containing model.pt) the auto-launched server loads as RUN2=, e.g. runs/l2_v1.");
 		gd.addNumericField("DNN s-threshold (VIZ ONLY)",             this.curt_dnn_thresh, 6,8,"", // By Claude on 06/13/2026, viz-only 06/20/2026
 				"VISUALIZATION ONLY - NaN's the -OFFSET Vx,Vy,dx,dy where s < this (0 = show all) so velocity shows only at detections. Does NOT gate Layer 2 (the recurrent always sees the full field) nor the -RECT/-HYPER-RECT data. Do NOT use for critical computation - it is a display mask.");
 		gd.addCheckbox ("DNN recurrent feed: offset-splat",          this.curt_dnn_recur_splat, // By Claude on 06/14/2026
@@ -5026,6 +5032,8 @@ min_str_neib_fpn	0.35
 		this.curt_dnn_remote_host =     gd.getNextString().trim(); // By Claude on 06/20/2026
 		this.curt_dnn_remote_model =    gd.getNextString().trim(); // By Claude on 06/20/2026
 		this.curt_dnn_remote_srcdir =   gd.getNextString().trim(); // By Claude on 06/20/2026
+		this.curt_dnn_l2 =              gd.getNextBoolean();     // By Claude on 06/22/2026
+		this.curt_dnn_l2_model =        gd.getNextString().trim(); // By Claude on 06/22/2026
 		this.curt_dnn_thresh =          gd.getNextNumber();      // By Claude on 06/13/2026
 		this.curt_dnn_recur_splat =     gd.getNextBoolean();     // By Claude on 06/14/2026
 		this.curt_dnn_recur_scale =     gd.getNextNumber();      // By Claude on 06/14/2026
@@ -6393,6 +6401,8 @@ min_str_neib_fpn	0.35
 		properties.setProperty(prefix+"curt_dnn_remote_host", this.curt_dnn_remote_host);   // String // By Claude on 06/20/2026
 		properties.setProperty(prefix+"curt_dnn_remote_model", this.curt_dnn_remote_model); // String // By Claude on 06/20/2026
 		properties.setProperty(prefix+"curt_dnn_remote_srcdir", this.curt_dnn_remote_srcdir); // String // By Claude on 06/20/2026
+		properties.setProperty(prefix+"curt_dnn_l2",          this.curt_dnn_l2+"");         // boolean // By Claude on 06/22/2026
+		properties.setProperty(prefix+"curt_dnn_l2_model",    this.curt_dnn_l2_model);      // String // By Claude on 06/22/2026
 		properties.setProperty(prefix+"curt_synth_src",       this.curt_synth_src+"");      // boolean // By Claude on 06/11/2026
 		properties.setProperty(prefix+"curt_synth_scale",     this.curt_synth_scale+"");    // double  // By Claude on 06/12/2026
 		properties.setProperty(prefix+"curt_synth_bg_avg",    this.curt_synth_bg_avg+"");   // int     // By Claude on 06/20/2026
@@ -6788,6 +6798,8 @@ min_str_neib_fpn	0.35
 		if (properties.getProperty(prefix+"curt_dnn_remote_host")!=null) this.curt_dnn_remote_host=(String) properties.getProperty(prefix+"curt_dnn_remote_host"); // By Claude on 06/20/2026
 		if (properties.getProperty(prefix+"curt_dnn_remote_model")!=null)  this.curt_dnn_remote_model=(String) properties.getProperty(prefix+"curt_dnn_remote_model"); // By Claude on 06/20/2026
 		if (properties.getProperty(prefix+"curt_dnn_remote_srcdir")!=null) this.curt_dnn_remote_srcdir=(String) properties.getProperty(prefix+"curt_dnn_remote_srcdir"); // By Claude on 06/20/2026
+		if (properties.getProperty(prefix+"curt_dnn_l2")!=null)         this.curt_dnn_l2=Boolean.parseBoolean(properties.getProperty(prefix+"curt_dnn_l2")); // By Claude on 06/22/2026
+		if (properties.getProperty(prefix+"curt_dnn_l2_model")!=null)   this.curt_dnn_l2_model=(String) properties.getProperty(prefix+"curt_dnn_l2_model"); // By Claude on 06/22/2026

 		if (properties.getProperty(prefix+"curt_synth_src")!=null)       this.curt_synth_src=Boolean.parseBoolean(properties.getProperty(prefix+"curt_synth_src"));         // By Claude on 06/11/2026
 		if (properties.getProperty(prefix+"curt_synth_scale")!=null)     this.curt_synth_scale=Double.parseDouble(properties.getProperty(prefix+"curt_synth_scale"));       // By Claude on 06/12/2026
@@ -9065,6 +9077,8 @@ min_str_neib_fpn	0.35
 		imp.curt_dnn_remote_host =  this.curt_dnn_remote_host; // By Claude on 06/20/2026
 		imp.curt_dnn_remote_model = this.curt_dnn_remote_model; // By Claude on 06/20/2026
 		imp.curt_dnn_remote_srcdir = this.curt_dnn_remote_srcdir; // By Claude on 06/20/2026
+		imp.curt_dnn_l2 =           this.curt_dnn_l2;        // By Claude on 06/22/2026
+		imp.curt_dnn_l2_model =     this.curt_dnn_l2_model;  // By Claude on 06/22/2026
 		imp.curt_synth_src =        this.curt_synth_src;     // By Claude on 06/11/2026
 		imp.curt_synth_scale =      this.curt_synth_scale;   // By Claude on 06/12/2026
 		imp.curt_synth_bg_avg =     this.curt_synth_bg_avg;  // By Claude on 06/20/2026

--- a/src/main/java/com/elphel/imagej/tileprocessor/OpticalFlow.java
+++ b/src/main/java/com/elphel/imagej/tileprocessor/OpticalFlow.java
@@ -7239,9 +7239,9 @@ java.lang.NullPointerException
 		// Moved to the very end, after 3D
 //		boolean test_vegetation = true;

-		if (master_CLT.hasCenterClt() && clt_parameters.imp.cuas_targets_en) { // cuas mode
+		if (master_CLT.hasCenterClt() && clt_parameters.imp.cuas_targets_en && !clt_parameters.imp.curt_en) { // cuas mode
 			if (debugLevel >-3) {
-				System.out.println("===== Running CUAS ranging. =====");
+				System.out.println("===== Running CUAS ranging in Oracle mode. =====");
 			}
 			CuasRanging cuasRanging = new CuasRanging 	(
 					clt_parameters, // CLTParameters     clt_parameters,
@@ -7258,7 +7258,34 @@ java.lang.NullPointerException
 					System.out.println("Target detection DONE");
 				}
 			}
-		}		
+		}
+
+		// CUAS RT (our code) - coexists with the oracle CuasRanging above (separate buttons/modes, no interference).
+		// Generate the merged-CUAS stack on the GPU EXPLICITLY here (CuasRanging.prepareFpixels() uses the CUDA
+		// tile-processor kernels - may be incompatible with a future CUDA), then hand the plain ImagePlus to the
+		// CUDA-free CuasDetectRT. By Claude on 06/24/2026
+		if (clt_parameters.imp.curt_en && master_CLT.hasCenterClt()) {
+			System.out.println("===== Running CUAS RT detection (curt_en). =====");
+			CuasRanging cuasRangingRT = new CuasRanging(
+					clt_parameters, // CLTParameters     clt_parameters,
+					master_CLT,     // QuadCLT           center_CLT,
+					quadCLTs,       // QuadCLT []        scenes,
+					debugLevel);
+			ImagePlus imp_targets = cuasRangingRT.prepareFpixels();      // GPU generator (explicit, CUDA-sensitive)
+			cuasRangingRT.saveUasFlightLogCsv(uasLogReader, imp_targets); // UAS flight-log truth -> <name>-UAS_DATA.tsv (mode-0 only; needs QuadCLT pose). By Claude on 06/24/2026
+			new CuasDetectRT(
+					clt_parameters,               // CLTParameters     clt_parameters,
+					uasLogReader,                 // UasLogReader      uasLogReader,
+					imp_targets,                  // ImagePlus         imp_targets (no GPU inside CuasDetectRT)
+					master_CLT.getX3dDirectory(), // String            model_directory (outputs land like oracle)
+					master_CLT.getImageName(),    // String            core_base_name
+					master_CLT.correctionsParameters.cuasSynth, // String  cuas_synth_dir (shared, list SET; "" -> model_directory)
+					master_CLT.correctionsParameters.cuasNoise, // String  cuas_noise (inline per-level scales, list SET; "" -> sqrt default). By Claude on 06/24/2026
+					debugLevel).detectTargets(
+							clt_parameters,       // CLTParameters     clt_parameters,
+							batch_mode,           // boolean           batch_mode,
+							debugLevel);          // int               debugLevel
+		}

 		if (generate_mapped || reuse_video) { // modifies combo_dsn_final ?
 			int tilesX =  master_CLT.getTileProcessor().getTilesX();

--- a/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
+++ b/src/main/java/com/elphel/imagej/tileprocessor/TwoQuadCLT.java
@@ -9241,6 +9241,8 @@ if (debugLevel > -100) return true; // temporarily !
 									 clt_parameters,  // CLTParameters     clt_parameters,
 									 uasLogReader,    // UasLogReader      uasLogReader,
 									 model_paths[i],  // 	String            model_directory) {
+									 quadCLT_main.correctionsParameters.cuasSynth, // String  cuas_synth_dir (shared, list SET; "" -> model_directory). By Claude on 06/24/2026
+									 quadCLT_main.correctionsParameters.cuasNoise, // String  cuas_noise (inline per-level scales, list SET; "" -> sqrt default). By Claude on 06/24/2026
 									 debugLevel);     // int               debugLevel)
 							CuasMotion cuasMotion= cuasDetectRT.detectTargets(
 									clt_parameters,          // CLTParameters     clt_parameters,

--- a/src/main/resources/cuas_dnn/infer_server.py
+++ b/src/main/resources/cuas_dnn/infer_server.py
@@ -27,7 +27,27 @@ import torch.nn.functional as F
 from model import RawFCN

 CMD_BYE, CMD_UPLOAD, CMD_INFER, CMD_READBACK = 0, 1, 2, 3
-GPU_CHUNK = 16   # scenes processed per batched GPU pass (memory vs utilization)
+CMD_STATUS = 4     # report loaded L1/L2 model paths so the client can detect a model change. By Claude on 06/24/2026
+GPU_CHUNK = 16     # scenes processed per batched GPU pass (memory vs utilization)
+VEL_DECIMATE = 4   # velocity-grid cells per px/level-frame (Java curt_vel_decimate); L2 was trained on Vx,Vy in px/frame (cells/4). By Claude on 06/22/2026
+AGE_THR = 0.2      # L2 track-age death threshold: a cell with det<=AGE_THR "dies" (age 0). Raised 0.01->0.2 so the
+                   # weak noise halo dies and the 5x5 max-pool can't dilate age across gaps. By Claude on 06/24/2026
+AGE_K   = 0.5      # ancestor gate: a 5x5 previous-frame neighbor may pass its age only if its det >= AGE_K * (local
+                   # max det in that 5x5) - blocks a weak-but-old straggler from seeding age. By Claude on 06/24/2026
+NOISE_REF_LEVEL = 3  # the net is calibrated to ~LEV3's absolute noise (low-contrast signals tested mainly on LEV3).
+                   # The pyramid averages 2 frames/level so sigma drops sqrt(2)/level; scale each level's L1 input by
+                   # sqrt(2)^(level-REF) to put every level at LEV3's absolute noise (uniform FP). By Claude on 06/24/2026
+
+
+def load_l2(run_dir, device):
+    """Load the optional Layer-2 (track-before-detect) recurrent net from <run_dir>/model.pt, in eval mode on `device`; FCN, so it runs on any H,W. By Claude on 06/22/2026"""
+    from layer2 import Layer2Net
+    ckpt = torch.load(os.path.join(run_dir, "model.pt"), map_location="cpu", weights_only=False)
+    hp = ckpt.get("args", {}) or {}; ch_hidden, vmax = hp.get("ch", 24), hp.get("vmax", 1.4)   # checkpoint "args" entries, with defaults when absent
+    net = Layer2Net(ch_in=3, ch_hidden=ch_hidden, grid=hp.get("G", 32), vmax=vmax)
+    net.load_state_dict(ckpt["model"]); net.eval().to(device)
+    print(f"loaded L2 {run_dir}/model.pt: ch_hidden={ch_hidden} vmax={vmax}", flush=True)
+    return net


 def load_model(run_dir, device):
@@ -113,12 +133,14 @@ def decode(field, vr, roi, rmax_cells):
    return offset5, roi_field.permute(0, 2, 3, 1).contiguous(), nvel  # [B,5,H,W], [B,rh,rw,nvel]


-def serve(run_dir, host, port):
+def serve(run_dir, host, port, l2_run=None):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch.backends.cudnn.benchmark = True
+    torch.set_grad_enabled(False)   # inference-only server; L2 recurrence (m2.cell/decode) isn't @no_grad'd. By Claude 06/22/2026
    m, N, P, vr = load_model(run_dir, device)
+    m2 = load_l2(l2_run, device) if l2_run else None    # optional Layer-2; None -> L1-only (old way). By Claude 06/22/2026
    print(f"device={device} gpu={torch.cuda.get_device_name(0) if device=='cuda' else 'cpu'} "
-          f"patch={P} N={N} vr={vr}", flush=True)
+          f"patch={P} N={N} vr={vr} L2={'on('+l2_run+')' if m2 is not None else 'off'}", flush=True)
    _ = shift_stitch(m, torch.zeros(1, N, 64, 64, device=device), P)   # warm-up
    if device == "cuda":
        torch.cuda.synchronize()
@@ -134,11 +156,21 @@ def serve(run_dir, host, port):
        conn, addr = srv.accept()
        conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        print(f"{datetime.now():%H:%M:%S} client {addr}", flush=True)
+        h_l2 = None     # Layer-2 recurrent hidden state [1,ch,H,W]; persists across INFER chunks, reset on l2_reset. By Claude 06/22/2026
+        age_l2 = None   # L2 track-age field [1,1,H,W]; sprev_l2 = previous-frame L2 det; carried+reset like h_l2. By Claude 06/24/2026
+        sprev_l2 = None
        try:
            while True:
                cmd = struct.unpack(">i", recvall(conn, 4))[0]
                if cmd == CMD_BYE:
                    break
+                if cmd == CMD_STATUS:
+                    # Reply with loaded L1 + L2 model paths (len-prefixed UTF-8); empty L2 = L1-only.
+                    # Lets the Java client detect a model change and relaunch. By Claude on 06/24/2026
+                    b1 = run_dir.encode("utf-8")
+                    b2 = (l2_run or "").encode("utf-8")
+                    conn.sendall(struct.pack(">i", len(b1)) + b1 + struct.pack(">i", len(b2)) + b2)
+                    continue
                if cmd == CMD_UPLOAD:
                    T, H, W = struct.unpack(">iii", recvall(conn, 12))
                    data = recvall(conn, T * H * W * 4)
@@ -157,7 +189,15 @@ def serve(run_dir, host, port):
                elif cmd == CMD_INFER:
                    level, start, count, stride, rx, ry, rw, rh = struct.unpack(">iiiiiiii", recvall(conn, 32))
                    rmax = struct.unpack(">d", recvall(conn, 8))[0]
-                    lev = pyr[level]                      # [Tl,H,W]
+                    l2_enable, l2_reset = struct.unpack(">ii", recvall(conn, 8))   # By Claude 06/22/2026
+                    noise_scale = struct.unpack(">d", recvall(conn, 8))[0]         # per-level L1-input noise scale from Java (single source of truth); <=0 -> server fallback. By Claude 06/24/2026
+                    use_l2 = bool(l2_enable) and (m2 is not None)
+                    # Per-level noise normalization: scale this level's L1 input to LEV3's absolute noise so all
+                    # levels sit in the net's trained regime (uniform FP across levels). LEV3 -> 1.0, lower/noisier
+                    # levels scale down, higher levels up. Independent of the age filter. By Claude on 06/24/2026
+                    if noise_scale <= 0.0:                # fallback only: Java didn't send one -> theoretical sqrt(2)^(level-ref)
+                        noise_scale = 2.0 ** ((level - NOISE_REF_LEVEL) / 2.0)
+                    lev = pyr[level] * noise_scale        # [Tl,H,W]
                    H, W = lev.shape[1], lev.shape[2]
                    nvel = (2 * vr + 1) ** 2
                    o5_gpu, rf_gpu = [], []
@@ -175,18 +215,54 @@ def serve(run_dir, host, port):
                        wins = torch.stack([lev[(start + (c0 + j) * stride) - N + 1:
                                                (start + (c0 + j) * stride) + 1].flip(0) for j in range(b)])  # [b,N,H,W]
                        field = shift_stitch(m, wins, P)  # [b,C,H,W]
-                        o5, rf, nv = decode(field, vr, (rx, ry, rw, rh), rmax)
+                        o5, rf, nv = decode(field, vr, (rx, ry, rw, rh), rmax)   # L1: ghostbusted offset5 + ROI
                        nvel = nv
-                        o5_gpu.append(o5); rf_gpu.append(rf)   # keep on GPU
+                        if use_l2:
+                            # Layer-2 (track-before-detect) over the scene/time axis. Feed the FULL
+                            # (non-ghostbusted) field as (s, Vx/vd, Vy/vd) px/level-frame; carry the recurrent
+                            # hidden state across chunks (reset on l2_reset at the level's first chunk). Output
+                            # replaces offset5 with {L1 dx, L1 dy, L2 det, L2 Vx*vd, L2 Vy*vd} (vel back to cells
+                            # so Java's existing /vel_decimate viz scaling -> px/level-frame). By Claude 06/22/2026
+                            ong, _, _ = decode(field, vr, (rx, ry, rw, rh), 0.0)        # no ghostbuster (L2 gets full field)
+                            l2in = torch.stack([ong[:, 2], ong[:, 3] / VEL_DECIMATE, ong[:, 4] / VEL_DECIMATE], 1)  # [b,3,H,W]
+                            # FPN-bad margins arrive as NaN; the recurrent circular conv would otherwise spread
+                            # NaN inward by the kernel radius every frame ("eating" the borders). Sanitize the
+                            # input so NaN can never seed/propagate through the hidden state. By Claude 06/22/2026
+                            l2in = torch.nan_to_num(l2in, nan=0.0, posinf=0.0, neginf=0.0)
+                            Hf, Wf = l2in.shape[2], l2in.shape[3]
+                            if (h_l2 is None) or (h_l2.shape[2] != Hf) or (h_l2.shape[3] != Wf) or (l2_reset and c0 == 0):
+                                h_l2 = torch.zeros(1, m2.ch_hidden, Hf, Wf, device=device, dtype=field.dtype)
+                                age_l2 = torch.zeros(1, 1, Hf, Wf, device=device, dtype=field.dtype)   # track age, carried+reset like h_l2
+                                sprev_l2 = torch.zeros(1, 1, Hf, Wf, device=device, dtype=field.dtype)  # previous-frame L2 det
+                            dets, vxs, vys, ages = [], [], [], []
+                            for j in range(b):                                          # forward in time, carry hidden + age
+                                h_l2 = m2.cell(l2in[j:j+1], h_l2)
+                                dlog, vel = m2.decode(h_l2)                             # [1,1,H,W],[1,2,H,W]
+                                s = torch.sigmoid(dlog[:, 0:1])                         # [1,1,H,W] current L2 det
+                                # AGE (track-before-detect persistence): die where det<=AGE_THR, else 1 + oldest age among
+                                # 5x5 PREVIOUS-frame neighbors that are themselves STRONG (det >= AGE_K * local-max det) -
+                                # so a weak-but-old straggler can't seed age; the raised AGE_THR stops the noise halo from
+                                # dilating age across gaps. Level-uniform 5x5 (pyramid keeps ~const px/level-frame). By Claude 06/24/2026
+                                maxS = F.max_pool2d(sprev_l2, 5, 1, 2)                          # local max prev-det in 5x5
+                                elig = (sprev_l2 >= AGE_K * maxS) & (sprev_l2 > AGE_THR)        # strong AND alive ancestors
+                                prev = torch.where(elig, age_l2, torch.zeros_like(age_l2))      # only strong ancestors pass age
+                                age_l2 = torch.where(s > AGE_THR, F.max_pool2d(prev, 5, 1, 2) + 1.0, torch.zeros_like(age_l2))
+                                sprev_l2 = s
+                                dets.append(s[:, 0]); ages.append(age_l2[:, 0]); vxs.append(vel[:, 0]); vys.append(vel[:, 1])
+                            l2vx = torch.cat(vxs, 0) * VEL_DECIMATE; l2vy = torch.cat(vys, 0) * VEL_DECIMATE
+                            o5 = torch.stack([o5[:, 0], o5[:, 1], torch.cat(dets, 0), l2vx, l2vy, torch.cat(ages, 0)], 1)  # +L2 age (6th); keep L1 dx,dy
+                        o5_gpu.append(o5); rf_gpu.append(rf)   # keep on GPU (rf = L1 ROI reference even when L2 on)
                    if device == "cuda":
                        ev1.record(); torch.cuda.synchronize(); gms = ev0.elapsed_time(ev1)
                    else:
                        gms = (time.perf_counter() - t0) * 1e3
-                    allo = torch.cat(o5_gpu, 0).cpu().numpy().astype(">f4")  # [count,5,H,W]   D2H untimed (dev-only)
+                    allo = torch.cat(o5_gpu, 0).cpu().numpy().astype(">f4")  # [count,nch,H,W] nch=5 (L1) or 6 (L2:+age) D2H untimed
+                    nch = allo.shape[1]                                      # channel count sent in the header (was hardcoded 5)
                    allr = torch.cat(rf_gpu, 0).cpu().numpy().astype(">f4")  # [count,rh,rw,nvel]
                    print(f"{datetime.now():%H:%M:%S} INFER lev={level} {count} scenes (f{start}..,stride {stride}) "
-                          f"ROI={rw}x{rh} ghost={rmax:.1f} gpu={gms:.1f}ms ({(allo.nbytes+allr.nbytes)/1e6:.1f}MB out)", flush=True)
-                    conn.sendall(struct.pack(">diiiiii", gms, H, W, count, nvel, rh, rw))
+                          f"ROI={rw}x{rh} ghost={rmax:.1f} nscale={noise_scale:.3f} L2={'on' if use_l2 else 'off'}{'(reset)' if (use_l2 and l2_reset) else ''} "
+                          f"gpu={gms:.1f}ms ({(allo.nbytes+allr.nbytes)/1e6:.1f}MB out)", flush=True)
+                    conn.sendall(struct.pack(">diiiiiii", gms, H, W, count, nch, nvel, rh, rw))  # +nch (offset channels). By Claude 06/24/2026
                    conn.sendall(allo.tobytes())
                    conn.sendall(allr.tobytes())
                elif cmd == CMD_READBACK:
@@ -203,7 +279,8 @@ def serve(run_dir, host, port):
 if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument("--run", default="runs/weighted9_pm_s")
+    ap.add_argument("--l2run", default=None, help="optional Layer-2 run dir (model.pt); omit for L1-only")
    ap.add_argument("--host", default="0.0.0.0")
    ap.add_argument("--port", type=int, default=5577)
    args = ap.parse_args()
-    serve(args.run, args.host, args.port)
+    serve(args.run, args.host, args.port, l2_run=args.l2run)
--- a/src/main/resources/cuas_dnn/layer2.py
+++ b/src/main/resources/cuas_dnn/layer2.py
+"""C5P Layer-2 (track-before-detect) — minimal circular-ConvGRU on a torus. By Claude on 06/21/2026
+
+Layer 1 (frozen RawFCN) emits, per level-frame, a dense stride-4 field {s, Vx, Vy, dx, dy}.
+Layer 2 is a RECURRENT net whose hidden state is the running 4D track memory (x, y, vx, vy),
+fed a target-following 32x32 slice of that field one frame at a time. This first cut is the
+SIMPLEST viable version (per Andrey 06/21):
+  - plain circular ConvGRU  (NO explicit velocity-advection warp yet — added as a 2nd step;
+    the conv recurrence still learns local motion implicitly),
+  - dense Gaussian-bump readout (det map + Vx,Vy maps; supervise with a bump at truth),
+  - single target, free-orbit (absolute position = torus-local + winding offset, tracked
+    OUTSIDE the net; not needed for this module's forward/backward).
+
+Torus rationale: xy is a PERIODIC 32x32 grid (Conv2d padding_mode='circular'). With the target
+drift over a window staying << 32 cells, the single target "lives in infinite space" on a tiny
+fixed array — no border code, translation-equivariant everywhere, trivial to batch. vx,vy are
+NOT periodic (bounded by vmax; velocity does not wrap).
+
+UNITS: the field grid is stride-4, so one torus cell = 4 scene px. Vx,Vy channels and the
+velocity readout are kept in Layer-1 units (px/level-frame); vmax≈1.4 px/frame => ~0.35 cells/
+frame => ~2.8 cells over N=8 (<< 32, the R<<G condition the torus relies on). The /4 conversion
+to cells only matters once we add the advection warp.
+
+Run the smoke test:  python layer2.py
+"""
+
+import argparse
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+# ---------------------------------------------------------------------------
+# Recurrent cell
+# ---------------------------------------------------------------------------
+class ConvGRUCellTorus(nn.Module):
+    """Single ConvGRU update on a periodic (toroidal) xy grid. By Claude on 06/21/2026
+
+    Gate equations (every tensor is [B, Ch, G, G]):
+        z  = sigmoid(conv_z([x, h]))         update gate
+        r  = sigmoid(conv_r([x, h]))         reset gate
+        n  = tanh   (conv_n([x, r * h]))     candidate state
+        h' = (1 - z) * h + z * n             blended new hidden state
+    Each gate is one k x k Conv2d with padding_mode='circular', so the receptive field
+    wraps around both grid axes instead of seeing a zero border.
+
+    Args:
+        ch_in:     channels of the per-frame input x.
+        ch_hidden: channels of the hidden state h (and of every gate).
+        k:         gate kernel size; padding is k // 2.
+    """
+    def __init__(self, ch_in, ch_hidden, k=3):
+        super().__init__()
+        stacked = ch_in + ch_hidden                              # channels of the [x, h] concat
+        conv_kw = dict(kernel_size=k, padding=k // 2, padding_mode='circular')
+        # gates are created in z, r, n order (kept: seeded init and state_dict keys depend on it)
+        self.conv_z = nn.Conv2d(stacked, ch_hidden, **conv_kw)
+        self.conv_r = nn.Conv2d(stacked, ch_hidden, **conv_kw)
+        self.conv_n = nn.Conv2d(stacked, ch_hidden, **conv_kw)
+
+    def forward(self, x, h):
+        """x: [B, Cin, G, G], h: [B, Ch, G, G] -> new hidden state [B, Ch, G, G]."""
+        gate_in = torch.cat((x, h), dim=1)                       # [B, Cin+Ch, G, G]
+        update = torch.sigmoid(self.conv_z(gate_in))             # z: how much of the candidate to take
+        reset = torch.sigmoid(self.conv_r(gate_in))              # r: how much old state feeds the candidate
+        cand_in = torch.cat((x, reset * h), dim=1)               # [B, Cin+Ch, G, G] reset-masked hidden
+        candidate = torch.tanh(self.conv_n(cand_in))             # n
+        return (1.0 - update) * h + update * candidate           # convex blend of old state and candidate
+
+
+# ---------------------------------------------------------------------------
+# Layer-2 net
+# ---------------------------------------------------------------------------
+class Layer2Net(nn.Module):
+    """Recurrent track-before-detect over a torus field sequence. By Claude on 06/21/2026
+
+    forward(seq) consumes T frames of the Layer-1 field slice and returns, per frame, a dense
+    det logit + (Vx,Vy) over the torus. Hidden state starts at 0 (no track) and accumulates
+    evidence across frames — the recurrence IS the track filter.
+
+    Args:
+        ch_in:     field channels fed in (s, Vx, Vy -> 3).
+        ch_hidden: hidden track-memory channels.
+        grid:      default torus side G used by init_hidden() when no size is given.
+        vmax:      bound of the tanh velocity readout.
+        k:         kernel size of the ConvGRU gate convolutions.
+    """
+    def __init__(self, ch_in=3, ch_hidden=24, grid=32, vmax=1.4, k=3):
+        super().__init__()
+        self.ch_in = ch_in           # field channels fed in: s, Vx, Vy
+        self.ch_hidden = ch_hidden   # hidden track-memory channels
+        self.grid = grid             # torus side G (cells); one cell = 4 scene px
+        self.vmax = vmax             # velocity readout bound, px/level-frame (matches Layer-1 training vmax)
+        self.cell = ConvGRUCellTorus(ch_in, ch_hidden, k=k)
+        # readout head: hidden -> det(1) + raw Vx,Vy(2); 1x1 conv = per-cell decode
+        self.head = nn.Conv2d(ch_hidden, 1 + 2, 1)
+
+    def init_hidden(self, B, device, dtype, height=None, width=None):
+        """Zero hidden state ("no track yet"), [B, Ch, height, width].
+
+        height/width default to self.grid, so existing 3-argument calls still get [B, Ch, G, G].
+        """
+        rows = self.grid if height is None else height
+        cols = self.grid if width is None else width
+        return torch.zeros(B, self.ch_hidden, rows, cols, device=device, dtype=dtype)
+
+    def decode(self, h):
+        """h: [B, Ch, H, W] -> (det_logit [B, 1, H, W], vel [B, 2, H, W] bounded to +-vmax)."""
+        o = self.head(h)                                         # [B, 3, H, W]
+        det = o[:, 0:1]                                          # [B, 1, H, W] raw logit
+        vel = self.vmax * torch.tanh(o[:, 1:3])                  # [B, 2, H, W] px/level-frame
+        return det, vel
+
+    def forward(self, seq, h=None):
+        """Run the recurrence over a whole sequence.
+
+        seq: [B, T, Cin, H, W]; h: optional initial hidden state [B, Ch, H, W].
+        Returns det [B, T, 1, H, W] (raw logits) and vel [B, T, 2, H, W].
+        """
+        B, T = seq.shape[0], seq.shape[1]
+        if h is None:
+            # Size the state from the input rather than from self.grid: the convs are
+            # size-agnostic, and a grid x grid state would fail in torch.cat for any other H x W.
+            h = self.init_hidden(B, seq.device, seq.dtype, seq.shape[-2], seq.shape[-1])
+        dets, vels = [], []
+        for t in range(T):                                       # BPTT unrolls this loop
+            h = self.cell(seq[:, t], h)                          # [B, Ch, H, W] recurrent update
+            det, vel = self.decode(h)                            # per-frame readout
+            dets.append(det)
+            vels.append(vel)
+        det = torch.stack(dets, dim=1)                           # [B, T, 1, H, W]
+        vel = torch.stack(vels, dim=1)                           # [B, T, 2, H, W]
+        return det, vel
+
+
+# ---------------------------------------------------------------------------
+# Dense Gaussian-bump supervision (single target)
+# ---------------------------------------------------------------------------
+def bump_target(pos_xy, grid, sigma=1.0, device=None):
+    """Toroidal Gaussian bump at (sub-cell) position pos_xy. By Claude on 06/21/2026
+
+    Args:
+        pos_xy: [B, T, 2] (x, y) in torus cells (may be fractional / out of [0,G) — wraps).
+        grid:   torus side G in cells.
+        sigma:  bump standard deviation in cells.
+        device: device for the coordinate grid; None (default) follows pos_xy.device so the
+                grid and the positions can never end up on different devices by accident.
+
+    Returns:
+        det bump [B, T, 1, G, G] in (0, 1], equal to 1 exactly on a target cell centre.
+        Distance uses the WRAPPED (toroidal) metric, so a target near the edge still gets a
+        single round bump that straddles the seam.
+    """
+    if device is None:
+        device = pos_xy.device
+    coord = torch.arange(grid, device=device).float()           # [G]
+    gy, gx = torch.meshgrid(coord, coord, indexing='ij')        # [G, G] each
+    gx = gx[None, None]; gy = gy[None, None]                     # [1,1,G,G] broadcast over B,T
+    px = pos_xy[..., 0][..., None, None]                         # [B, T, 1, 1]
+    py = pos_xy[..., 1][..., None, None]                         # [B, T, 1, 1]
+    # wrapped (toroidal) coordinate difference: nearest image around the G-periodic grid
+    dx = (gx - px + grid / 2) % grid - grid / 2                  # [B, T, G, G] in [-G/2, G/2)
+    dy = (gy - py + grid / 2) % grid - grid / 2
+    g = torch.exp(-(dx * dx + dy * dy) / (2 * sigma * sigma))    # [B, T, G, G]
+    return g[:, :, None]                                         # [B, T, 1, G, G]
+
+
+def layer2_loss(det_logit, vel, det_t, vel_t, support=0.3, pos_weight=20.0, vel_weight=0.3):
+    """Detection BCE (sparse bump -> pos_weight) + velocity MSE on the bump support. By Claude 06/21
+
+    Args:
+        det_logit:  [B,T,1,G,G] raw detection logits.
+        det_t:      [B,T,1,G,G] detection target in [0,1].
+        vel:        [B,T,2,G,G] predicted velocity.
+        vel_t:      [B,T,2,G,G] target velocity (px/level-frame; only used where det_t > support).
+        support:    det_t threshold defining the bump core that supervises velocity.
+        pos_weight: BCE weight of the positive class.
+        vel_weight: weight of the velocity term in the total (default 0.3, the former constant).
+
+    Returns:
+        (total, parts): total = l_det + vel_weight * l_vel as a tensor, and
+        parts = {"det": float, "vel": float} with the detached components.
+    """
+    pw = torch.tensor(pos_weight, device=det_logit.device)
+    l_det = F.binary_cross_entropy_with_logits(det_logit, det_t, pos_weight=pw)
+    core = (det_t > support).expand_as(vel)                      # [B,T,2,G,G] bump core mask, both channels
+    if core.any():
+        l_vel = F.mse_loss(vel[core], vel_t[core])
+    else:
+        # no supervised cell in this batch: a zero that is still a tensor derived from vel,
+        # so both branches return the same type
+        l_vel = vel.sum() * 0.0
+    return l_det + vel_weight * l_vel, {"det": float(l_det.detach()), "vel": float(l_vel.detach())}
+
+
+# ---------------------------------------------------------------------------
+# Smoke test: fake Layer-1-like field, single target on a wrapping straight line.
+# Verifies the module trains end-to-end (forward + BPTT + loss) BEFORE real Layer-1 fields.
+# This is NOT the real training data — that comes in the next step (trajectory-sequence gen).
+# ---------------------------------------------------------------------------
+def fake_field_batch(rng, B, T, grid, vmax, sigma=1.0, snr=4.0, device="cpu"):
+    """Build a toy 'Layer-1 field' sequence + truth. By Claude on 06/21/2026
+
+    A single target starts at a random torus cell, moves at constant (vx,vy) px/frame
+    (=> (vx,vy)/4 cells/frame), wrapping. The s-channel is a Gaussian bump at the target scaled
+    by snr, plus unit Gaussian noise, clamped at 0. The Vx,Vy channels carry the exact true
+    velocity where the noise-free bump exceeds 0.3 and are 0 elsewhere (no noise is added to them).
+
+    Args:
+        rng:    numpy Generator; draws x0, y0, angle, speed, then the noise field, per batch item.
+        B, T:   batch size and sequence length.
+        grid:   torus side in cells.
+        vmax:   speed is drawn uniformly from [0.3, 1.0) * vmax.
+        sigma:  bump standard deviation in cells.
+        snr:    bump amplitude relative to the unit-variance noise.
+        device: device of the returned tensors.
+
+    Returns:
+      seq    [B,T,3,G,G]  (s, Vx, Vy)
+      pos    [B,T,2]      target (x,y) in cells, wrapped into [0, G)
+      veltru [B,T,2]      true (Vx,Vy) px/level-frame
+    """
+    seq = torch.zeros(B, T, 3, grid, grid, device=device)
+    pos = torch.zeros(B, T, 2, device=device)
+    veltru = torch.zeros(B, T, 2, device=device)
+    frames = np.arange(T)                                        # [T] frame index
+    for b in range(B):
+        x0 = rng.uniform(0, grid); y0 = rng.uniform(0, grid)
+        ang = rng.uniform(0, 2 * np.pi); spd = rng.uniform(0.3, 1.0) * vmax
+        vx = spd * np.cos(ang); vy = spd * np.sin(ang)          # px/level-frame
+        # whole track at once instead of 4*T scalar tensor writes; cells (stride-4 => /4), wrapped
+        track = np.stack([(x0 + vx / 4.0 * frames) % grid,
+                          (y0 + vy / 4.0 * frames) % grid], axis=1)   # [T,2] float64
+        pos[b] = torch.from_numpy(track.astype(np.float32)).to(device)
+        veltru[b, :, 0] = float(vx); veltru[b, :, 1] = float(vy)
+        # s channel: noisy toroidal bump at the target; vel channels: truth over the bump
+        bump = bump_target(pos[b:b+1], grid, sigma, device)[0, :, 0]   # [1,T,1,G,G] -> [T,G,G]
+        noise = torch.from_numpy(rng.standard_normal((T, grid, grid)).astype(np.float32)).to(device)
+        seq[b, :, 0] = (snr * bump + noise).clamp(min=0.0)       # s >= 0, SNR-scaled signal in noise
+        core = (bump > 0.3).float()                              # [T,G,G]
+        seq[b, :, 1] = vx * core; seq[b, :, 2] = vy * core
+    return seq, pos, veltru
+
+
+def smoke_test(steps=400, B=16, T=8, grid=32, vmax=1.4, device=None):
+    """Train a fresh Layer2Net on the toy generator and print progress.
+
+    Each step draws a new fake_field_batch, builds the dense det / velocity targets from its
+    truth and takes one Adam step. A status line is printed at step 1 and every 50 steps:
+    mean probability on the bump core (should rise) and max probability far from it.
+
+    Args:
+        steps:  number of optimisation steps.
+        B, T:   batch size and sequence length of each toy batch.
+        grid:   torus side in cells.
+        vmax:   velocity bound passed to both the generator and the net.
+        device: torch device; a falsy value picks "cuda" when available, else "cpu".
+    """
+    if not device:
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+    rng = np.random.default_rng(0)
+    net = Layer2Net(ch_in=3, ch_hidden=24, grid=grid, vmax=vmax).to(device)
+    opt = torch.optim.Adam(net.parameters(), 2e-3)
+    nparams = sum(w.numel() for w in net.parameters())
+    print(f"Layer2Net: {nparams} params, grid={grid}, ch_hidden=24, device={device}", flush=True)
+    no_vel = torch.zeros((), device=device)                              # velocity target off the bump
+    for step in range(1, steps + 1):
+        seq, pos, veltru = fake_field_batch(rng, B, T, grid, vmax, device=device)
+        det_t = bump_target(pos, grid, sigma=1.0, device=device)         # [B,T,1,G,G]
+        on_bump = det_t > 0.3                                             # [B,T,1,G,G] bump core
+        # true velocity broadcast over the core cells of both channels, zero elsewhere
+        vel_t = torch.where(on_bump, veltru[..., None, None], no_vel)    # [B,T,2,G,G]
+        det_logit, vel = net(seq)
+        loss, comp = layer2_loss(det_logit, vel, det_t, vel_t)
+        opt.zero_grad()
+        loss.backward()
+        opt.step()
+        if step == 1 or step % 50 == 0:
+            with torch.no_grad():
+                prob = torch.sigmoid(det_logit)
+                peak = float(prob[on_bump].mean())
+                bg = float(prob[det_t < 0.05].max())
+            print(f"step {step:4d}  det {comp['det']:.4f}  vel {comp['vel']:.4f}  "
+                  f"peak(s@truth) {peak:.3f}  max-bg {bg:.3f}", flush=True)
+    print("smoke test done.", flush=True)
+
+
+if __name__ == "__main__":
+    # CLI for the smoke test: each flag maps 1:1 onto a smoke_test() keyword argument.
+    parser = argparse.ArgumentParser()
+    for flag, kind, default in (("--steps", int, 400), ("--grid", int, 32), ("--vmax", float, 1.4)):
+        parser.add_argument(flag, type=kind, default=default)
+    opts = parser.parse_args()
+    smoke_test(steps=opts.steps, grid=opts.grid, vmax=opts.vmax)
--- a/src/main/resources/cuas_dnn/run_infer_server.sh
+++ b/src/main/resources/cuas_dnn/run_infer_server.sh
 #!/usr/bin/env bash
 # Start/stop the CUAS DGX inference server (PyTorch RawFCN, cuDNN) in the NGC container.
 # By Claude on 06/20/2026.  Run on the DGX (elphel@192.168.0.62).
-#   start|stop|logs|status        env: RUN=runs/<model>  PORT=5577
+#   start|stop|logs|status        env: RUN=runs/<model>  RUN2=runs/<l2>  PORT=5577
 set -euo pipefail
 NAME=cuas_infer
 IMG=nvcr.io/nvidia/pytorch:25.10-py3
 CODE=/home/elphel/c5p_dnn
 RUN="${RUN:-runs/weighted9_pm_s}"
+RUN2="${RUN2:-}"                 # optional Layer-2 run dir; empty -> L1-only. By Claude 06/22/2026
 PORT="${PORT:-5577}"
 case "${1:-start}" in
  start)
    docker rm -f "$NAME" >/dev/null 2>&1 || true
+    L2ARG=(); [ -n "$RUN2" ] && L2ARG=(--l2run "$RUN2")   # array: a RUN2 with spaces/globs stays ONE argument; empty -> no flag
    docker run -d --name "$NAME" --gpus all --network host \
      -v "$CODE":/work -w /work "$IMG" \
-      python infer_server.py --run "$RUN" --port "$PORT" >/dev/null
-    echo "started $NAME (run=$RUN port=$PORT)"; sleep 3; docker logs "$NAME"
+      python infer_server.py --run "$RUN" ${L2ARG[@]+"${L2ARG[@]}"} --port "$PORT" >/dev/null
+    echo "started $NAME (run=$RUN l2=${RUN2:-off} port=$PORT)"; sleep 3; docker logs "$NAME"
    ;;
  stop)   docker rm -f "$NAME" >/dev/null 2>&1 && echo "stopped" || echo "not running" ;;
  logs)   docker logs --tail 60 "$NAME" ;;
  status) docker ps --filter "name=$NAME" --format "{{.Names}} {{.Status}}" ;;
-  *) echo "usage: $0 {start|stop|logs|status}  (env: RUN=, PORT=)"; exit 1 ;;
+  *) echo "usage: $0 {start|stop|logs|status}  (env: RUN=, RUN2=, PORT=)"; exit 1 ;;
 esac