Commit 3dfe70ad authored by Andrey Filippov

CLAUDE: CUAS RT mode-0 path + L2 age/noise/flight-log (pre-JNA-migration checkpoint)

Checkpoint of the CUAS real-time work before the JCuda->JNA GPU-layer migration:
- OpticalFlow.buildSeries mode-0 curt_en fork: generate the merged-CUAS stack via
  CuasRanging.prepareFpixels() (GPU, explicit) then run the CUDA-free CuasDetectRT;
  coexists with the oracle (oracle gated off when curt_en).
- CuasDetectRT: file + in-memory(ImagePlus) entries via shared ingest(); -OFFSET gains
  an L2 "age" slice (5->6 ch), per-level noise scale, -LEV0 uniform naming, -OFFSET-<model> suffix.
- infer_server.py: L2 track-age (masked 5x5 max-pool, AGE_THR=0.2/AGE_K=0.5),
  per-level noise normalization (sqrt(2)^(L-3) default, Java-sent scale), nch + noise_scale
  + CMD_STATUS protocol additions; auto model-switch in CuasDnnRemote.ensureServer.
- cuasSynth + cuasNoise list SET keys (shared synth dir / inline per-level scales).
- CuasRanging.saveUasFlightLogCsv: per-frame UAS truth -> <name>-UAS_DATA.tsv (mode-0 only).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
parent 95e25fcc
......@@ -68,7 +68,9 @@ public class EyesisCorrectionParameters {
"resultsDirectory", // 6
"cuasSeed", // 7
"uasLogs", // 8
"skyMask"}; // 9
"skyMask", // 9
"cuasSynth", // 10 shared synthetic-grid dir (curt_synth_src), valid for all sequences. By Claude on 06/24/2026
"cuasNoise"}; // 11 INLINE per-level L2 noise-scale numbers (NOT a path); empty -> sqrt default. By Claude on 06/24/2026
public static final int KEY_INDEX_ROOT_DIRECTORY = 0;
public static final int KEY_INDEX_SOURCE_DIRECTORY = 1;
public static final int KEY_INDEX_LINKED_MODELS = 2;
......@@ -79,6 +81,8 @@ public class EyesisCorrectionParameters {
public static final int KEY_INDEX_CUAS_SEED = 7;
public static final int KEY_INDEX_UAS_LOGS = 8;
public static final int KEY_INDEX_SKY_MASK = 9;
public static final int KEY_INDEX_CUAS_SYNTH = 10; // By Claude on 06/24/2026
public static final int KEY_INDEX_CUAS_NOISE = 11; // inline per-level noise scales. By Claude on 06/24/2026
public static final String AUX_PREFIX = "AUX-";
public boolean swapSubchannels01= true; // false; // (false: 0-1-2, true - 1-0-2)
......@@ -129,6 +133,8 @@ public class EyesisCorrectionParameters {
public String cuasSeedDir= "";
public boolean useCuasSeedDir= false;
public String cuasSkyMask = ""; // TIFF image 640x512 where 1.0 - sky, 0.0 - ground, blurred with GB (now sigma==2.0)
public String cuasSynth = ""; // shared dir holding the synthetic-grid TIFF (curt_synth_src) - valid for ALL sequences, resolved from the list SET key "cuasSynth" (relative to rootDirectory), like cuasSkyMask; empty -> per-sequence model dir (old behavior). By Claude on 06/24/2026
public String cuasNoise = ""; // INLINE per-level L2 noise-scale numbers from the list SET key "cuasNoise" (e.g. "0.354,0.5,0.707,1.0,1.414,2.0"); NOT a path. Empty -> theoretical sqrt(2)^(L-3) default (computed in CuasDetectRT). By Claude on 06/24/2026
public String cuasUasLogs = ""; // json file path containing UAS logs
public double cuasUasTimeStamp = 0.0; // timestamp corresponding to the UAS time 0.0
public double [] cuasCameraATR = {0, 0, 0};
......@@ -322,6 +328,8 @@ public class EyesisCorrectionParameters {
cp.useCuasSeedDir= this.useCuasSeedDir;
cp.cuasSkyMask = this.cuasSkyMask;
cp.cuasSynth = this.cuasSynth;
cp.cuasNoise = this.cuasNoise;
cp.cuasUasLogs = this.cuasUasLogs;
cp.cuasUasTimeStamp = this.cuasUasTimeStamp;
cp.cuasCameraATR = this.cuasCameraATR.clone();
......@@ -1833,7 +1841,7 @@ public class EyesisCorrectionParameters {
if (dir_map.get(KEY_DIRS[i]).length() > 0){
Path dir_path=base_path.resolve(Paths.get(dir_map.get(KEY_DIRS[i])));
File dir_file = new File(dir_path.toString());
if ((i != KEY_INDEX_UAS_LOGS) && (i != KEY_INDEX_SKY_MASK)) { // cuasUasLogs, cuasSkyMask are files, not directories
if ((i != KEY_INDEX_UAS_LOGS) && (i != KEY_INDEX_SKY_MASK) && (i != KEY_INDEX_CUAS_SYNTH) && (i != KEY_INDEX_CUAS_NOISE)) { // cuasUasLogs/cuasSkyMask=files; cuasSynth=input dir; cuasNoise=inline numbers (not a path) - don't auto-create. By Claude on 06/24/2026
if (!dir_file.exists()) {
if (MKDIRS_ALLOW) {
dir_file.mkdirs();
......@@ -1901,7 +1909,20 @@ public class EyesisCorrectionParameters {
case KEY_INDEX_SKY_MASK: // 9: // cuasSeed
this.cuasSkyMask = dir_string; // dir_path.toString();
System.out.println("this.cuasSkyMask=" + this.cuasSkyMask);
break;
case KEY_INDEX_CUAS_SYNTH: // 10: shared synthetic-grid dir (curt_synth_src), all sequences. By Claude on 06/24/2026
this.cuasSynth = dir_string;
System.out.println("this.cuasSynth=" + this.cuasSynth);
break;
case KEY_INDEX_CUAS_NOISE: // 11: INLINE per-level noise scales (numbers, NOT a path). By Claude on 06/24/2026
{
StringBuilder nsb = new StringBuilder(dir_map.get(KEY_DIRS[i])); // first number
ArrayList<String> nx = extra_map.get(KEY_DIRS[i]); // remaining numbers
if (nx != null) for (String v : nx) nsb.append(",").append(v);
this.cuasNoise = nsb.toString();
}
System.out.println("this.cuasNoise=" + this.cuasNoise);
break;
}
}
......
......@@ -2880,8 +2880,66 @@ public class CuasRanging {
getCenter_CLT().saveStringInModelDirectory(tsv.toString(), UAS_DATA_SUFFIX, false);
}
}
// relies on calcMatchingTargetsLengths(.., true,...) called from recalcOmegas() to set [RSLT_GLOBAL]
/**
 * Standalone UAS flight-log extraction for the mode-0 RT path (no CuasMotion / targets array).
 *
 * For every timestamped slice of {@code imp_targets} the UAS log reader is queried for the projected
 * px, py, range and for lat/lon/alt + NED, and one TSV row is accumulated. The result is saved through
 * {@code getCenter_CLT().saveStringInModelDirectory(..., UAS_DATA_SUFFIX, false)} (columns are intended
 * to match addUasData). Leading slices whose label does not start with a digit ("average" slices) are
 * skipped, the same rule CuasDetectRT.ingest uses. Needs the center_CLT pose, so mode-0 only (not the
 * mode=3 file path).
 *
 * Robustness (this method never throws on missing inputs, it prints a message and returns):
 *  - null reader, null/empty stack or missing center_CLT -> nothing is written;
 *  - null/empty slice labels are treated as non-timestamp labels (skipped while searching the first
 *    timestamped slice, reported as "no entry" rows afterwards);
 *  - a missing LLA/NED record leaves the corresponding columns empty instead of failing.
 *
 * @param uasLogReader source of the UAS flight log; its camera LLA is updated from center_CLT when
 *                     center_CLT has INS data and a non-zero latitude or longitude
 * @param imp_targets  merged stack whose slice labels carry the scene timestamps
 * By Claude on 06/24/2026
 */
public void saveUasFlightLogCsv(UasLogReader uasLogReader, ImagePlus imp_targets) {
	if (uasLogReader == null) {
		System.out.println("saveUasFlightLogCsv(): no UAS log reader - skipping flight-log CSV");
		return;
	}
	if ((imp_targets == null) || (imp_targets.getStackSize() < 1)) {
		System.out.println("saveUasFlightLogCsv(): no imp_targets - skipping flight-log CSV");
		return;
	}
	// center_CLT supplies the tile grid and the output directory - without it nothing can be produced
	if (getCenter_CLT() == null) {
		System.out.println("saveUasFlightLogCsv(): no center_CLT - skipping flight-log CSV");
		return;
	}
	// camera reference LLA from center_CLT (mirror addUasData)
	if (getCenter_CLT().hasIns()) {
		double [] cameraLla = getCenter_CLT().getLla();
		if ((cameraLla != null) && ((cameraLla[0] != 0.0) || (cameraLla[1] != 0.0))) {
			uasLogReader.setCameraLLA(cameraLla);
		}
	}
	double [] cam_atr = uasLogReader.getCameraATR();
	int tilesX = getCenter_CLT().getTileProcessor().getTilesX();
	int tilesY = getCenter_CLT().getTileProcessor().getTilesY();
	ij.ImageStack stack = imp_targets.getStack();
	int num_slices = stack.getSize();
	// Skip leading non-timestamp ("average") slices; a null/empty label is not a timestamp either
	int first_slice = 1;
	while (first_slice <= num_slices) {
		String first_label = stack.getSliceLabel(first_slice);
		if ((first_label != null) && !first_label.isEmpty() && Character.isDigit(first_label.charAt(0))) {
			break;
		}
		first_slice++;
	}
	// slice labels can carry a suffix (e.g. "1773135457.547099-0"); extract the bare timestamp the
	// same way as QuadCLT.getTimeStamp (regex \d{5,10}\.\d{6}), but keep it a STRING - the double
	// round-trip would lose microsecond precision at this magnitude. Compiled once, not per slice.
	final java.util.regex.Pattern ts_pattern = java.util.regex.Pattern.compile("\\d{5,10}\\.\\d{6}");
	StringBuilder sb = new StringBuilder();
	sb.append("seq\tts\tstatus\tpx\tpy\ttile_x\ttile_y\trange\tlat\tlon\talt\tnorth\teast\tdown\tcam_az\tcam_tilt\tcam_roll\n");
	int nseq = 0;
	for (int slice = first_slice; slice <= num_slices; slice++, nseq++) {
		String label = stack.getSliceLabel(slice);
		String norm = (label == null) ? "" : label.replace("_", ".");
		java.util.regex.Matcher tm = ts_pattern.matcher(norm);
		String timestamp = tm.find() ? tm.group() : norm;
		// no usable label -> do not query the reader, just emit a "no entry" row to keep seq aligned with slices
		boolean has_ts = !timestamp.isEmpty();
		double [] uas    = has_ts ? uasLogReader.getUasPxPyDRange(timestamp) : null; // px, py, disparity, range
		double [] llaned = has_ts ? uasLogReader.getUasLlaNed(timestamp)     : null; // lat, lon, alt, N, E, D
		String cam_cols = cam_atr[0]+"\t"+cam_atr[1]+"\t"+cam_atr[2];
		if (uas != null) {
			double px = uas[0], py = uas[1], range = uas[3];
			// floor (not truncation toward zero): px/py in (-DTT_SIZE, 0) must map to tile -1 => "OUT OF FoV"
			int tileX = (int) Math.floor(px / GPUTileProcessor.DTT_SIZE);
			int tileY = (int) Math.floor(py / GPUTileProcessor.DTT_SIZE);
			String status = ((tileX >= 0) && (tileY >= 0) && (tileX < tilesX) && (tileY < tilesY)) ? "IN FoV" : "OUT OF FoV";
			// 6 LLA/NED columns; left empty (5 separators) when the reader has no LLA/NED for this timestamp
			String lla_ned_cols = (llaned != null) ?
					(llaned[0]+"\t"+llaned[1]+"\t"+llaned[2]+"\t"+llaned[3]+"\t"+llaned[4]+"\t"+llaned[5]) :
					"\t\t\t\t\t";
			sb.append(nseq).append('\t').append(timestamp).append('\t').append(status).append('\t')
			  .append(px).append('\t').append(py).append('\t').append(tileX).append('\t').append(tileY).append('\t')
			  .append(range).append('\t').append(lla_ned_cols).append('\t').append(cam_cols).append('\n');
		} else {
			// px,py,tile_x,tile_y,range stay empty; lat/lon/alt are kept when available; N,E,D stay empty
			sb.append(nseq+"\t"+timestamp+"\tno entry\t\t\t\t\t\t"+
					((llaned != null) ? (llaned[0]+"\t"+llaned[1]+"\t"+llaned[2]) : "\t\t")+
					"\t\t\t\t"+cam_cols+"\n");
		}
	}
	getCenter_CLT().saveStringInModelDirectory(sb.toString(), UAS_DATA_SUFFIX, false);
	System.out.println("saveUasFlightLogCsv(): wrote UAS flight log ("+nseq+" scenes) -> "+getCenter_CLT().getImageName()+UAS_DATA_SUFFIX);
}
// relies on calcMatchingTargetsLengths(.., true,...) called from recalcOmegas() to set [RSLT_GLOBAL]
public void saveTargetStats(
final double [][][] targets_single) {
UasLogReader uasLogReader = cuasMotion.getUasLogReader();
......
......@@ -42,6 +42,9 @@ public class CuasDetectRT {
// String [] model_names;
QuadCLT master_CLT;
String fpixels_file = null;
String cuas_synth_dir = null; // shared synth-grid dir (list SET "cuasSynth"); null/empty -> scan model_directory (old per-sequence behavior). By Claude on 06/24/2026
String cuas_noise = ""; // inline per-level L2 noise scales (list SET "cuasNoise"); empty -> theoretical sqrt default. By Claude on 06/24/2026
public static final int NOISE_REF_LEVEL = 3; // reference pyramid level (scale 1.0) the net is calibrated to. By Claude on 06/24/2026
String base_name = null; // add suffix and
float [][] fpixels;
double [][] dpixels; // same as fpixels for faster calculations on 64-bit processors
......@@ -51,93 +54,147 @@ public class CuasDetectRT {
float [][] synth_pixels = null; // synthetic reference grid (scaled, NOT LoG'd), injected per pyramid level; null = real-only // By Claude on 06/14/2026
double infinity;
// mode=3 (current, fast, bypasses OpticalFlow extras): read the merged-CUAS stack from a file in
// model_directory. No GPU code here - the file was produced elsewhere. By Claude on 06/11/2026
public CuasDetectRT(
CLTParameters clt_parameters,
UasLogReader uasLogReader,
String model_directory,
String cuas_synth_dir, // shared synth-grid dir (list SET "cuasSynth"); "" -> per-sequence model_directory. By Claude on 06/24/2026
String cuas_noise, // inline per-level noise scales (list SET "cuasNoise"); "" -> sqrt default. By Claude on 06/24/2026
int debugLevel) {
this.uasLogReader = uasLogReader;
this.clt_parameters = clt_parameters;
this.model_directory = model_directory;
this.cuas_synth_dir = cuas_synth_dir;
this.cuas_noise = (cuas_noise == null) ? "" : cuas_noise;
this.infinity = clt_parameters.imp.cuas_infinity;
// The REAL stack is always the pyramid base (gives the full-length levels); the synthetic // By Claude on 06/14/2026
// reference grid (if curt_synth_src) is loaded separately below and injected per-level.
// reference grid (if curt_synth_src) is loaded separately (inside ingest()) and injected per-level.
final String fpixels_suffix = SUFFIX_FPIXELS_TIFF; // By Claude on 06/14/2026
String [] fpixels_paths = CorrectionParameters.getFilesByExtensionAsArray(model_directory, fpixels_suffix); // By Claude on 06/11/2026
String [] dbg_slices=null;
float [][] dbg_pixels=null;
boolean save_copy = false; // true;
if (fpixels_paths.length > 0) {
fpixels_file = newestFile(fpixels_paths); // pick up the newest matching file // By Claude on 06/14/2026
if (fpixels_paths.length > 1) {
System.out.println((fpixels_paths.length)+" files ending with \""+fpixels_suffix+"\" found in "+model_directory+", using the newest: "+fpixels_file); // By Claude on 06/14/2026
}
ImagePlus imp = new ImagePlus(fpixels_file);
width = imp.getWidth();
height = imp.getHeight();
if (width > 0) {
ImageStack stack = imp.getStack();
// Skip non-ts ("average") slices;
int first_slice = 1;
int num_slices = stack.getSize();
for (; !Character.isDigit(stack.getSliceLabel(first_slice).charAt(0)); first_slice++);
time_stamps = new String [num_slices - first_slice + 1];
if (save_copy) {
dbg_slices = new String [num_slices];
dbg_pixels = new float [num_slices][];
for (int i = 0; i < dbg_slices.length; i++) {
dbg_slices[i] = stack.getSliceLabel(1 + i);
dbg_pixels[i] = (float[]) stack.getPixels (1 + i);
}
}
fpixels = new float [time_stamps.length][width*height];
for (int i = 0; i < time_stamps.length; i++) {
time_stamps[i] = stack.getSliceLabel(first_slice + i);
fpixels[i] = (float[]) stack.getPixels (first_slice + i);
}
String fpixels_name = Path.of(fpixels_file).getFileName().toString();
base_name = fpixels_name.substring(0,fpixels_name.length() - fpixels_suffix.length()); // By Claude on 06/11/2026
if (clt_parameters.imp.curt_subtract_avg) base_name += "-SUBAVG"; // distinct output filenames for average-subtracted runs (don't overwrite) // By Claude on 06/14/2026
if (clt_parameters.imp.curt_synth_src) { // load the synthetic reference grid separately (NOT mixed here) // By Claude on 06/14/2026
// Real (loaded above) is the pyramid base. The synthetic file holds NORMALIZED
// (peak 1) clean targets / a velocity-reference grid: load + scale it, keep it
// aside (NO LoG - it has no slowly-varying background), and inject it TILED into
// each pyramid level just before conv5d/DNN, so it is never averaged across levels
// and looks the same at every level while the real noise is averaged (SNR up ~sqrt2/level).
double synth_scale = clt_parameters.imp.curt_synth_scale;
String [] synth_paths = CorrectionParameters.getFilesByExtensionAsArray(model_directory, SUFFIX_SYNTH_TIFF);
if (synth_paths.length > 0) {
String synth_file = newestFile(synth_paths); // pick up the newest synthetic // By Claude on 06/14/2026
ImagePlus imp_s = new ImagePlus(synth_file);
ImageStack ss = imp_s.getStack();
int s_first = 1;
for (; !Character.isDigit(ss.getSliceLabel(s_first).charAt(0)); s_first++);
int n_synth = ss.getSize() - s_first + 1;
synth_pixels = new float [n_synth][];
for (int i = 0; i < n_synth; i++) {
float [] sp = (float[]) ss.getPixels(s_first + i);
float [] sc = new float [sp.length];
for (int k = 0; k < sp.length; k++) sc[k] = (float)(sp[k] * synth_scale);
synth_pixels[i] = sc;
}
System.out.println("Synthetic grid: loaded "+n_synth+" frames (scale "+synth_scale+", no LoG) from "+synth_file+
" - injected per pyramid level, tiled (synth[j % "+n_synth+"])");
} else {
System.out.println("curt_synth_src set but no \""+SUFFIX_SYNTH_TIFF+"\" found in "+model_directory+" - real only");
}
base_name += "-SYNTH"+d2s(synth_scale)+"B"; // real is always the averaged background base // By Claude on 06/14/2026
}
System.out.println("Read image data from "+fpixels_file);
String fpixels_name = Path.of(fpixels_file).getFileName().toString();
String core_base = fpixels_name.substring(0, fpixels_name.length() - fpixels_suffix.length()); // By Claude on 06/11/2026
ingest(imp, core_base, debugLevel);
if (fpixels != null) {
System.out.println("Read image data from "+fpixels_file);
} else {
System.out.println("Failed to read image data from "+fpixels_file);
fpixels_file = null;
fpixels = null;
}
} else {
System.out.println("No files ending with \""+fpixels_suffix+"\" found in "+model_directory); // By Claude on 06/11/2026
fpixels_file = null;
fpixels = null;
dpixels = null;
}
return;
}
// mode=0 (OpticalFlow curt_en path): consume a PRE-GENERATED merged-CUAS stack (ImagePlus). CUDA-free -
// the GPU generator (CuasRanging.prepareFpixels(), which uses the CUDA tile-processor kernels) is called
// explicitly by the caller, where the QuadCLT/GPU context lives, so this class stays independent of the
// GPU code (which may be incompatible with a future CUDA). By Claude on 06/24/2026
public CuasDetectRT(
CLTParameters clt_parameters,
UasLogReader uasLogReader,
ImagePlus imp_targets, // pre-generated merged-CUAS stack (no UM); caller owns GPU generation
String model_directory, // where outputs are saved (e.g. center_CLT.getX3dDirectory())
String core_base_name, // base for output filenames (e.g. center_CLT.getImageName())
String cuas_synth_dir, // shared synth-grid dir (list SET "cuasSynth"); "" -> per-sequence model_directory. By Claude on 06/24/2026
String cuas_noise, // inline per-level noise scales (list SET "cuasNoise"); "" -> sqrt default. By Claude on 06/24/2026
int debugLevel) {
this.uasLogReader = uasLogReader;
this.clt_parameters = clt_parameters;
this.model_directory = model_directory;
this.cuas_synth_dir = cuas_synth_dir;
this.cuas_noise = (cuas_noise == null) ? "" : cuas_noise;
this.infinity = clt_parameters.imp.cuas_infinity;
if ((imp_targets == null) || (imp_targets.getWidth() <= 0)) {
System.out.println("CuasDetectRT(in-memory): null/empty imp_targets - nothing to do");
fpixels = null;
dpixels = null;
return;
}
System.out.println("CuasDetectRT(in-memory): ingesting generated merged stack \""+imp_targets.getTitle()+
"\" ("+imp_targets.getStackSize()+" slices) - outputs -> "+model_directory);
ingest(imp_targets, core_base_name, debugLevel);
return;
}
/** Populate fpixels/dpixels/width/height/time_stamps/base_name from a merged-CUAS ImagePlus
* (file-loaded for mode=3 or generated in memory for mode=0). Shared by both constructors;
* contains NO GPU code. By Claude on 06/24/2026 */
private void ingest(ImagePlus imp, String core_base_name, int debugLevel) {
String [] dbg_slices=null;
float [][] dbg_pixels=null;
boolean save_copy = false; // true;
width = (imp != null) ? imp.getWidth() : 0;
height = (imp != null) ? imp.getHeight() : 0;
if (width > 0) {
ImageStack stack = imp.getStack();
// Skip non-ts ("average") slices;
int first_slice = 1;
int num_slices = stack.getSize();
for (; !Character.isDigit(stack.getSliceLabel(first_slice).charAt(0)); first_slice++);
time_stamps = new String [num_slices - first_slice + 1];
if (save_copy) {
dbg_slices = new String [num_slices];
dbg_pixels = new float [num_slices][];
for (int i = 0; i < dbg_slices.length; i++) {
dbg_slices[i] = stack.getSliceLabel(1 + i);
dbg_pixels[i] = (float[]) stack.getPixels (1 + i);
}
}
fpixels = new float [time_stamps.length][width*height];
for (int i = 0; i < time_stamps.length; i++) {
time_stamps[i] = stack.getSliceLabel(first_slice + i);
fpixels[i] = (float[]) stack.getPixels (first_slice + i);
}
base_name = core_base_name;
if (clt_parameters.imp.curt_subtract_avg) base_name += "-SUBAVG"; // distinct output filenames for average-subtracted runs (don't overwrite) // By Claude on 06/14/2026
if (clt_parameters.imp.curt_synth_src) { // load the synthetic reference grid separately (NOT mixed here) // By Claude on 06/14/2026
// Real (loaded above) is the pyramid base. The synthetic file holds NORMALIZED
// (peak 1) clean targets / a velocity-reference grid: load + scale it, keep it
// aside (NO LoG - it has no slowly-varying background), and inject it TILED into
// each pyramid level just before conv5d/DNN, so it is never averaged across levels
// and looks the same at every level while the real noise is averaged (SNR up ~sqrt2/level).
double synth_scale = clt_parameters.imp.curt_synth_scale;
// Synthetic grid is sequence-INDEPENDENT: read it from the shared dir (list SET "cuasSynth")
// when set; otherwise fall back to the per-sequence model_directory (old behavior / single-
// sequence mode where the SET key is absent). By Claude on 06/24/2026
String synth_dir = ((cuas_synth_dir != null) && !cuas_synth_dir.isEmpty()) ? cuas_synth_dir : model_directory;
String [] synth_paths = CorrectionParameters.getFilesByExtensionAsArray(synth_dir, SUFFIX_SYNTH_TIFF);
if (synth_paths.length > 0) {
String synth_file = newestFile(synth_paths); // pick up the newest synthetic // By Claude on 06/14/2026
ImagePlus imp_s = new ImagePlus(synth_file);
ImageStack ss = imp_s.getStack();
int s_first = 1;
for (; !Character.isDigit(ss.getSliceLabel(s_first).charAt(0)); s_first++);
int n_synth = ss.getSize() - s_first + 1;
synth_pixels = new float [n_synth][];
for (int i = 0; i < n_synth; i++) {
float [] sp = (float[]) ss.getPixels(s_first + i);
float [] sc = new float [sp.length];
for (int k = 0; k < sp.length; k++) sc[k] = (float)(sp[k] * synth_scale);
synth_pixels[i] = sc;
}
System.out.println("Synthetic grid: loaded "+n_synth+" frames (scale "+synth_scale+", no LoG) from "+synth_file+
" - injected per pyramid level, tiled (synth[j % "+n_synth+"])");
} else {
System.out.println("curt_synth_src set but no \""+SUFFIX_SYNTH_TIFF+"\" found in "+model_directory+" - real only");
}
base_name += "-SYNTH"+d2s(synth_scale)+"B"; // real is always the averaged background base // By Claude on 06/14/2026
}
} else {
System.out.println("CuasDetectRT.ingest(): empty/invalid merged stack - no pixel data");
fpixels = null;
}
if (fpixels != null) {
dpixels = new double [fpixels.length][width*height];
......@@ -679,6 +736,11 @@ public class CuasDetectRT {
Rectangle roi,
String title_conv5d) {
final int dnn_stride = Math.max(1, clt_parameters.imp.curt_dnn_stride);
final boolean use_l2 = clt_parameters.imp.curt_dnn_l2; // run Layer-2 on the DGX: -OFFSET carries L2 det+vel instead of L1. By Claude 06/22/2026
final String l2model = use_l2 ? clt_parameters.imp.curt_dnn_l2_model : ""; // L2 run dir on the DGX; empty = L1-only (old way)
// L2 model id appended as the LAST filename suffix on -OFFSET so runs of different L2 models don't
// collide (and Dolphin's begin...end.tiff truncation still shows the model). By Claude on 06/24/2026
final String l2tag = use_l2 ? ("-" + l2model.substring(l2model.lastIndexOf('/') + 1)) : "";
final int W = getWidth(), H = getHeight();
final boolean save_rect = clt_parameters.imp.curt_save_c5rect;
final boolean save_hyper = clt_parameters.imp.curt_save_c5hyper;
......@@ -686,7 +748,7 @@ public class CuasDetectRT {
final boolean synth_bg = clt_parameters.imp.curt_synth_bg;
final int n_synth = synth ? synth_pixels.length : 0;
try { // auto-launch the DGX server if not already running (deploy bundled/override scripts, ssh-start, poll) // By Claude on 06/20/2026
CuasDnnRemote.ensureServer(clt_parameters.imp.curt_dnn_remote_host, clt_parameters.imp.curt_dnn_remote_model, clt_parameters.imp.curt_dnn_remote_srcdir);
CuasDnnRemote.ensureServer(clt_parameters.imp.curt_dnn_remote_host, clt_parameters.imp.curt_dnn_remote_model, l2model, clt_parameters.imp.curt_dnn_remote_srcdir);
} catch (Exception e) { System.out.println("runDnnRemote(): server auto-launch failed: "+e); }
try (CuasDnnRemote remote = new CuasDnnRemote(clt_parameters.imp.curt_dnn_remote_host)) {
// Build the upload array: LoG-conditioned real (optionally synth_bg_avg-decimated upstream), optionally
......@@ -713,6 +775,26 @@ public class CuasDetectRT {
+ java.util.Arrays.toString(levCounts) + " N="+N_dnn
+ " ("+(System.currentTimeMillis()-tup)+" ms, "+(dpixels_log.length*(long)H*W*4/1000000)+" MB)");
int nlevels = Math.min(pyramid_levels, levCounts.length);
// Per-level L1-input NOISE SCALES (single source of truth = here in Java; the server applies exactly what
// we send). Parse the inline cuasNoise SET; validate (>= nlevels positive numbers) else WARN + fall back to
// the theoretical sqrt(2)^(L-NOISE_REF_LEVEL). Recorded in every -OFFSET's metadata + printed near the end.
// These are critical/sensitive params - never silently misapply. By Claude on 06/24/2026
double [] noise_scales = new double [nlevels];
boolean noise_default = true;
if ((cuas_noise != null) && !cuas_noise.trim().isEmpty()) {
String [] ntok = cuas_noise.trim().split("[\\s,;]+");
boolean nok = (ntok.length >= nlevels);
double [] np = new double [ntok.length];
for (int i = 0; nok && (i < ntok.length); i++) {
try { np[i] = Double.parseDouble(ntok[i]); if (np[i] <= 0) nok = false; }
catch (NumberFormatException e) { nok = false; }
}
if (nok) { for (int L = 0; L < nlevels; L++) noise_scales[L] = np[L]; noise_default = false; }
else System.out.println("runDnnRemote(): WARNING cuasNoise=\""+cuas_noise+"\" invalid (need >= "+nlevels+" positive numbers) - falling back to theoretical sqrt scales");
}
if (noise_default) for (int L = 0; L < nlevels; L++) noise_scales[L] = Math.pow(2.0, (L - NOISE_REF_LEVEL) / 2.0);
final String noise_src = noise_default ? ("theoretical sqrt(2)^(L-"+NOISE_REF_LEVEL+")") : "cuasNoise SET";
final String noise_meta = java.util.Arrays.toString(noise_scales)+" ["+noise_src+", ref LEV"+NOISE_REF_LEVEL+"]";
for (int nlev = 0; nlev < nlevels; nlev++) {
if (!c5LevelSelected(c5_levels, nlev) || (ts_pyramid[nlev] == null)) continue;
int levLen = levCounts[nlev];
......@@ -725,7 +807,8 @@ public class CuasDetectRT {
if (num <= 0) continue;
int rnp = roi.width * roi.height;
double [][][][] dnn_roi = new double [num][][][]; // [scene][roi_pix][1][nvel] -> -RECT/-HYPER-RECT
double [][][] off5 = new double [5][num][H*W]; // {dx,dy,s,Vx,Vy} full-frame -> -OFFSET
final int n_off = use_l2 ? 6 : 5; // {dx,dy,s,Vx,Vy} (+L2 age when L2) full-frame -> -OFFSET. By Claude 06/24/2026
double [][][] off5 = new double [n_off][num][H*W];
String [] ts_dnn = new String [num];
System.out.println(now()+" runDnnRemote(): LEV"+nlev+" "+num+" of "+num_all+" scenes, stride "+dnn_stride+", ROI "+roi.width+"x"+roi.height);
// batched: request the level's in-window scenes in chunks; the DGX runs them continuously
......@@ -738,14 +821,14 @@ public class CuasDetectRT {
int cnt = Math.min(REQ, num - j0);
int startNewest = (w0 + j0)*dnn_stride + N_dnn - 1; // absolute newest of the chunk's first scene
long t0 = System.currentTimeMillis();
CuasDnnRemote.BatchResult br = remote.inferBatch(nlev, startNewest, cnt, dnn_stride, roi, rmax);
CuasDnnRemote.BatchResult br = remote.inferBatch(nlev, startNewest, cnt, dnn_stride, roi, rmax, use_l2, use_l2 && (j0 == 0), noise_scales[nlev]);
long t1 = System.currentTimeMillis();
for (int jj = 0; jj < cnt; jj++) {
int j = j0 + jj;
double [][][] fld = new double [rnp][1][br.nvel];
for (int p = 0; p < rnp; p++) { float [] rv = br.roiField[jj][p]; double [] dv = fld[p][0]; for (int v = 0; v < br.nvel; v++) dv[v] = rv[v]; }
dnn_roi[j] = fld;
for (int c = 0; c < 5; c++) { float [] sc = br.offset5[jj][c]; double [] dc = off5[c][j]; for (int p = 0; p < H*W; p++) dc[p] = sc[p]; }
for (int c = 0; c < Math.min(br.nch, off5.length); c++) { float [] sc = br.offset5[jj][c]; double [] dc = off5[c][j]; for (int p = 0; p < H*W; p++) dc[p] = sc[p]; }
int newest = (w0 + j)*dnn_stride + N_dnn - 1;
ts_dnn[j] = ts_pyramid[nlev][newest] + " f"+newest;
}
......@@ -753,35 +836,42 @@ public class CuasDetectRT {
+": gpu="+d2s(br.gpuMs)+"ms ("+d2s(br.gpuMs/cnt)+"ms/scene) roundtrip="+(t1-t0)+"ms");
}
String roiTag = "-ROI"+roi.x+"_"+roi.y+"_"+roi.width+"_"+roi.height;
String title = title_conv5d+"-DNN"+((nlev>0)?("-LEV"+nlev):"")+roiTag;
String title = title_conv5d+"-DNN"+(use_l2?"-L2":"")+"-LEV"+nlev+roiTag; // -LEV0 too, for uniform level indexing (was: no tag for level 0). By Claude on 06/24/2026
int [] win_dnn = timeWindow(ts_dnn);
double [][][][] dnn_w = win4(dnn_roi, win_dnn); String [] ts_w = winS(ts_dnn, win_dnn);
if (save_rect) QuadCLTCPU.saveImagePlusInDirectory(tagCuasImp(cuasRTUtils.showConvKernel5d( dnn_w, roi, ts_w, title+"-RECT"), clt_parameters.imp), getModelDirectory());
if (save_hyper) QuadCLTCPU.saveImagePlusInDirectory(tagCuasImp(cuasRTUtils.showConvKernel5dHyperRect(dnn_w, roi, ts_w, title+"-HYPER-RECT"), clt_parameters.imp), getModelDirectory());
int nsc = win_dnn[1]-win_dnn[0];
// -OFFSET reordered to {s,Vx,Vy,dx,dy}: s first so ImageJ auto-ranges on it (s shows the targets best). off5 is [dx,dy,s,Vx,Vy]. By Claude on 06/20/2026
final int [] ord = {2, 3, 4, 0, 1}; // [dx,dy,s,Vx,Vy] -> [s,Vx,Vy,dx,dy]
// NaN Vx,Vy,dx,dy where s < curt_dnn_thresh so velocity shows only at detections (ImageJ ignores NaN); keep s full. By Claude on 06/20/2026
// -OFFSET reordered to {s,Vx,Vy,dx,dy}(+age when L2): s first so ImageJ auto-ranges on it. off5 is
// [dx,dy,s,Vx,Vy(,age)]. By Claude on 06/20/2026, +L2 age 06/24/2026
final int [] ord = use_l2 ? new int[]{2, 3, 4, 0, 1, 5} : new int[]{2, 3, 4, 0, 1};
final String [] off_labels = use_l2 ? new String[]{"s","Vx","Vy","dx","dy","age"} : new String[]{"s","Vx","Vy","dx","dy"};
// NaN Vx,Vy,dx,dy where s < curt_dnn_thresh so velocity shows only at detections (ImageJ ignores NaN); keep s + age full. By Claude on 06/20/2026
final double off_thr = clt_parameters.imp.curt_dnn_thresh;
final double velScale = 1.0 / Math.max(1, clt_parameters.imp.curt_vel_decimate); // Vx,Vy: cells -> px/level-frame (1/vel_decimate) // By Claude on 06/20/2026
double [][][] off5_w = new double [5][nsc][]; // window the full-frame offset for -OFFSET
double [][][] off5_w = new double [ord.length][nsc][]; // window the full-frame offset for -OFFSET
for (int k = 0; k < nsc; k++) {
double [] sCh = off5[2][win_dnn[0]+k]; // s channel (off5 index 2)
for (int c = 0; c < 5; c++) {
for (int c = 0; c < ord.length; c++) {
double [] src = off5[ord[c]][win_dnn[0]+k]; double [] dst = new double [src.length];
double scl = ((c == 1) || (c == 2)) ? velScale : 1.0; // c1=Vx, c2=Vy cells->px; s/dx/dy unscaled
if (c == 0) System.arraycopy(src, 0, dst, 0, src.length); // c0 = s: keep all, no scale/NaN
double scl = ((c == 1) || (c == 2)) ? velScale : 1.0; // c1=Vx, c2=Vy cells->px; s/dx/dy/age unscaled
boolean keepFull = (c == 0) || (use_l2 && (c == 5)); // s and age shown full (not NaN-gated by s) // By Claude 06/24/2026
if (keepFull) System.arraycopy(src, 0, dst, 0, src.length);
else for (int p = 0; p < src.length; p++) dst[p] = (sCh[p] >= off_thr) ? src[p] * scl : Double.NaN;
off5_w[c][k] = dst;
}
}
ImagePlus impOff = ShowDoubleFloatArrays.showArraysHyperstack(off5_w, W, title+"-OFFSET", ts_w, new String[]{"s","Vx","Vy","dx","dy"}, false);
ImagePlus impOff = ShowDoubleFloatArrays.showArraysHyperstack(off5_w, W, title+"-OFFSET"+l2tag, ts_w, off_labels, false);
tagCuasImp(impOff, clt_parameters.imp);
impOff.setProperty("curt_save_select", new Rectangle(0,0,W,H).toString()); // -OFFSET is full-frame: record its real extent, not the 70x20 ROI // By Claude on 06/20/2026
impOff.setProperty("curt_dnn_noise_scales", noise_meta); // ALL per-level scales (provenance: recover what each layer used). By Claude on 06/24/2026
com.elphel.imagej.readers.EyesisTiff.encodeProperiesToInfo(impOff); // re-encode Info with the override
QuadCLTCPU.saveImagePlusInDirectory(impOff, getModelDirectory());
System.out.println(now()+" runDnnRemote(): LEV"+nlev+" saved -RECT/-HYPER-RECT (ROI) + -OFFSET (full "+W+"x"+H+", {s,Vx,Vy,dx,dy})");
System.out.println(now()+" runDnnRemote(): LEV"+nlev+" saved -RECT/-HYPER-RECT (ROI) + -OFFSET (full "+W+"x"+H+", {"+String.join(",", off_labels)+"})");
}
// Per-level noise scales used this run (printed near the END so it is easy to locate; also in each -OFFSET
// metadata as curt_dnn_noise_scales). Critical/sensitive - keep visible. By Claude on 06/24/2026
System.out.println(now()+" runDnnRemote(): PER-LEVEL NOISE SCALES = "+noise_meta);
} catch (Exception e) {
System.out.println("runDnnRemote() failed: "+e); e.printStackTrace();
}
......
......@@ -29,7 +29,7 @@ import java.nio.file.Files;
* BYE : cmd=0
*/
public class CuasDnnRemote implements AutoCloseable {
private static final int CMD_BYE = 0, CMD_UPLOAD = 1, CMD_INFER = 2, CMD_READBACK = 3;
private static final int CMD_BYE = 0, CMD_UPLOAD = 1, CMD_INFER = 2, CMD_READBACK = 3, CMD_STATUS = 4;
private final Socket sock;
private final DataInputStream in;
private final DataOutputStream out;
......@@ -73,27 +73,30 @@ public class CuasDnnRemote implements AutoCloseable {
* the GPU) + the total pure-GPU compute ms (continuous = production throughput). */
public static class BatchResult {
public double gpuMs;
public int H, W, count, nvel, rh, rw;
public float [][][] offset5; // [count][5][H*W]
public int H, W, count, nvel, rh, rw, nch; // nch = offset channels: 5 {dx,dy,s,Vx,Vy} or 6 (+L2 age). By Claude 06/24/2026
public float [][][] offset5; // [count][nch][H*W]
public float [][][] roiField; // [count][rh*rw][nvel]
}
/** Infer `count` scenes of a level in one round-trip (newest_s = start + s*stride). rmaxCells>0
* enables the on-GPU ghostbuster (== CuasDetectRT.dnnGhostbust). Keep `count` modest so the
* reply byte[] stays < 2GB (count*5*H*W*4): count<=64 is ~419MB at 640x512. */
public BatchResult inferBatch(int level, int start, int count, int stride, Rectangle roi, double rmaxCells) throws Exception {
public BatchResult inferBatch(int level, int start, int count, int stride, Rectangle roi, double rmaxCells,
boolean l2Enable, boolean l2Reset, double noiseScale) throws Exception {
out.writeInt(CMD_INFER); out.writeInt(level); out.writeInt(start); out.writeInt(count); out.writeInt(stride);
out.writeInt(roi.x); out.writeInt(roi.y); out.writeInt(roi.width); out.writeInt(roi.height);
out.writeDouble(rmaxCells);
out.writeInt(l2Enable ? 1 : 0); out.writeInt(l2Reset ? 1 : 0); // run Layer-2 on the DGX; reset hidden state at a level's first chunk. By Claude 06/22/2026
out.writeDouble(noiseScale); // per-level L1-input noise scale (Java is the source of truth; <=0 -> server sqrt fallback). By Claude 06/24/2026
out.flush();
BatchResult r = new BatchResult();
r.gpuMs = in.readDouble(); r.H = in.readInt(); r.W = in.readInt();
r.count = in.readInt(); r.nvel = in.readInt(); r.rh = in.readInt(); r.rw = in.readInt();
r.count = in.readInt(); r.nch = in.readInt(); r.nvel = in.readInt(); r.rh = in.readInt(); r.rw = in.readInt(); // +nch. By Claude 06/24/2026
int hw = r.H * r.W, rn = r.rh * r.rw;
byte [] ob = new byte [r.count * 5 * hw * 4]; in.readFully(ob);
byte [] ob = new byte [r.count * r.nch * hw * 4]; in.readFully(ob);
ByteBuffer obb = ByteBuffer.wrap(ob);
r.offset5 = new float [r.count][5][hw];
for (int s = 0; s < r.count; s++) for (int c = 0; c < 5; c++) for (int p = 0; p < hw; p++) r.offset5[s][c][p] = obb.getFloat();
r.offset5 = new float [r.count][r.nch][hw];
for (int s = 0; s < r.count; s++) for (int c = 0; c < r.nch; c++) for (int p = 0; p < hw; p++) r.offset5[s][c][p] = obb.getFloat();
byte [] rb = new byte [r.count * rn * r.nvel * 4]; in.readFully(rb);
ByteBuffer rbb = ByteBuffer.wrap(rb);
r.roiField = new float [r.count][rn][r.nvel];
......@@ -148,21 +151,70 @@ public class CuasDnnRemote implements AutoCloseable {
if (p.waitFor() != 0) throw new Exception("deploy " + name + " to " + sshTarget + ":" + dest + " failed");
}
/** Ensure the DGX server is reachable at hostPort; if not, deploy the (bundled/override) scripts and
* ssh-launch run_infer_server.sh with RUN=model, then poll until it accepts connections. No manual step. */
public static void ensureServer(String hostPort, String model, String srcdir) throws Exception {
/** Query the running server for its loaded {L1 model, L2 model} paths (L2="" if L1-only), or null
 *  if unreachable, if the server is too old to answer CMD_STATUS, or if the reply is not a plausible
 *  status message. A short read timeout keeps an old server (which ignores the opcode and never
 *  replies) from hanging the client. By Claude 06/24/2026
 *  <p>
 *  Reply format: two length-prefixed UTF-8 strings (int32 byte count, then the bytes). Each length is
 *  validated before allocating: whatever is listening on the port may not speak this protocol, and an
 *  arbitrary 4-byte prefix could otherwise request a multi-GB array. The resulting OutOfMemoryError
 *  is an Error, so the catch (Exception) below would not stop it.
 *  @param host server host name or address
 *  @param port server TCP port
 *  @return {L1 model path, L2 model path} or null when the models could not be determined */
private static String [] queryModels(String host, int port) {
    final int MAX_PATH_BYTES = 4096; // generous bound for a run-directory path; anything larger is not a status reply
    try (Socket s = new Socket()) {
        s.connect(new InetSocketAddress(host, port), 1500);
        s.setSoTimeout(2500);
        DataOutputStream o = new DataOutputStream(s.getOutputStream());
        DataInputStream  i = new DataInputStream (s.getInputStream());
        o.writeInt(CMD_STATUS); o.flush();
        String [] models = new String [2]; // [0] = L1 path, [1] = L2 path ("" when L1-only)
        for (int k = 0; k < models.length; k++) {
            int n = i.readInt();
            if ((n < 0) || (n > MAX_PATH_BYTES)) {
                return null; // not a CMD_STATUS reply - treat like a server that cannot report
            }
            byte [] b = new byte [n];
            i.readFully(b);
            models[k] = new String(b, java.nio.charset.StandardCharsets.UTF_8);
        }
        // Polite goodbye so the server leaves its command loop; failure here does not invalidate the answer.
        try { o.writeInt(CMD_BYE); o.flush(); } catch (Exception e) { /* ignore */ }
        return models;
    } catch (Exception e) {
        return null; // unreachable, or old server (status read timed out)
    }
}
/** ssh-stop the DGX server (docker rm -f) so the next launch can load a different model. By Claude 06/24/2026
 *  <p>
 *  Runs {@code run_infer_server.sh stop} in the remote code directory and waits for ssh to finish.
 *  A non-zero exit status is logged but not treated as fatal: the caller relaunches afterwards and
 *  polls for the server, so a stop that failed shows up there. Logging it here makes the cause visible.
 *  @param sshTarget ssh destination (user@host)
 *  @param code      remote directory holding run_infer_server.sh
 *  @param port      server port, passed to the script as PORT=
 *  @throws Exception if ssh cannot be started or the wait is interrupted */
private static void stopServer(String sshTarget, String code, int port) throws Exception {
    ProcessBuilder pb = new ProcessBuilder("ssh", sshTarget,
            "cd " + code + " && PORT=" + port + " ./run_infer_server.sh stop");
    pb.inheritIO(); // remote stdout/stderr go straight to this process' console/log
    int rc = pb.start().waitFor();
    if (rc != 0) {
        System.out.println("CuasDnnRemote.stopServer(): 'run_infer_server.sh stop' on " + sshTarget
                + " (port " + port + ") exited with code " + rc + " - continuing with relaunch");
    }
}
/** Ensure the DGX server is up at hostPort WITH the requested L1+L2 models; if a different model is
* loaded (or the server is an old build that can't report), tear it down and relaunch. Then deploy the
* (bundled/override) scripts and ssh-launch run_infer_server.sh with RUN=model, polling until it accepts
* connections. No manual step. All decisions logged to System.out (-> ImageJ log file). By Claude 06/24/2026 */
public static void ensureServer(String hostPort, String model, String l2model, String srcdir) throws Exception {
String [] hp = hostPort.split(":");
String host = hp[0].trim();
int port = (hp.length > 1) ? Integer.parseInt(hp[1].trim()) : 5577;
if (canConnect(host, port, 1500)) return; // already up
String sshTarget = "elphel@" + host; // DGX login user
String code = "/home/elphel/c5p_dnn"; // DGX dir (model.py + runs/ live here)
System.out.println("CuasDnnRemote.ensureServer(): no server at " + host + ":" + port
+ " - deploying scripts + launching on " + sshTarget + " (model=" + model + ")");
String wantL2 = (l2model == null) ? "" : l2model; // normalize (server reports "" for L1-only)
if (canConnect(host, port, 1500)) { // a server is listening - is it the right one?
String [] cur = queryModels(host, port);
if ((cur != null) && cur[0].equals(model) && cur[1].equals(wantL2)) {
System.out.println("CuasDnnRemote.ensureServer(): server at " + host + ":" + port
+ " already loaded with matching models (L1=" + model + ", L2=" + (wantL2.isEmpty()?"off":wantL2) + ") - reusing");
return;
}
if (cur == null) {
System.out.println("CuasDnnRemote.ensureServer(): server at " + host + ":" + port
+ " up but did not report models (old build) - restarting for L1=" + model + ", L2=" + (wantL2.isEmpty()?"off":wantL2));
} else {
System.out.println("CuasDnnRemote.ensureServer(): server model MISMATCH at " + host + ":" + port
+ " (has L1=" + cur[0] + ", L2=" + (cur[1].isEmpty()?"off":cur[1])
+ "; want L1=" + model + ", L2=" + (wantL2.isEmpty()?"off":wantL2) + ") - restarting");
}
stopServer(sshTarget, code, port); // free the port so the relaunch loads the new model
}
String run2 = !wantL2.isEmpty() ? (" RUN2=" + wantL2) : ""; // optional Layer-2. By Claude 06/22/2026
System.out.println("CuasDnnRemote.ensureServer(): launching server on " + sshTarget
+ " (model=" + model + ", l2=" + (run2.isEmpty()?"off":wantL2) + ")");
deployScript("infer_server.py", srcdir, sshTarget, code + "/infer_server.py");
deployScript("layer2.py", srcdir, sshTarget, code + "/layer2.py"); // Layer-2 model module (Layer2Net). By Claude 06/22/2026
deployScript("run_infer_server.sh", srcdir, sshTarget, code + "/run_infer_server.sh");
ProcessBuilder pb = new ProcessBuilder("ssh", sshTarget,
"cd " + code + " && chmod +x run_infer_server.sh && RUN=" + model + " PORT=" + port + " ./run_infer_server.sh start");
"cd " + code + " && chmod +x run_infer_server.sh && RUN=" + model + run2 + " PORT=" + port + " ./run_infer_server.sh start");
pb.inheritIO();
pb.start().waitFor();
long deadline = System.currentTimeMillis() + 90000; // model load + warm-up can take a bit
......
......@@ -1168,6 +1168,8 @@ min_str_neib_fpn 0.35
public String curt_dnn_remote_host = "192.168.0.62:5577"; // DGX inference server host:port for curt_dnn_remote (see attic/imagej-elphel-internal/c5p_dnn/infer_server.py) // By Claude on 06/20/2026
public String curt_dnn_remote_model = "runs/weighted9_pm_s"; // DGX-side run dir (with model.pt) the auto-launched server loads (passed as RUN=) // By Claude on 06/20/2026
public String curt_dnn_remote_srcdir = ""; // server-scripts override dir: empty = bundled jar resource (cuas_dnn/), set = local dir - same default-vs-override scheme as the GPU kernels (cuda_project_directory) // By Claude on 06/20/2026
public boolean curt_dnn_l2 = false; // run the trained Layer-2 (track-before-detect ConvGRU) on the DGX after L1: -OFFSET then carries L2 {det,Vx,Vy} (L1's full-res, NON-ghostbusted field fed in, recurrence over the scene/time axis). Off = L1 offset5 as before (re-run with this off to inspect L1). Requires curt_dnn_remote. // By Claude on 06/22/2026
public String curt_dnn_l2_model = "runs/l2_v1"; // DGX-side Layer-2 run dir (with model.pt) the auto-launched server loads (passed as RUN2=) // By Claude on 06/22/2026
public boolean curt_dnn_recur_splat = false; // when feeding the DNN field to the recurrent layer: false = feed per-pixel field as-is; true = splat each pixel's velocity vector to its fractional offset (px+dx,py+dy) so neighbours reinforce in one sub-pixel bin // By Claude on 06/14/2026
public double curt_dnn_recur_scale = 10.0; // multiply the DNN field (softmax*s, peaks ~0.1) by this before the recurrent feed, to reach the recurrent's tuned scale (rs_min=1.0); ~10 -> peak ~1.0. Alternative to lowering curt_recur_rs_min // By Claude on 06/14/2026
public boolean curt_synth_src = true; // default set for the synthetic B-measurement experiment (set false for real-data runs); reads *-CUAS-SYNTHETIC-CUAS.tiff, output titles get -SYNTH // By Claude on 06/12/2026
......@@ -2702,10 +2704,10 @@ min_str_neib_fpn 0.35
"Maximal gain for motion blur correction (if needed more for 1 pixel, increase offset). Will be forced fro the last adjustment");
gd.addNumericField("Maximal gain pose", this.mb_max_gain_inter, 5,7,"x",
"Maximal gain for motion blur correction during interscene correlation. Will be used for all but the last adjustment.");
gd.addTab("CUAS","CUAS Parameters");
gd.addCheckbox ("Enable targets processing", this.cuas_targets_en,
gd.addTab("CUAS Oracle","CUAS OracleParameters");
gd.addCheckbox ("Enable Oracle targets processing", this.cuas_targets_en,
"Enable extraction and processing targets.");
gd.addCheckbox ("Re-calculate center CLT", this.cuas_update_existing,
gd.addCheckbox ("Re-calculate center CLT", this.cuas_update_existing,
"Re-create center_CLT if it exists (FIXME: accumulates errors - need fixing).");
gd.addNumericField("Discard margins", this.cuas_discard_border, 0,3,"pix",
"Discards this number of pixels from each side when merging images.");
......@@ -3478,6 +3480,10 @@ min_str_neib_fpn 0.35
"DGX-side run directory (containing model.pt) the auto-launched server loads, e.g. runs/weighted9_pm_s.");
gd.addStringField ("DNN remote server src (empty=bundled)", this.curt_dnn_remote_srcdir, 40, // By Claude on 06/20/2026
"Override dir for the DGX server scripts (infer_server.py / run_infer_server.sh): empty = bundled jar resource (cuas_dnn/); set = local dir. Same default-vs-override (bundled resource vs local repo) scheme as the GPU kernels - bundled is the working version, refresh it after server-script dev.");
gd.addCheckbox ("DNN Layer-2 (run on DGX)", this.curt_dnn_l2, // By Claude on 06/22/2026
"Run the trained Layer-2 track-before-detect ConvGRU on the DGX after L1 (requires 'DNN remote'). -OFFSET then shows L2 {det,Vx,Vy} (L1's full-res non-ghostbusted field fed in, recurrence over time), titled -DNN-L2-. Uncheck to re-run the old L1 way."); // By Claude on 06/22/2026
gd.addStringField ("DNN Layer-2 model (DGX run dir)", this.curt_dnn_l2_model, 24, // By Claude on 06/22/2026
"DGX-side Layer-2 run directory (containing model.pt) the auto-launched server loads as RUN2=, e.g. runs/l2_v1.");
gd.addNumericField("DNN s-threshold (VIZ ONLY)", this.curt_dnn_thresh, 6,8,"", // By Claude on 06/13/2026, viz-only 06/20/2026
"VISUALIZATION ONLY - NaN's the -OFFSET Vx,Vy,dx,dy where s < this (0 = show all) so velocity shows only at detections. Does NOT gate Layer 2 (the recurrent always sees the full field) nor the -RECT/-HYPER-RECT data. Do NOT use for critical computation - it is a display mask.");
gd.addCheckbox ("DNN recurrent feed: offset-splat", this.curt_dnn_recur_splat, // By Claude on 06/14/2026
......@@ -5026,6 +5032,8 @@ min_str_neib_fpn 0.35
this.curt_dnn_remote_host = gd.getNextString().trim(); // By Claude on 06/20/2026
this.curt_dnn_remote_model = gd.getNextString().trim(); // By Claude on 06/20/2026
this.curt_dnn_remote_srcdir = gd.getNextString().trim(); // By Claude on 06/20/2026
this.curt_dnn_l2 = gd.getNextBoolean(); // By Claude on 06/22/2026
this.curt_dnn_l2_model = gd.getNextString().trim(); // By Claude on 06/22/2026
this.curt_dnn_thresh = gd.getNextNumber(); // By Claude on 06/13/2026
this.curt_dnn_recur_splat = gd.getNextBoolean(); // By Claude on 06/14/2026
this.curt_dnn_recur_scale = gd.getNextNumber(); // By Claude on 06/14/2026
......@@ -6393,6 +6401,8 @@ min_str_neib_fpn 0.35
properties.setProperty(prefix+"curt_dnn_remote_host", this.curt_dnn_remote_host); // String // By Claude on 06/20/2026
properties.setProperty(prefix+"curt_dnn_remote_model", this.curt_dnn_remote_model); // String // By Claude on 06/20/2026
properties.setProperty(prefix+"curt_dnn_remote_srcdir", this.curt_dnn_remote_srcdir); // String // By Claude on 06/20/2026
properties.setProperty(prefix+"curt_dnn_l2", this.curt_dnn_l2+""); // boolean // By Claude on 06/22/2026
properties.setProperty(prefix+"curt_dnn_l2_model", this.curt_dnn_l2_model); // String // By Claude on 06/22/2026
properties.setProperty(prefix+"curt_synth_src", this.curt_synth_src+""); // boolean // By Claude on 06/11/2026
properties.setProperty(prefix+"curt_synth_scale", this.curt_synth_scale+""); // double // By Claude on 06/12/2026
properties.setProperty(prefix+"curt_synth_bg_avg", this.curt_synth_bg_avg+""); // int // By Claude on 06/20/2026
......@@ -6788,6 +6798,8 @@ min_str_neib_fpn 0.35
if (properties.getProperty(prefix+"curt_dnn_remote_host")!=null) this.curt_dnn_remote_host=(String) properties.getProperty(prefix+"curt_dnn_remote_host"); // By Claude on 06/20/2026
if (properties.getProperty(prefix+"curt_dnn_remote_model")!=null) this.curt_dnn_remote_model=(String) properties.getProperty(prefix+"curt_dnn_remote_model"); // By Claude on 06/20/2026
if (properties.getProperty(prefix+"curt_dnn_remote_srcdir")!=null) this.curt_dnn_remote_srcdir=(String) properties.getProperty(prefix+"curt_dnn_remote_srcdir"); // By Claude on 06/20/2026
if (properties.getProperty(prefix+"curt_dnn_l2")!=null) this.curt_dnn_l2=Boolean.parseBoolean(properties.getProperty(prefix+"curt_dnn_l2")); // By Claude on 06/22/2026
if (properties.getProperty(prefix+"curt_dnn_l2_model")!=null) this.curt_dnn_l2_model=(String) properties.getProperty(prefix+"curt_dnn_l2_model"); // By Claude on 06/22/2026
if (properties.getProperty(prefix+"curt_synth_src")!=null) this.curt_synth_src=Boolean.parseBoolean(properties.getProperty(prefix+"curt_synth_src")); // By Claude on 06/11/2026
if (properties.getProperty(prefix+"curt_synth_scale")!=null) this.curt_synth_scale=Double.parseDouble(properties.getProperty(prefix+"curt_synth_scale")); // By Claude on 06/12/2026
......@@ -9065,6 +9077,8 @@ min_str_neib_fpn 0.35
imp.curt_dnn_remote_host = this.curt_dnn_remote_host; // By Claude on 06/20/2026
imp.curt_dnn_remote_model = this.curt_dnn_remote_model; // By Claude on 06/20/2026
imp.curt_dnn_remote_srcdir = this.curt_dnn_remote_srcdir; // By Claude on 06/20/2026
imp.curt_dnn_l2 = this.curt_dnn_l2; // By Claude on 06/22/2026
imp.curt_dnn_l2_model = this.curt_dnn_l2_model; // By Claude on 06/22/2026
imp.curt_synth_src = this.curt_synth_src; // By Claude on 06/11/2026
imp.curt_synth_scale = this.curt_synth_scale; // By Claude on 06/12/2026
imp.curt_synth_bg_avg = this.curt_synth_bg_avg; // By Claude on 06/20/2026
......
......@@ -7239,9 +7239,9 @@ java.lang.NullPointerException
// Moved to the very end, after 3D
// boolean test_vegetation = true;
if (master_CLT.hasCenterClt() && clt_parameters.imp.cuas_targets_en) { // cuas mode
if (master_CLT.hasCenterClt() && clt_parameters.imp.cuas_targets_en && !clt_parameters.imp.curt_en) { // cuas mode
if (debugLevel >-3) {
System.out.println("===== Running CUAS ranging. =====");
System.out.println("===== Running CUAS ranging in Oracle mode. =====");
}
CuasRanging cuasRanging = new CuasRanging (
clt_parameters, // CLTParameters clt_parameters,
......@@ -7258,7 +7258,34 @@ java.lang.NullPointerException
System.out.println("Target detection DONE");
}
}
}
}
// CUAS RT (our code) - coexists with the oracle CuasRanging above (separate buttons/modes, no interference).
// Generate the merged-CUAS stack on the GPU EXPLICITLY here (CuasRanging.prepareFpixels() uses the CUDA
// tile-processor kernels - may be incompatible with a future CUDA), then hand the plain ImagePlus to the
// CUDA-free CuasDetectRT. By Claude on 06/24/2026
if (clt_parameters.imp.curt_en && master_CLT.hasCenterClt()) {
System.out.println("===== Running CUAS RT detection (curt_en). =====");
CuasRanging cuasRangingRT = new CuasRanging(
clt_parameters, // CLTParameters clt_parameters,
master_CLT, // QuadCLT center_CLT,
quadCLTs, // QuadCLT [] scenes,
debugLevel);
ImagePlus imp_targets = cuasRangingRT.prepareFpixels(); // GPU generator (explicit, CUDA-sensitive)
cuasRangingRT.saveUasFlightLogCsv(uasLogReader, imp_targets); // UAS flight-log truth -> <name>-UAS_DATA.tsv (mode-0 only; needs QuadCLT pose). By Claude on 06/24/2026
new CuasDetectRT(
clt_parameters, // CLTParameters clt_parameters,
uasLogReader, // UasLogReader uasLogReader,
imp_targets, // ImagePlus imp_targets (no GPU inside CuasDetectRT)
master_CLT.getX3dDirectory(), // String model_directory (outputs land like oracle)
master_CLT.getImageName(), // String core_base_name
master_CLT.correctionsParameters.cuasSynth, // String cuas_synth_dir (shared, list SET; "" -> model_directory)
master_CLT.correctionsParameters.cuasNoise, // String cuas_noise (inline per-level scales, list SET; "" -> sqrt default). By Claude on 06/24/2026
debugLevel).detectTargets(
clt_parameters, // CLTParameters clt_parameters,
batch_mode, // boolean batch_mode,
debugLevel); // int debugLevel
}
if (generate_mapped || reuse_video) { // modifies combo_dsn_final ?
int tilesX = master_CLT.getTileProcessor().getTilesX();
......
......@@ -9241,6 +9241,8 @@ if (debugLevel > -100) return true; // temporarily !
clt_parameters, // CLTParameters clt_parameters,
uasLogReader, // UasLogReader uasLogReader,
model_paths[i], // String model_directory) {
quadCLT_main.correctionsParameters.cuasSynth, // String cuas_synth_dir (shared, list SET; "" -> model_directory). By Claude on 06/24/2026
quadCLT_main.correctionsParameters.cuasNoise, // String cuas_noise (inline per-level scales, list SET; "" -> sqrt default). By Claude on 06/24/2026
debugLevel); // int debugLevel)
CuasMotion cuasMotion= cuasDetectRT.detectTargets(
clt_parameters, // CLTParameters clt_parameters,
......
......@@ -27,7 +27,27 @@ import torch.nn.functional as F
from model import RawFCN
CMD_BYE, CMD_UPLOAD, CMD_INFER, CMD_READBACK = 0, 1, 2, 3
GPU_CHUNK = 16 # scenes processed per batched GPU pass (memory vs utilization)
CMD_STATUS = 4 # report loaded L1/L2 model paths so the client can detect a model change. By Claude on 06/24/2026
GPU_CHUNK = 16 # scenes processed per batched GPU pass (memory vs utilization)
VEL_DECIMATE = 4 # velocity-grid cells per px/level-frame (Java curt_vel_decimate); L2 was trained on Vx,Vy in px/frame (cells/4). By Claude on 06/22/2026
AGE_THR = 0.2 # L2 track-age death threshold: a cell with det<=AGE_THR "dies" (age 0). Raised 0.01->0.2 so the
# weak noise halo dies and the 5x5 max-pool can't dilate age across gaps. By Claude on 06/24/2026
AGE_K = 0.5 # ancestor gate: a 5x5 previous-frame neighbor may pass its age only if its det >= AGE_K * (local
# max det in that 5x5) - blocks a weak-but-old straggler from seeding age. By Claude on 06/24/2026
NOISE_REF_LEVEL = 3 # the net is calibrated to ~LEV3's absolute noise (low-contrast signals tested mainly on LEV3).
# The pyramid averages 2 frames/level so sigma drops sqrt(2)/level; scale each level's L1 input by
# sqrt(2)^(level-REF) to put every level at LEV3's absolute noise (uniform FP). By Claude on 06/24/2026
def load_l2(run_dir, device):
    """Load the optional Layer-2 (track-before-detect) recurrent net. By Claude on 06/22/2026

    Reads <run_dir>/model.pt, rebuilds Layer2Net from the hyper-parameters stored under the
    checkpoint's "args" key (falling back to ch=24, G=32, vmax=1.4 when absent), loads the
    weights from the "model" key, switches to eval mode and moves the net to `device`.

    NOTE: weights_only=False lets torch.load unpickle arbitrary objects - only point this at
    checkpoints you trust.
    """
    from layer2 import Layer2Net  # local import: layer2.py is only needed when Layer-2 is requested

    checkpoint = torch.load(os.path.join(run_dir, "model.pt"), map_location="cpu", weights_only=False)
    hparams = checkpoint.get("args", {}) or {}  # tolerate a missing or None "args" entry
    ch_hidden = hparams.get("ch", 24)
    vmax = hparams.get("vmax", 1.4)
    net = Layer2Net(ch_in=3, ch_hidden=ch_hidden, grid=hparams.get("G", 32), vmax=vmax)
    net.load_state_dict(checkpoint["model"])
    net.eval().to(device)
    print(f"loaded L2 {run_dir}/model.pt: ch_hidden={ch_hidden} vmax={vmax}", flush=True)
    return net
def load_model(run_dir, device):
......@@ -113,12 +133,14 @@ def decode(field, vr, roi, rmax_cells):
return offset5, roi_field.permute(0, 2, 3, 1).contiguous(), nvel # [B,5,H,W], [B,rh,rw,nvel]
def serve(run_dir, host, port):
def serve(run_dir, host, port, l2_run=None):
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.backends.cudnn.benchmark = True
torch.set_grad_enabled(False) # inference-only server; L2 recurrence (m2.cell/decode) isn't @no_grad'd. By Claude 06/22/2026
m, N, P, vr = load_model(run_dir, device)
m2 = load_l2(l2_run, device) if l2_run else None # optional Layer-2; None -> L1-only (old way). By Claude 06/22/2026
print(f"device={device} gpu={torch.cuda.get_device_name(0) if device=='cuda' else 'cpu'} "
f"patch={P} N={N} vr={vr}", flush=True)
f"patch={P} N={N} vr={vr} L2={'on('+l2_run+')' if m2 is not None else 'off'}", flush=True)
_ = shift_stitch(m, torch.zeros(1, N, 64, 64, device=device), P) # warm-up
if device == "cuda":
torch.cuda.synchronize()
......@@ -134,11 +156,21 @@ def serve(run_dir, host, port):
conn, addr = srv.accept()
conn.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
print(f"{datetime.now():%H:%M:%S} client {addr}", flush=True)
h_l2 = None # Layer-2 recurrent hidden state [1,ch,H,W]; persists across INFER chunks, reset on l2_reset. By Claude 06/22/2026
age_l2 = None # L2 track-age field [1,1,H,W]; sprev_l2 = previous-frame L2 det; carried+reset like h_l2. By Claude 06/24/2026
sprev_l2 = None
try:
while True:
cmd = struct.unpack(">i", recvall(conn, 4))[0]
if cmd == CMD_BYE:
break
if cmd == CMD_STATUS:
# Reply with loaded L1 + L2 model paths (len-prefixed UTF-8); empty L2 = L1-only.
# Lets the Java client detect a model change and relaunch. By Claude on 06/24/2026
b1 = run_dir.encode("utf-8")
b2 = (l2_run or "").encode("utf-8")
conn.sendall(struct.pack(">i", len(b1)) + b1 + struct.pack(">i", len(b2)) + b2)
continue
if cmd == CMD_UPLOAD:
T, H, W = struct.unpack(">iii", recvall(conn, 12))
data = recvall(conn, T * H * W * 4)
......@@ -157,7 +189,15 @@ def serve(run_dir, host, port):
elif cmd == CMD_INFER:
level, start, count, stride, rx, ry, rw, rh = struct.unpack(">iiiiiiii", recvall(conn, 32))
rmax = struct.unpack(">d", recvall(conn, 8))[0]
lev = pyr[level] # [Tl,H,W]
l2_enable, l2_reset = struct.unpack(">ii", recvall(conn, 8)) # By Claude 06/22/2026
noise_scale = struct.unpack(">d", recvall(conn, 8))[0] # per-level L1-input noise scale from Java (single source of truth); <=0 -> server fallback. By Claude 06/24/2026
use_l2 = bool(l2_enable) and (m2 is not None)
# Per-level noise normalization: scale this level's L1 input to LEV3's absolute noise so all
# levels sit in the net's trained regime (uniform FP across levels). LEV3 -> 1.0, lower/noisier
# levels scale down, higher levels up. Independent of the age filter. By Claude on 06/24/2026
if noise_scale <= 0.0: # fallback only: Java didn't send one -> theoretical sqrt(2)^(level-ref)
noise_scale = 2.0 ** ((level - NOISE_REF_LEVEL) / 2.0)
lev = pyr[level] * noise_scale # [Tl,H,W]
H, W = lev.shape[1], lev.shape[2]
nvel = (2 * vr + 1) ** 2
o5_gpu, rf_gpu = [], []
......@@ -175,18 +215,54 @@ def serve(run_dir, host, port):
wins = torch.stack([lev[(start + (c0 + j) * stride) - N + 1:
(start + (c0 + j) * stride) + 1].flip(0) for j in range(b)]) # [b,N,H,W]
field = shift_stitch(m, wins, P) # [b,C,H,W]
o5, rf, nv = decode(field, vr, (rx, ry, rw, rh), rmax)
o5, rf, nv = decode(field, vr, (rx, ry, rw, rh), rmax) # L1: ghostbusted offset5 + ROI
nvel = nv
o5_gpu.append(o5); rf_gpu.append(rf) # keep on GPU
if use_l2:
# Layer-2 (track-before-detect) over the scene/time axis. Feed the FULL
# (non-ghostbusted) field as (s, Vx/vd, Vy/vd) px/level-frame; carry the recurrent
# hidden state across chunks (reset on l2_reset at the level's first chunk). Output
# replaces offset5 with {L1 dx, L1 dy, L2 det, L2 Vx*vd, L2 Vy*vd} (vel back to cells
# so Java's existing /vel_decimate viz scaling -> px/level-frame). By Claude 06/22/2026
ong, _, _ = decode(field, vr, (rx, ry, rw, rh), 0.0) # no ghostbuster (L2 gets full field)
l2in = torch.stack([ong[:, 2], ong[:, 3] / VEL_DECIMATE, ong[:, 4] / VEL_DECIMATE], 1) # [b,3,H,W]
# FPN-bad margins arrive as NaN; the recurrent circular conv would otherwise spread
# NaN inward by the kernel radius every frame ("eating" the borders). Sanitize the
# input so NaN can never seed/propagate through the hidden state. By Claude 06/22/2026
l2in = torch.nan_to_num(l2in, nan=0.0, posinf=0.0, neginf=0.0)
Hf, Wf = l2in.shape[2], l2in.shape[3]
if (h_l2 is None) or (h_l2.shape[2] != Hf) or (h_l2.shape[3] != Wf) or (l2_reset and c0 == 0):
h_l2 = torch.zeros(1, m2.ch_hidden, Hf, Wf, device=device, dtype=field.dtype)
age_l2 = torch.zeros(1, 1, Hf, Wf, device=device, dtype=field.dtype) # track age, carried+reset like h_l2
sprev_l2 = torch.zeros(1, 1, Hf, Wf, device=device, dtype=field.dtype) # previous-frame L2 det
dets, vxs, vys, ages = [], [], [], []
for j in range(b): # forward in time, carry hidden + age
h_l2 = m2.cell(l2in[j:j+1], h_l2)
dlog, vel = m2.decode(h_l2) # [1,1,H,W],[1,2,H,W]
s = torch.sigmoid(dlog[:, 0:1]) # [1,1,H,W] current L2 det
# AGE (track-before-detect persistence): die where det<=AGE_THR, else 1 + oldest age among
# 5x5 PREVIOUS-frame neighbors that are themselves STRONG (det >= AGE_K * local-max det) -
# so a weak-but-old straggler can't seed age; the raised AGE_THR stops the noise halo from
# dilating age across gaps. Level-uniform 5x5 (pyramid keeps ~const px/level-frame). By Claude 06/24/2026
maxS = F.max_pool2d(sprev_l2, 5, 1, 2) # local max prev-det in 5x5
elig = (sprev_l2 >= AGE_K * maxS) & (sprev_l2 > AGE_THR) # strong AND alive ancestors
prev = torch.where(elig, age_l2, torch.zeros_like(age_l2)) # only strong ancestors pass age
age_l2 = torch.where(s > AGE_THR, F.max_pool2d(prev, 5, 1, 2) + 1.0, torch.zeros_like(age_l2))
sprev_l2 = s
dets.append(s[:, 0]); ages.append(age_l2[:, 0]); vxs.append(vel[:, 0]); vys.append(vel[:, 1])
l2vx = torch.cat(vxs, 0) * VEL_DECIMATE; l2vy = torch.cat(vys, 0) * VEL_DECIMATE
o5 = torch.stack([o5[:, 0], o5[:, 1], torch.cat(dets, 0), l2vx, l2vy, torch.cat(ages, 0)], 1) # +L2 age (6th); keep L1 dx,dy
o5_gpu.append(o5); rf_gpu.append(rf) # keep on GPU (rf = L1 ROI reference even when L2 on)
if device == "cuda":
ev1.record(); torch.cuda.synchronize(); gms = ev0.elapsed_time(ev1)
else:
gms = (time.perf_counter() - t0) * 1e3
allo = torch.cat(o5_gpu, 0).cpu().numpy().astype(">f4") # [count,5,H,W] D2H untimed (dev-only)
allo = torch.cat(o5_gpu, 0).cpu().numpy().astype(">f4") # [count,nch,H,W] nch=5 (L1) or 6 (L2:+age) D2H untimed
nch = allo.shape[1] # channel count sent in the header (was hardcoded 5)
allr = torch.cat(rf_gpu, 0).cpu().numpy().astype(">f4") # [count,rh,rw,nvel]
print(f"{datetime.now():%H:%M:%S} INFER lev={level} {count} scenes (f{start}..,stride {stride}) "
f"ROI={rw}x{rh} ghost={rmax:.1f} gpu={gms:.1f}ms ({(allo.nbytes+allr.nbytes)/1e6:.1f}MB out)", flush=True)
conn.sendall(struct.pack(">diiiiii", gms, H, W, count, nvel, rh, rw))
f"ROI={rw}x{rh} ghost={rmax:.1f} nscale={noise_scale:.3f} L2={'on' if use_l2 else 'off'}{'(reset)' if (use_l2 and l2_reset) else ''} "
f"gpu={gms:.1f}ms ({(allo.nbytes+allr.nbytes)/1e6:.1f}MB out)", flush=True)
conn.sendall(struct.pack(">diiiiiii", gms, H, W, count, nch, nvel, rh, rw)) # +nch (offset channels). By Claude 06/24/2026
conn.sendall(allo.tobytes())
conn.sendall(allr.tobytes())
elif cmd == CMD_READBACK:
......@@ -203,7 +279,8 @@ def serve(run_dir, host, port):
if __name__ == "__main__":
ap = argparse.ArgumentParser()
ap.add_argument("--run", default="runs/weighted9_pm_s")
ap.add_argument("--l2run", default=None, help="optional Layer-2 run dir (model.pt); omit for L1-only")
ap.add_argument("--host", default="0.0.0.0")
ap.add_argument("--port", type=int, default=5577)
args = ap.parse_args()
serve(args.run, args.host, args.port)
serve(args.run, args.host, args.port, l2_run=args.l2run)
"""C5P Layer-2 (track-before-detect) — minimal circular-ConvGRU on a torus. By Claude on 06/21/2026
Layer 1 (frozen RawFCN) emits, per level-frame, a dense stride-4 field {s, Vx, Vy, dx, dy}.
Layer 2 is a RECURRENT net whose hidden state is the running 4D track memory (x, y, vx, vy),
fed a target-following 32x32 slice of that field one frame at a time. This first cut is the
SIMPLEST viable version (per Andrey 06/21):
- plain circular ConvGRU (NO explicit velocity-advection warp yet — added as a 2nd step;
the conv recurrence still learns local motion implicitly),
- dense Gaussian-bump readout (det map + Vx,Vy maps; supervise with a bump at truth),
- single target, free-orbit (absolute position = torus-local + winding offset, tracked
OUTSIDE the net; not needed for this module's forward/backward).
Torus rationale: xy is a PERIODIC 32x32 grid (Conv2d padding_mode='circular'). With the target
drift over a window staying << 32 cells, the single target "lives in infinite space" on a tiny
fixed array — no border code, translation-equivariant everywhere, trivial to batch. vx,vy are
NOT periodic (bounded by vmax; velocity does not wrap).
UNITS: the field grid is stride-4, so one torus cell = 4 scene px. Vx,Vy channels and the
velocity readout are kept in Layer-1 units (px/level-frame); vmax≈1.4 px/frame => ~0.35 cells/
frame => ~2.8 cells over N=8 (<< 32, the R<<G condition the torus relies on). The /4 conversion
to cells only matters once we add the advection warp.
Run the smoke test: python layer2.py
"""
import argparse
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# ---------------------------------------------------------------------------
# Recurrent cell
# ---------------------------------------------------------------------------
class ConvGRUCellTorus(nn.Module):
    """One ConvGRU step with circular (toroidal) padding on the xy grid. By Claude on 06/21/2026

    Standard ConvGRU:
        z = sigmoid(Wz . [x, h])        update gate      [B, Ch, G, G]
        r = sigmoid(Wr . [x, h])        reset gate       [B, Ch, G, G]
        n = tanh   (Wn . [x, r*h])      candidate state  [B, Ch, G, G]
        h'= (1 - z) * h + z * n         new hidden       [B, Ch, G, G]
    All convs are k x k with padding_mode='circular' so the grid wraps both axes.

    Args:
        ch_in:     number of input (field) channels.
        ch_hidden: number of hidden-state channels.
        k:         kernel size; must be a positive odd integer so that pad = k // 2 keeps the
                   spatial size unchanged (an even k would grow the grid by one cell and the
                   gate/hidden blend would then fail with a shape mismatch).

    Raises:
        ValueError: if k is not a positive odd integer.
    """

    def __init__(self, ch_in, ch_hidden, k=3):
        super().__init__()
        if k < 1 or k % 2 == 0:
            raise ValueError(f"ConvGRUCellTorus: kernel size k must be a positive odd integer, got {k}")
        pad = k // 2
        cat = ch_in + ch_hidden  # concat of input + hidden along channels
        # one conv per gate; circular pad makes the receptive field wrap the torus edges
        # (attribute names are part of the checkpoint state_dict keys - do not rename)
        self.conv_z = nn.Conv2d(cat, ch_hidden, k, padding=pad, padding_mode='circular')
        self.conv_r = nn.Conv2d(cat, ch_hidden, k, padding=pad, padding_mode='circular')
        self.conv_n = nn.Conv2d(cat, ch_hidden, k, padding=pad, padding_mode='circular')

    def forward(self, x, h):
        # x: [B, Cin, G, G]   h: [B, Ch, G, G]   ->   h_new: [B, Ch, G, G]
        xh = torch.cat([x, h], dim=1)           # [B, Cin+Ch, G, G]
        z = torch.sigmoid(self.conv_z(xh))      # [B, Ch, G, G] update gate
        r = torch.sigmoid(self.conv_r(xh))      # [B, Ch, G, G] reset gate
        xrh = torch.cat([x, r * h], dim=1)      # [B, Cin+Ch, G, G] reset-masked hidden
        n = torch.tanh(self.conv_n(xrh))        # [B, Ch, G, G] candidate
        return (1.0 - z) * h + z * n            # [B, Ch, G, G] new hidden
# ---------------------------------------------------------------------------
# Layer-2 net
# ---------------------------------------------------------------------------
class Layer2Net(nn.Module):
    """Recurrent track-before-detect over a torus field sequence. By Claude on 06/21/2026

    forward(seq) consumes T frames of the Layer-1 field slice and returns, per frame, a dense
    det logit + (Vx,Vy) over the torus. Hidden state starts at 0 (no track) and accumulates
    evidence across frames - the recurrence IS the track filter.

    The net is fully convolutional, so it is not tied to the training grid size: when no hidden
    state is passed, forward() sizes the initial hidden state from the input's spatial dimensions
    (identical to the old behavior for grid x grid inputs).
    """

    def __init__(self, ch_in=3, ch_hidden=24, grid=32, vmax=1.4, k=3):
        super().__init__()
        self.ch_in = ch_in              # field channels fed in: s, Vx, Vy
        self.ch_hidden = ch_hidden      # hidden track-memory channels
        self.grid = grid                # torus side G (cells); one cell = 4 scene px
        self.vmax = vmax                # velocity readout bound, px/level-frame (matches Layer-1 training vmax)
        self.cell = ConvGRUCellTorus(ch_in, ch_hidden, k=k)
        # readout head: hidden -> det(1) + raw Vx,Vy(2); 1x1 conv = per-cell decode
        self.head = nn.Conv2d(ch_hidden, 1 + 2, 1)

    def init_hidden(self, B, device, dtype, hw=None):
        """Zero hidden state ("no track yet").

        Args:
            B:      batch size.
            device: torch device for the state.
            dtype:  torch dtype for the state.
            hw:     optional (H, W) spatial size; defaults to (grid, grid).

        Returns:
            Tensor [B, Ch, H, W] of zeros.
        """
        H, W = (self.grid, self.grid) if hw is None else hw
        return torch.zeros(B, self.ch_hidden, H, W, device=device, dtype=dtype)

    def decode(self, h):
        # h: [B, Ch, H, W] -> det_logit [B, 1, H, W], vel [B, 2, H, W] bounded to +-vmax
        o = self.head(h)                            # [B, 3, H, W]
        det = o[:, 0:1]                             # [B, 1, H, W] raw logit (apply sigmoid outside)
        vel = self.vmax * torch.tanh(o[:, 1:3])     # [B, 2, H, W] px/level-frame
        return det, vel

    def forward(self, seq, h=None):
        # seq: [B, T, Cin, H, W] -> det [B, T, 1, H, W], vel [B, T, 2, H, W]
        B, T = seq.shape[0], seq.shape[1]
        if h is None:
            # size the state from the input so non-training grid sizes work too
            h = self.init_hidden(B, seq.device, seq.dtype, hw=seq.shape[-2:])
        dets, vels = [], []
        for t in range(T):                  # BPTT unrolls this loop
            h = self.cell(seq[:, t], h)     # [B, Ch, H, W] recurrent update
            det, vel = self.decode(h)       # per-frame readout
            dets.append(det)
            vels.append(vel)
        det = torch.stack(dets, dim=1)      # [B, T, 1, H, W]
        vel = torch.stack(vels, dim=1)      # [B, T, 2, H, W]
        return det, vel
# ---------------------------------------------------------------------------
# Dense Gaussian-bump supervision (single target)
# ---------------------------------------------------------------------------
def bump_target(pos_xy, grid, sigma=1.0, device="cpu"):
    """Render a toroidal Gaussian bump at each (sub-cell) target position. By Claude on 06/21/2026

    pos_xy: [B, T, 2] (x, y) in torus cells; may be fractional or outside [0, G), it wraps
    grid:   torus side G in cells
    sigma:  bump standard deviation in cells
    device: device the cell-coordinate axis is created on
    Returns the detection bump [B, T, 1, G, G] with values in [0, 1]; x runs along the last
    axis and y along the one before it. Distances use the wrapped (nearest-image) metric, so a
    target next to the edge still produces one round bump that straddles the seam.
    """
    half = grid / 2
    axis = torch.arange(grid, device=device).float()       # [G] cell indices
    col = axis.view(1, 1, 1, grid)                          # x varies along the last axis
    row = axis.view(1, 1, grid, 1)                          # y varies along the row axis
    tx = pos_xy[..., 0][..., None, None]                    # [B, T, 1, 1]
    ty = pos_xy[..., 1][..., None, None]                    # [B, T, 1, 1]
    # nearest-image offset on the G-periodic grid; each value lands in [-G/2, G/2)
    off_x = (col - tx + half) % grid - half                 # [B, T, 1, G]
    off_y = (row - ty + half) % grid - half                 # [B, T, G, 1]
    dist2 = off_x * off_x + off_y * off_y                   # broadcasts to [B, T, G, G]
    bump = torch.exp(-dist2 / (2 * sigma * sigma))
    return bump.unsqueeze(2)                                # [B, T, 1, G, G]
def layer2_loss(det_logit, vel, det_t, vel_t, support=0.3, pos_weight=20.0):
    """Combined Layer-2 loss: weighted detection BCE + velocity MSE on the bump core. By Claude 06/21

    det_logit: [B,T,1,G,G] raw logits          det_t: [B,T,1,G,G] target bump in [0,1]
    vel:       [B,T,2,G,G] predicted velocity  vel_t: [B,T,2,G,G] target velocity
                                                      (px/level-frame; read only where det_t>support)
    support:    cells with det_t above this value form the velocity-supervised core
    pos_weight: BCE positive-class weight, compensating for the sparse bump
    Returns (loss, parts): loss = det + 0.3 * vel as a tensor, parts = {"det", "vel"} as floats.
    """
    weight = torch.tensor(pos_weight, device=det_logit.device)
    det_loss = F.binary_cross_entropy_with_logits(det_logit, det_t, pos_weight=weight)
    core = det_t > support                                   # [B,T,1,G,G] bump core mask
    if core.any():
        sel = core.expand_as(vel)                            # same mask for both velocity channels
        vel_loss = F.mse_loss(vel[sel], vel_t[sel])
    else:
        # no supervised cell: an exact zero that is still attached to vel's autograd graph
        vel_loss = vel.sum() * 0.0
    parts = {"det": float(det_loss.detach()), "vel": float(vel_loss.detach())}
    return det_loss + 0.3 * vel_loss, parts
# ---------------------------------------------------------------------------
# Smoke test: fake Layer-1-like field, single target on a wrapping straight line.
# Verifies the module trains end-to-end (forward + BPTT + loss) BEFORE real Layer-1 fields.
# This is NOT the real training data — that comes in the next step (trajectory-sequence gen).
# ---------------------------------------------------------------------------
def fake_field_batch(rng, B, T, grid, vmax, sigma=1.0, snr=4.0, device="cpu"):
    """Generate a toy 'Layer-1 field' sequence together with its truth. By Claude on 06/21/2026

    Each sample holds a single target that starts at a random torus cell and moves with a
    constant (vx, vy) px/frame, i.e. (vx, vy)/4 cells/frame, wrapping around the grid.
    Channel s is a noisy Gaussian bump at the target; channels Vx, Vy hold the true velocity
    on the bump core (bump > 0.3) and 0 elsewhere.

    rng:    numpy Generator supplying every random draw
    B, T:   batch size and sequence length
    grid:   torus side G in cells
    vmax:   speed scale; speed is drawn from [0.3, 1.0) * vmax
    sigma:  bump width in cells
    snr:    factor applied to the bump before unit-variance noise is added
    Returns:
      seq    [B,T,3,G,G]  (s, Vx, Vy)
      pos    [B,T,2]      target (x,y) in cells
      veltru [B,T,2]      true (Vx,Vy) px/level-frame
    """
    seq = torch.zeros(B, T, 3, grid, grid, device=device)
    pos = torch.zeros(B, T, 2, device=device)
    veltru = torch.zeros(B, T, 2, device=device)
    frames = np.arange(T)                                   # frame indices 0..T-1
    for b in range(B):
        # draw order (x0, y0, heading, speed, noise) is kept so a seeded rng reproduces batches
        x0 = rng.uniform(0, grid)
        y0 = rng.uniform(0, grid)
        ang = rng.uniform(0, 2 * np.pi)
        spd = rng.uniform(0.3, 1.0) * vmax
        vx = spd * np.cos(ang)                              # px/level-frame
        vy = spd * np.sin(ang)
        # straight-line track in cells (stride-4 => /4), wrapped back onto the torus
        track_x = (x0 + vx / 4.0 * frames) % grid
        track_y = (y0 + vy / 4.0 * frames) % grid
        pos[b, :, 0] = torch.from_numpy(track_x)
        pos[b, :, 1] = torch.from_numpy(track_y)
        veltru[b, :, 0] = vx
        veltru[b, :, 1] = vy
        # s channel: noisy toroidal bump at the target; velocity channels: truth over the bump
        bump = bump_target(pos[b:b + 1], grid, sigma, device)[0, :, 0]    # [T,G,G]
        noise = torch.from_numpy(rng.standard_normal((T, grid, grid)).astype(np.float32)).to(device)
        seq[b, :, 0] = (snr * bump + noise).clamp(min=0.0)  # s >= 0, SNR-scaled signal in noise
        core = (bump > 0.3).float()                         # [T,G,G]
        seq[b, :, 1] = vx * core
        seq[b, :, 2] = vy * core
    return seq, pos, veltru
def smoke_test(steps=400, B=16, T=8, grid=32, vmax=1.4, device=None):
    """Overfit the toy generator for a few hundred steps as an end-to-end check.

    Expected trend while training: the detection peak at the truth sharpens and the velocity
    MSE drops. Progress is printed at step 1 and then every 50 steps.

    steps:  number of optimizer steps
    B, T:   batch size and sequence length of each generated batch
    grid:   torus side G in cells
    vmax:   velocity bound given to both the generator and the network
    device: torch device; when falsy, "cuda" if available, otherwise "cpu"
    """
    if not device:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    rng = np.random.default_rng(0)                          # fixed seed for a repeatable run
    net = Layer2Net(ch_in=3, ch_hidden=24, grid=grid, vmax=vmax).to(device)
    opt = torch.optim.Adam(net.parameters(), 2e-3)
    nparams = sum(p.numel() for p in net.parameters())
    print(f"Layer2Net: {nparams} params, grid={grid}, ch_hidden=24, device={device}", flush=True)
    zero = torch.zeros((), device=device)
    for step in range(1, steps + 1):
        seq, pos, veltru = fake_field_batch(rng, B, T, grid, vmax, device=device)
        det_t = bump_target(pos, grid, sigma=1.0, device=device)            # [B,T,1,G,G]
        # dense velocity target: true velocity on the bump core, zero everywhere else
        core = det_t > 0.3                                                  # [B,T,1,G,G]
        vel_t = torch.where(core, veltru[:, :, :, None, None], zero)        # [B,T,2,G,G]
        det_logit, vel = net(seq)
        loss, comp = layer2_loss(det_logit, vel, det_t, vel_t)
        opt.zero_grad()
        loss.backward()
        opt.step()
        if step == 1 or step % 50 == 0:
            with torch.no_grad():
                # probabilities come from the logits computed before this step's update
                p = torch.sigmoid(det_logit)
                peak = float(p[det_t > 0.3].mean())
                bg = float(p[det_t < 0.05].max())
            print(f"step {step:4d} det {comp['det']:.4f} vel {comp['vel']:.4f} "
                  f"peak(s@truth) {peak:.3f} max-bg {bg:.3f}", flush=True)
    print("smoke test done.", flush=True)
if __name__ == "__main__":
    # command-line entry point: run the smoke test with optional overrides
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", type=int, default=400)
    parser.add_argument("--grid", type=int, default=32)
    parser.add_argument("--vmax", type=float, default=1.4)
    args = parser.parse_args()
    smoke_test(steps=args.steps, grid=args.grid, vmax=args.vmax)
#!/usr/bin/env bash
# Start/stop the CUAS DGX inference server (PyTorch RawFCN, cuDNN) in the NGC container.
# By Claude on 06/20/2026. Run on the DGX (elphel@192.168.0.62).
#   start|stop|logs|status   env: RUN=runs/<model> RUN2=runs/<l2> PORT=5577
set -euo pipefail
NAME=cuas_infer
IMG=nvcr.io/nvidia/pytorch:25.10-py3
CODE=/home/elphel/c5p_dnn
RUN="${RUN:-runs/weighted9_pm_s}"
RUN2="${RUN2:-}"          # optional Layer-2 run dir; empty -> L1-only. By Claude 06/22/2026
PORT="${PORT:-5577}"
case "${1:-start}" in
  start)
    # remove any earlier container with this name; "no such container" is not an error here
    docker rm -f "$NAME" >/dev/null 2>&1 || true
    # Layer-2 option is added only when RUN2 is set
    L2ARG=""
    if [ -n "$RUN2" ]; then
      L2ARG="--l2run $RUN2"
    fi
    # exactly one server launch, inside the container, with the optional Layer-2 argument;
    # $L2ARG stays unquoted on purpose so it expands to zero words or to "--l2run <dir>"
    docker run -d --name "$NAME" --gpus all --network host \
      -v "$CODE":/work -w /work "$IMG" \
      python infer_server.py --run "$RUN" $L2ARG --port "$PORT" >/dev/null
    echo "started $NAME (run=$RUN l2=${RUN2:-off} port=$PORT)"; sleep 3; docker logs "$NAME"
    ;;
  stop)   docker rm -f "$NAME" >/dev/null 2>&1 && echo "stopped" || echo "not running" ;;
  logs)   docker logs --tail 60 "$NAME" ;;
  status) docker ps --filter "name=$NAME" --format "{{.Names}} {{.Status}}" ;;
  *) echo "usage: $0 {start|stop|logs|status}  (env: RUN=, RUN2=, PORT=)"; exit 1 ;;
esac
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment