Commit 1bf9e9e4 authored by Andrey Filippov's avatar Andrey Filippov

CLAUDE: Add RT detector skeleton (com.elphel.imagej.cuas.rt)

Pure-Java prototype CuasRTDetector + RtParams parameters class +
CuasRTLib JNA stub (activate when JNA added to pom.xml).
No dependencies added; build is clean. See design discussion in
attic/imagej-elphel-internal/handoffs/2026-05-21_RealTimeDesign.md.
Co-authored-by: 's avatarClaude <claude@elphel.com>
parent 9dacd956
/**
** CuasRTDetector.java
**
** Copyright (C) 2026 Elphel, Inc.
**
** -----------------------------------------------------------------------------**
**
** CuasRTDetector.java is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
*/
package com.elphel.imagej.cuas.rt;
import java.util.ArrayList;
/**
* Real-time CUAS target detector — pure-Java prototype (single temporal pyramid level).
*
* This is the reference implementation used for algorithm tuning before the
* C++/CUDA port in cuas_rt_gpu/src/rt_detector.cu. Keep all methods as
* static pure functions (flat float[] arrays, no streams, no ArrayList in hot
* paths) so each method maps 1:1 to a C++ function and eventually a CUDA kernel.
*
* Architecture overview (single pyramid level):
*
* fpixels[nFrames][width*height] ← already temporally averaged/decimated
* │
* ▼
* layer1()[nChan][width*height] ← Layer 1: spatio-temporal matched filter
* │ one response map per velocity channel
* ▼
* updateAccumChannel() × nChan ← Layer 2: per-channel recurrent buffer
* shift → mem*accum+obs*layer1 → leakyReLU
* │
* ▼
* findPeaks() → float[] (x,y) pairs ← Stage 2 trigger candidates
*
* Velocity channels:
* nChan = (2*maxVel+1)^2, ivx/ivy in [-maxVel..+maxVel] px/sample.
* chanIdx(ivx,ivy) = (ivy+maxVel)*nVel + (ivx+maxVel) [ivy major, ivx minor]
*
* 50% overlap mode (recommended):
* Run at stride = nFrames/2; consecutive windows share half their frames.
* Reduces worst-case detection latency from nFrames to nFrames/2 samples.
* The accumulation buffer's memCoeff absorbs the resulting double-counting smoothly.
*
* Validation against current software:
* Feed the same fpixels[] used by refineMotionVectors() at a single target
* location and compare findPeaks() output to the current detection result.
*/
public class CuasRTDetector {
// =========================================================================
// Layer 1: spatio-temporal matched filter
// =========================================================================
/**
* Layer 1 response for one velocity channel (ivx, ivy).
*
* For each output pixel (x,y), sums a 3x3 spatial neighborhood across
* all nFrames temporal layers. Layer t is spatially offset by
* (ivx*dt, ivy*dt) where dt = t - tMid, so the filter "tracks" a
* hypothetical target moving at velocity (ivx,ivy) px/sample.
*
* Sub-pixel shift: currently nearest-integer.
* TODO: replace with bilinear interpolation for smoother velocity response.
*
* Result is ADDED into out[] so callers can accumulate across multiple calls.
*
* @param fpixels [nFrames][width*height] input (temporally decimated)
* @param width image width in pixels
* @param height image height in pixels
* @param ivx channel velocity X in [-maxVel..+maxVel]
* @param ivy channel velocity Y in [-maxVel..+maxVel]
* @param out [width*height] output array (accumulated into, not cleared here)
*/
public static void layer1Channel(
float[][] fpixels,
int width, int height,
int ivx, int ivy,
float[] out)
{
int nFrames = fpixels.length;
int tMid = nFrames / 2;
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
float sum = 0.0f;
for (int t = 0; t < nFrames; t++) {
int dt = t - tMid;
int sx = x - ivx * dt;
int sy = y - ivy * dt;
for (int dy = -1; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++) {
int px = sx + dx, py = sy + dy;
if (px >= 0 && px < width && py >= 0 && py < height) {
sum += fpixels[t][py * width + px];
}
}
}
}
out[y * width + x] += sum;
}
}
}
/**
* Full Layer 1 output: compute response for all nChan velocity channels.
*
* @param fpixels [nFrames][width*height]
* @param p RT parameters
* @param width image width
* @param height image height
* @return [nChan][width*height], indexed by p.chanIdx(ivx,ivy)
*/
public static float[][] layer1(
float[][] fpixels,
RtParams p,
int width, int height)
{
int nChan = p.nChan();
float[][] out = new float[nChan][width * height];
for (int ivy = -p.maxVel; ivy <= p.maxVel; ivy++) {
for (int ivx = -p.maxVel; ivx <= p.maxVel; ivx++) {
layer1Channel(fpixels, width, height, ivx, ivy,
out[p.chanIdx(ivx, ivy)]);
}
}
return out;
}
// =========================================================================
// Layer 2: recurrent accumulation buffer helpers
// =========================================================================
/**
* Integer spatial shift of buf[] by (vx, vy) pixels, in place.
* Pixels shifted outside the image boundary are zeroed.
*
* For non-integer velocities: replace with bilinear interpolation.
* The half-pixel grid case (|velocity| = 0.5) is a planned intermediate step.
*
* @param buf [width*height] modified in place
* @param width image width
* @param height image height
* @param vx shift in X (positive = rightward)
* @param vy shift in Y (positive = downward)
*/
public static void shiftBuffer(float[] buf, int width, int height, int vx, int vy) {
float[] tmp = new float[buf.length];
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
int sx = x - vx, sy = y - vy;
if (sx >= 0 && sx < width && sy >= 0 && sy < height) {
tmp[y * width + x] = buf[sy * width + sx];
}
// else tmp[y*width+x] stays 0
}
}
System.arraycopy(tmp, 0, buf, 0, buf.length);
}
/**
* Leaky ReLU activation applied in place.
* x >= 0 → x unchanged; x < 0 → x * slope.
* slope=1.0 gives linear (no competition); slope→0 approaches hard ReLU.
*/
public static void leakyReLU(float[] buf, float slope) {
for (int i = 0; i < buf.length; i++) {
if (buf[i] < 0.0f) buf[i] *= slope;
}
}
/**
* Update one velocity channel's accumulation buffer with a new Layer 1 observation.
*
* Loop per frame:
* 1. Spatial shift accumBuf by (ivx, ivy) — tracks the target
* 2. accumBuf = memCoeff*accumBuf + obsCoeff*layer1 — integrate
* 3. leakyReLU(accumBuf, leakySlope) — suppress noise / control competition
*
* @param accumBuf [width*height] persistent buffer, updated in place
* @param layer1 [width*height] new Layer 1 observation for this channel
* @param p RT parameters
* @param width image width
* @param height image height
* @param ivx channel velocity X
* @param ivy channel velocity Y
*/
public static void updateAccumChannel(
float[] accumBuf, float[] layer1,
RtParams p, int width, int height,
int ivx, int ivy)
{
shiftBuffer(accumBuf, width, height, ivx, ivy);
float mc = p.memCoeff, oc = p.obsCoeff;
for (int i = 0; i < accumBuf.length; i++) {
accumBuf[i] = mc * accumBuf[i] + oc * layer1[i];
}
leakyReLU(accumBuf, p.leakySlope);
}
/**
* Update all nChan accumulation buffers with new Layer 1 output.
*
* @param accumBufs [nChan][width*height] persistent state, updated in place
* @param layer1Out [nChan][width*height] from layer1()
* @param p RT parameters
* @param width image width
* @param height image height
*/
public static void updateAccum(
float[][] accumBufs, float[][] layer1Out,
RtParams p, int width, int height)
{
for (int ivy = -p.maxVel; ivy <= p.maxVel; ivy++) {
for (int ivx = -p.maxVel; ivx <= p.maxVel; ivx++) {
int ich = p.chanIdx(ivx, ivy);
updateAccumChannel(accumBufs[ich], layer1Out[ich],
p, width, height, ivx, ivy);
}
}
}
// =========================================================================
// Stage 2 trigger: peak detection
// =========================================================================
/**
* Cross-channel maximum projection: for each pixel, take the max value
* across all velocity channels in the accumulation buffer.
* This collapses [nChan][width*height] → [width*height].
*
* @param accumBufs [nChan][width*height]
* @param width image width
* @param height image height
* @return [width*height] max projection
*/
public static float[] maxProjection(float[][] accumBufs, int width, int height) {
float[] proj = new float[width * height];
for (float[] ch : accumBufs) {
for (int i = 0; i < proj.length; i++) {
if (ch[i] > proj[i]) proj[i] = ch[i];
}
}
return proj;
}
/**
* Detect peaks in a [width*height] map using 3x3 non-maximum suppression.
* A pixel is a peak if it is above thresh AND strictly greater than all 8 neighbors.
*
* Returns float[] of (x, y) pairs: [x0, y0, x1, y1, ...].
* Border pixels (1-pixel margin) are excluded.
*
* For Stage 2 trigger logic: before spawning a new Stage 2 worker, check
* the presence bitmask to avoid duplicating an already-tracked target.
*
* @param map [width*height] projection (e.g. from maxProjection)
* @param p RT parameters (uses p.peakThresh)
* @param width image width
* @param height image height
* @return float[] of (x,y) pairs, length = 2 * numPeaks
*/
public static float[] findPeaks(float[] map, RtParams p, int width, int height) {
ArrayList<Float> peaks = new ArrayList<>();
for (int y = 1; y < height - 1; y++) {
for (int x = 1; x < width - 1; x++) {
float v = map[y * width + x];
if (v < p.peakThresh) continue;
boolean isMax = true;
outer:
for (int dy = -1; dy <= 1; dy++) {
for (int dx = -1; dx <= 1; dx++) {
if (dx == 0 && dy == 0) continue;
if (map[(y + dy) * width + (x + dx)] >= v) {
isMax = false;
break outer;
}
}
}
if (isMax) {
peaks.add((float) x);
peaks.add((float) y);
}
}
}
float[] result = new float[peaks.size()];
for (int i = 0; i < result.length; i++) result[i] = peaks.get(i);
return result;
}
// =========================================================================
// Top-level single-level pipeline
// =========================================================================
/**
* Run the full single-pyramid-level detection pipeline on one input window.
*
* State (accumBufs) is kept between calls by the caller; it represents
* the recurrent memory across input windows.
*
* @param fpixels [nFrames][width*height] input window (temporally decimated)
* @param accumBufs [nChan][width*height] persistent accumulation state (in/out)
* @param p RT parameters
* @param width image width
* @param height image height
* @return float[] of (x,y) peak pairs from this window
*/
public static float[] processWindow(
float[][] fpixels,
float[][] accumBufs,
RtParams p,
int width, int height)
{
float[][] layer1Out = layer1(fpixels, p, width, height);
updateAccum(accumBufs, layer1Out, p, width, height);
float[] proj = maxProjection(accumBufs, width, height);
return findPeaks(proj, p, width, height);
}
/**
* Allocate fresh (zeroed) accumulation buffers for a new session.
*
* @param p RT parameters
* @param width image width
* @param height image height
* @return [nChan][width*height] zero-initialized
*/
public static float[][] allocAccum(RtParams p, int width, int height) {
return new float[p.nChan()][width * height];
}
}
/**
** CuasRTLib.java
**
** Copyright (C) 2026 Elphel, Inc.
**
** -----------------------------------------------------------------------------**
**
** CuasRTLib.java is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
*/
package com.elphel.imagej.cuas.rt;
/**
* JNA binding to libcuasrt.so — the native CUDA real-time detector library.
*
* STATUS: stub — activate when JNA is added to pom.xml.
*
* -------------------------------------------------------------------------
* TO ACTIVATE: add to pom.xml dependencies:
*
* <dependency>
* <groupId>net.java.dev.jna</groupId>
* <artifactId>jna</artifactId>
* <version>5.14.0</version>
* </dependency>
*
* Then replace this class with the active interface (sketch below).
* -------------------------------------------------------------------------
*
* ACTIVE INTERFACE SKETCH (not compiled — JNA not yet on classpath):
*
* import com.sun.jna.Library;
* import com.sun.jna.Native;
* import com.sun.jna.Pointer;
* import com.sun.jna.Structure;
* import java.util.Arrays;
* import java.util.List;
*
* // RtParamsJna: JNA Structure mapping to C RtParams in rt_lib.h
* public static class RtParamsJna extends Structure {
* public int max_vel;
* public int width;
* public int height;
* public float mem_coeff;
* public float obs_coeff;
* public float leaky_slope;
* public float peak_thresh;
*
* @Override protected List<String> getFieldOrder() {
* return Arrays.asList("max_vel","width","height",
* "mem_coeff","obs_coeff","leaky_slope","peak_thresh");
* }
* }
*
* public interface CuasRTLib extends Library {
* CuasRTLib INSTANCE = Native.load("cuasrt", CuasRTLib.class);
*
* // Create GPU context; returns opaque handle.
* Pointer cuasrt_create(int width, int height, RtParamsJna p);
*
* // Destroy GPU context and free all GPU memory.
* void cuasrt_destroy(Pointer ctx);
*
* // Layer 1 matched filter.
* // fpixels: [nframes * width * height], row-major, host memory.
* // layer1Out: [n_chan * width * height] output, n_chan = (2*max_vel+1)^2.
* void cuasrt_layer1(Pointer ctx, float[] fpixels, int nframes, float[] layer1Out);
*
* // Update accumulation buffer; returns number of peaks found.
* // layer1Out: [n_chan * width * height] from cuasrt_layer1.
* // accumBuf: [n_chan * width * height] persistent state (in/out).
* // peaksXy: output [(x,y) pairs], length >= maxPeaks*2.
* int cuasrt_update_accum(Pointer ctx,
* float[] layer1Out, float[] accumBuf,
* RtParamsJna p, float[] peaksXy, int maxPeaks);
* }
*
* -------------------------------------------------------------------------
* LIBRARY PATH:
* Set -Djna.library.path=/path/to/cuas_rt_gpu/build at JVM startup,
* or place libcuasrt.so on LD_LIBRARY_PATH.
* libcudart.so must also be accessible (ships with CUDA toolkit).
*
* CUDA CONTEXT:
* libcuasrt.so creates its own CUDA context (device 0).
* It runs independently of the JCuda tile-processor context.
* When both pipelines run together, context sharing via cudaSetDevice
* (CUDA 12 primary context) can be added without API changes.
* -------------------------------------------------------------------------
*/
public final class CuasRTLib {
// Placeholder — replace with active JNA interface once JNA is in pom.xml.
private CuasRTLib() {}
}
/**
** RtParams.java
**
** Copyright (C) 2026 Elphel, Inc.
**
** -----------------------------------------------------------------------------**
**
** RtParams.java is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
*/
package com.elphel.imagej.cuas.rt;
/**
* Runtime parameters for the real-time CUAS target detector.
*
* Mirrors the RtParams C struct in cuas_rt_gpu/src/rt_lib.h.
* When JNA is added to pom.xml this class will gain a JNA Structure counterpart
* (RtParamsJna extends com.sun.jna.Structure) for direct memory mapping.
*
* Velocity channels:
* X and Y each span [-maxVel .. +maxVel] pixels/sample.
* Total channels: nChan() = (2*maxVel+1)^2 = 25 for maxVel=2.
* Channel index: chanIdx(ivx,ivy) = (ivy+maxVel)*nVel() + (ivx+maxVel)
* Units are pixels per input sample; input is already temporally
* averaged/decimated for the specific pyramid level, so the same
* maxVel applies at every pyramid level.
*/
public class RtParams {
/** Velocity range per axis [-maxVel .. +maxVel] px/sample. Start with 2; try 1 and 3. */
public int maxVel = 2;
/** Accumulation buffer memory coefficient (high persistence). */
public float memCoeff = 0.9f;
/** New observation weight (typically 1 - memCoeff). */
public float obsCoeff = 0.1f;
/**
* Leaky ReLU negative slope inside accumulation loop.
* Controls competition vs. pure linear accumulation:
* slope=1.0 → linear (maximum SNR improvement, no competition)
* slope→0 → hard ReLU (strong competition, earlier saturation)
* Tune experimentally; start near 1.0 and reduce toward 0.01.
*/
public float leakySlope = 0.1f;
/** Accumulation buffer peak detection threshold for Stage 2 trigger. */
public float peakThresh = 0.5f;
public RtParams() {}
public RtParams(int maxVel, float memCoeff, float obsCoeff,
float leakySlope, float peakThresh) {
this.maxVel = maxVel;
this.memCoeff = memCoeff;
this.obsCoeff = obsCoeff;
this.leakySlope = leakySlope;
this.peakThresh = peakThresh;
}
/** Number of velocity channels per axis: 2*maxVel+1. */
public int nVel() { return 2 * maxVel + 1; }
/** Total 2D velocity channels: nVel()^2. */
public int nChan() { int n = nVel(); return n * n; }
/**
* Flat channel index for velocity (ivx, ivy), both in [-maxVel..+maxVel].
* ivy is the major index (row), ivx is the minor index (column).
*/
public int chanIdx(int ivx, int ivy) {
return (ivy + maxVel) * nVel() + (ivx + maxVel);
}
/** Recover ivx from channel index. */
public int ivxFromChan(int ich) { return (ich % nVel()) - maxVel; }
/** Recover ivy from channel index. */
public int ivyFromChan(int ich) { return (ich / nVel()) - maxVel; }
@Override
public String toString() {
return String.format(
"RtParams{maxVel=%d, nChan=%d, mem=%.3f, obs=%.3f, leaky=%.3f, thresh=%.3f}",
maxVel, nChan(), memCoeff, obsCoeff, leakySlope, peakThresh);
}
}
/**
* Real-time CUAS target detector — skeleton package.
*
* Design discussion: attic/imagej-elphel-internal/handoffs/2026-05-21_RealTimeDesign.md
*
* Contents:
* RtParams — runtime parameters (mirrors RtParams C struct in libcuasrt.so)
* CuasRTDetector — pure-Java prototype; reference implementation for tuning
* CuasRTLib — JNA binding stub (activate when JNA added to pom.xml)
*
* Implementation order (next week):
* 1. Tune CuasRTDetector against real data at a single pyramid level
* 2. Port to C++/CUDA in cuas_rt_gpu, validate numerically against Java
* 3. Wire CuasRTLib JNA binding, run Java → libcuasrt.so end-to-end
* 4. Extend to full temporal pyramid
*/
package com.elphel.imagej.cuas.rt;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment