#!/usr/bin/env python3
# NOTE (for future Java port):
# - Scan for files ending with "-FIELD_CALIBRATION.corr-xml"
# - Parse <entry key="PREFIX.extrinsic_corr_{azimuth|tilt|roll}{sensor}">value</entry>
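# - Output CSV with columns: scene_id, noise, noise_azimuth, noise_tilt, noise_roll, azimuth0, tilt0, roll0, azimuth1, tilt1, roll1, ... (sensor-major order)
# - After the data rows, a blank row and "STAT" summary rows (worst sensor per angle, top 5 by std) are appended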
# - scene_id is the parent directory name of the file (timestamp)
import argparse
import csv
import os
import re
import sys
import xml.etree.ElementTree as ET

# Matches a complete entry key of the form
#   <prefix>.extrinsic_corr_<azimuth|tilt|roll><sensor digits>
# and exposes the pieces as the named groups 'prefix', 'kind' and 'sensor'.
KEY_RE = re.compile(r'^(?P<prefix>\w+)\.extrinsic_corr_(?P<kind>azimuth|tilt|roll)(?P<sensor>\d+)$')


def iter_files(root):
    """Lazily yield the path of every calibration file found under *root*.

    The tree is walked with os.walk; a file qualifies when its name ends
    with '-FIELD_CALIBRATION.corr-xml'. Each yielded path is the walked
    directory joined with the file name.
    """
    suffix = '-FIELD_CALIBRATION.corr-xml'
    for folder, _subdirs, entries in os.walk(root):
        yield from (
            os.path.join(folder, entry)
            for entry in entries
            if entry.endswith(suffix)
        )


def parse_file(path):
    """Read the extrinsic correction values from one XML file.

    Returns a pair (data, error):
      * on success, data maps (prefix, sensor) -> {kind: value} and error
        is None; sensor is an int, kind is the angle name captured by
        KEY_RE, value is a float;
      * if the file cannot be parsed, data is None and error is a string
        starting with "parse_error: ".

    Only 'entry' elements that are direct children of the root are read.
    Entries with no key, with a key KEY_RE does not match, or with text
    that is not a float are skipped.
    """
    try:
        document = ET.parse(path)
    except Exception as e:
        return None, f"parse_error: {e}"

    corrections = {}  # (prefix, sensor) -> {kind: value}
    for node in document.getroot().findall('entry'):
        name = node.get('key')
        if not name:
            continue
        matched = KEY_RE.match(name)
        if matched is None:
            continue
        raw_text = node.text or ''
        try:
            number = float(raw_text.strip())
        except Exception:
            # Non-numeric (or empty) text: ignore this entry.
            continue
        slot = (matched.group('prefix'), int(matched.group('sensor')))
        corrections.setdefault(slot, {})[matched.group('kind')] = number
    return corrections, None


def scene_id_from_path(path):
    """Return the scene ID for a calibration file: its parent directory name.

    Example:
        .../1763232535_880101/1763232535_880101-FIELD_CALIBRATION.corr-xml
        -> '1763232535_880101'

    The path is made absolute first, so a parent spelled relatively ('.',
    '..', or no directory part at all) resolves to the real directory name
    instead of '.', '..' or an empty string.
    """
    return os.path.basename(os.path.dirname(os.path.abspath(path)))


def _scene_sort_key(scene_id):
    """Return a sort key that is comparable for every scene ID.

    IDs that parse as a finite number once '_' is replaced by '.' sort first,
    in numeric order; every other ID sorts after them, alphabetically.
    Returning a bare float for some IDs and a bare str for others would make
    sorted() raise TypeError as soon as both kinds are present.
    """
    try:
        stamp = float(scene_id.replace('_', '.'))
    except ValueError:
        return (1, 0.0, scene_id)
    if stamp != stamp or stamp in (float('inf'), float('-inf')):
        # NaN/inf (e.g. a directory literally named "nan") would corrupt the
        # ordering, so such names are treated as non-numeric.
        return (1, 0.0, scene_id)
    return (0, stamp, scene_id)


def _lookup_value(scene_data, prefix, sensor, angle):
    """Return the value of *angle* for *sensor* in one scene, or None.

    scene_data maps (prefix, sensor) -> {angle: value}. With prefix 'ALL'
    the first entry (in dict order) for that sensor that has the angle wins;
    otherwise only the exact (prefix, sensor) entry is consulted.
    """
    if prefix == 'ALL':
        for (_, candidate), values in scene_data.items():
            if candidate == sensor and angle in values:
                return values[angle]
        return None
    values = scene_data.get((prefix, sensor))
    return None if values is None else values.get(angle)


def _running_avg(vals, idx, window):
    """Return the mean of the non-None values in a window around *idx*.

    The window spans window // 2 items before idx and window - window // 2 - 1
    items after it, clipped to the list bounds. A window of 1 or less returns
    vals[idx] unchanged. Returns None if the window holds no values.
    """
    if window <= 1:
        return vals[idx]
    half = window // 2
    start = max(0, idx - half)
    stop = min(len(vals), idx + (window - half))
    acc = 0.0
    n = 0
    for v in vals[start:stop]:
        if v is None:
            continue
        acc += v
        n += 1
    return (acc / n) if n else None


def _rms(square_sum, count):
    """Return sqrt(square_sum / count), or 0.0 when there are no samples."""
    return (square_sum / count) ** 0.5 if count else 0.0


def _stddev(vals):
    """Return the population standard deviation of *vals*, or None if empty."""
    if not vals:
        return None
    mean = sum(vals) / len(vals)
    var = sum((v - mean) ** 2 for v in vals) / len(vals)
    return var ** 0.5


def main():
    """Aggregate FIELD_CALIBRATION corr-xml files under a root into one CSV.

    Command line: root directory, -o/--out CSV path (required), --prefix
    (key prefix to keep, or ALL) and --window (running-average length).

    Output layout:
      * header row, then one row per scene, sorted by scene ID (numeric
        timestamps first, in numeric order): scene_id, noise, noise_azimuth,
        noise_tilt, noise_roll, then azimuth/tilt/roll for sensors 0..15 in
        sensor-major order; missing values are left empty;
      * a blank row followed by "STAT" rows: the sensor with the largest
        standard deviation for each angle, and the five columns with the
        largest standard deviation overall.

    The per-angle noise of a scene is the RMS, over sensors, of the
    difference between the value and its running average across scenes;
    "noise" is the RMS of the three per-angle figures.

    Files that fail to parse are reported on stderr and skipped.
    """
    ap = argparse.ArgumentParser(description='Aggregate FIELD_CALIBRATION corr-xml into CSV')
    ap.add_argument('root', help='Root directory to scan (e.g., /media/elphel/.../models/1763232117-1763234145)')
    ap.add_argument('-o', '--out', required=True, help='Output CSV path')
    ap.add_argument('--prefix', default='EYESIS_DCT_AUX', help='Prefix filter (default: EYESIS_DCT_AUX). Use ALL for any.')
    ap.add_argument('--window', type=int, default=10, help='Running average window for noise (default: 10)')
    args = ap.parse_args()

    errors = 0
    # scene_id -> {(prefix, sensor): {kind: value}}
    by_scene = {}
    for path in iter_files(args.root):
        data, err = parse_file(path)
        if err:
            errors += 1
            # Say which file failed and why; the count alone is not actionable.
            print(f"{path}: {err}", file=sys.stderr)
            continue
        if not data:
            continue
        by_scene.setdefault(scene_id_from_path(path), {}).update(data)

    sensors = list(range(16))
    angles = ['azimuth', 'tilt', 'roll']
    # (angle, sensor) pairs in sensor-major order, matching the CSV columns.
    columns = [(a, s) for s in sensors for a in angles]
    headers = ['scene_id', 'noise', 'noise_azimuth', 'noise_tilt', 'noise_roll'] + [f"{a}{s}" for a, s in columns]

    sorted_sids = sorted(by_scene, key=_scene_sort_key)

    # One series per column, aligned with sorted_sids (None where missing).
    # It feeds the running averages, the output rows and the statistics.
    col_values = {
        (a, s): [_lookup_value(by_scene[sid], args.prefix, s, a) for sid in sorted_sids]
        for a, s in columns
    }

    row_count = 0
    with open(args.out, 'w', newline='') as f:
        w = csv.writer(f)
        w.writerow(headers)

        for i, sid in enumerate(sorted_sids):
            # Sum of squared residuals and sample count, per angle.
            square_sum = {a: 0.0 for a in angles}
            samples = {a: 0 for a in angles}
            for a, s in columns:
                series = col_values[(a, s)]
                v = series[i]
                if v is None:
                    continue
                avg = _running_avg(series, i, args.window)
                if avg is None:
                    continue
                hp = v - avg  # high-pass residual
                square_sum[a] += hp * hp
                samples[a] += 1

            az_rms = _rms(square_sum['azimuth'], samples['azimuth'])
            tilt_rms = _rms(square_sum['tilt'], samples['tilt'])
            roll_rms = _rms(square_sum['roll'], samples['roll'])
            noise = ((az_rms * az_rms + tilt_rms * tilt_rms + roll_rms * roll_rms) / 3.0) ** 0.5

            row = [sid, noise, az_rms, tilt_rms, roll_rms]
            row.extend(col_values[c][i] for c in columns)
            w.writerow(row)
            row_count += 1

        # Append stats at the bottom (name/value/comment in first 3 columns)
        w.writerow([])
        w.writerow(["STAT", "name", "value", "comment"])

        angle_stats = {a: [] for a in angles}  # (std, sensor, n)
        overall = []  # (std, angle, sensor, n)
        for a, s in columns:
            present = [v for v in col_values[(a, s)] if v is not None]
            sd = _stddev(present)
            if sd is not None:
                angle_stats[a].append((sd, s, len(present)))
                overall.append((sd, a, s, len(present)))

        for a in angles:
            if angle_stats[a]:
                sd, s, n = max(angle_stats[a])
                w.writerow(["STAT", f"worst_{a}_sensor", s, f"std={sd:.6g}, n={n}"])

        overall.sort(reverse=True)
        w.writerow(["STAT", "top5_by_std", "", "format: angle sensor std n"])
        for sd, a, s, n in overall[:5]:
            w.writerow(["STAT", f"{a}{s}", f"{sd:.6g}", f"n={n}"])

    print(f"Wrote {row_count} rows to {args.out}")
    if errors:
        print(f"Skipped {errors} files due to parse errors", file=sys.stderr)


# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
