Commit c7ec55ec authored by Andrey Filippov

CLAUDE: Phase 0 — Elphel LWIR-16 → COLMAP pose+camera bridge

Reads 32-bit float scene slices from *-TERRAIN-DISP-MERGED.tiff and
per-scene poses from *-INTERFRAME.corr-xml, outputs COLMAP text-format
sparse model (cameras.txt, images.txt, empty points3D.txt) plus
16-bit PNG images for OpenMVS ingestion.

Camera: PINHOLE, fl derived from 32° HFoV (1116 px for 640 px width).
Rotation: mirrors GeometryCorrection.getCommonRotMatrix(); convention
verified in Phase 3 Blender smoke test.
Co-authored-by: Claude <claude@elphel.com>
parents
__pycache__/
*.pyc
*.pyo
*.egg-info/
dist/
build/
.venv/
*.so
attic/
This diff is collapsed.
# foliage
Open-source tools for Elphel LWIR-16 foliage-penetration (FOPEN) 3D reconstruction.
Developed under a USAF contract in collaboration with the University of Utah.
All code is released under **GPLv3** with Elphel copyright.
## Subproject: elphel_to_mvs (Phase 0 — Pose + Camera-Model Bridge)
Converts Elphel imagej-elphel segment outputs to COLMAP / OpenMVS format.
### Input files (produced by imagej-elphel)
| File | Content |
|------|---------|
| `*-TERRAIN-DISP-MERGED.tiff` | Multi-frame 32-bit float TIFF; each timestamp-labelled slice is one scene's fused 640×512 LWIR image |
| `*-INTERFRAME.corr-xml` | Per-scene poses relative to the reference scene (X Y Z azimuth tilt roll) |
### Output (COLMAP text format)
```
<out>/
images/ 16-bit grayscale PNG per scene
sparse/0/
cameras.txt single PINHOLE camera (fl from 32° HFoV)
images.txt quaternion + translation per scene
points3D.txt empty (Phase 1 will add the 80×64 sparse seed)
```
### Usage
```bash
python3 scripts/elphel_to_colmap.py \
--tiff /path/to/<ts>-TERRAIN-DISP-MERGED.tiff \
--xml /path/to/<ts>-INTERFRAME.corr-xml \
--out /tmp/colmap_out
```
Optional: `--hfov 32.0` (degrees, default 32).
### Dependencies
Python 3.8+, Pillow, NumPy.
### Coordinate-convention notes
See `src/elphel_to_mvs/pose_convert.py` for the full derivation.
Phase 3 Blender smoke test will validate the rotation convention.
#!/usr/bin/env python3
# -----------------------------------------------------------------------------
# elphel_to_colmap.py — Convert Elphel LWIR-16 segment data to COLMAP format
#
# Copyright (C) 2026 Elphel, Inc.
# -----------------------------------------------------------------------------
#
# elphel_to_colmap.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
"""
Phase 0 pose+camera-model bridge: Elphel → COLMAP / OpenMVS.
Usage
-----
python3 scripts/elphel_to_colmap.py \\
--tiff PATH/TO/<ts>-TERRAIN-DISP-MERGED.tiff \\
--xml PATH/TO/<ts>-INTERFRAME.corr-xml \\
--out /tmp/colmap_out
Output layout
-------------
<out>/
images/ 16-bit PNG per scene (named <timestamp>.png)
sparse/0/
cameras.txt single PINHOLE camera
images.txt per-image poses (quaternion + translation)
points3D.txt empty (Phase 0; sparse seed added in Phase 1)
"""
import sys
import argparse
from pathlib import Path
# Allow running directly from the repo root without installing
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "src"))
from elphel_to_mvs.read_interframe_xml import parse as parse_xml
from elphel_to_mvs.read_tiff_stack import read_scene_slices
from elphel_to_mvs.export_colmap import write_colmap_model
def main():
    """CLI entry point: parse arguments, load poses and slices, write the model."""
    parser = argparse.ArgumentParser(description="Elphel → COLMAP Phase 0 bridge")
    parser.add_argument("--tiff", required=True,
                        help="*-TERRAIN-DISP-MERGED.tiff (or any Elphel scene TIFF stack)")
    parser.add_argument("--xml", required=True,
                        help="*-INTERFRAME.corr-xml")
    parser.add_argument("--out", required=True,
                        help="Output directory (will be created)")
    parser.add_argument("--hfov", type=float, default=32.0,
                        help="Camera horizontal field-of-view in degrees (default 32)")
    opts = parser.parse_args()

    print(f"Reading poses from {opts.xml}")
    scene_poses, ref_ts = parse_xml(opts.xml)
    print(f" {len(scene_poses)} scene poses loaded (reference: {ref_ts})")

    print(f"Reading image slices from {opts.tiff}")
    slices, lo, hi = read_scene_slices(opts.tiff)
    print(f" {len(slices)} timestamp slices found"
          f" (global float range [{lo:.2f}, {hi:.2f}])")

    write_colmap_model(
        output_dir=opts.out,
        scenes=slices,
        poses=scene_poses,
        global_min=lo,
        global_max=hi,
        hfov_deg=opts.hfov,
    )


if __name__ == "__main__":
    main()
# elphel_to_mvs — Elphel LWIR-16 data → OpenMVS / COLMAP bridge
#
# Copyright (C) 2026 Elphel, Inc.
# GPLv3 — see LICENSE
# -----------------------------------------------------------------------------
# export_colmap.py — Write COLMAP sparse model files from Elphel data
#
# Copyright (C) 2026 Elphel, Inc.
# -----------------------------------------------------------------------------
#
# export_colmap.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
"""
Write COLMAP text-format sparse model files:
cameras.txt single PINHOLE camera shared by all images
images.txt one entry per scene with extrinsics (qw qx qy qz tx ty tz)
points3D.txt empty (no sparse seed for Phase 0; add SZXY later)
Image files are 16-bit grayscale PNG files saved under <output_dir>/images/.
Float32 values are linearly scaled from [global_min, global_max] to [0, 65535].
"""
import struct
from pathlib import Path
import numpy as np
def _save_image_uint16(arr, path, global_min, global_max):
    """
    Scale a float32 array to uint16 and save it as a 16-bit grayscale PNG.

    Values map linearly from [global_min, global_max] to [0, 65535];
    NaN pixels are replaced with global_min (so they render as 0).

    Parameters
    ----------
    arr : numpy.ndarray       float32 image data
    path : str or Path        output PNG path
    global_min, global_max : float
        Shared scaling bounds. `_parse_imagej_minmax` returns NaN when the
        ImageJ header lacks min=/max=; in that case fall back to this
        slice's finite range so the output is still well defined instead
        of NaN-poisoned (np.clip with NaN bounds yields NaN everywhere).
    """
    from PIL import Image as PILImage
    if np.isnan(global_min) or np.isnan(global_max):
        finite = arr[np.isfinite(arr)]
        if finite.size:
            global_min = float(finite.min())
            global_max = float(finite.max())
        else:
            # All-NaN slice: any bounds work; everything maps to 0.
            global_min, global_max = 0.0, 1.0
    span = global_max - global_min
    if span == 0 or np.isnan(span):
        span = 1.0  # flat image: avoid division by zero, output all zeros
    scaled = np.clip(np.nan_to_num(arr, nan=global_min),
                     global_min, global_max)
    scaled = ((scaled - global_min) / span * 65535.0).astype(np.uint16)
    PILImage.fromarray(scaled, mode="I;16").save(path)
def write_colmap_model(
    output_dir,
    scenes,         # list of (timestamp, arr_float32)
    poses,          # dict timestamp -> (x, y, z, az, tilt, roll)
    global_min,     # lower bound for 16-bit PNG scaling
    global_max,     # upper bound for 16-bit PNG scaling
    width=640,      # image width in pixels
    height=512,     # image height in pixels
    hfov_deg=32.0,  # horizontal field of view, degrees
):
    """
    Write a COLMAP text-format sparse model directory.

    Creates <output_dir>/images/ with one 16-bit grayscale PNG per scene
    slice that has a matching pose entry, and <output_dir>/sparse/0/ with
    cameras.txt (single shared PINHOLE camera), images.txt (per-image
    extrinsics, quaternion + translation), and an empty points3D.txt
    (Phase 0 has no sparse seed).

    Parameters
    ----------
    output_dir : str or Path
    scenes : list of (timestamp_str, numpy float32 array)
    poses : dict timestamp_str -> (x, y, z, azimuth, tilt, roll)
    global_min, global_max : float for image scaling
    width, height : int image dimensions
    hfov_deg : float horizontal field of view in degrees
    """
    from .pose_convert import pose_to_colmap
    output_dir = Path(output_dir)
    images_dir = output_dir / "images"
    sparse_dir = output_dir / "sparse" / "0"
    images_dir.mkdir(parents=True, exist_ok=True)
    sparse_dir.mkdir(parents=True, exist_ok=True)
    # Focal length from HFoV: fl = width / (2 tan(hfov/2)); PINHOLE uses fx == fy.
    fl = width / (2.0 * np.tan(np.radians(hfov_deg / 2.0)))
    # Principal point at the pixel-centre convention ((W-1)/2, (H-1)/2).
    cx = (width - 1) / 2.0
    cy = (height - 1) / 2.0
    # cameras.txt — single camera shared by all images (CAMERA_ID = 1)
    with open(sparse_dir / "cameras.txt", "w") as f:
        f.write("# Camera list with one line of data per camera:\n")
        f.write("# CAMERA_ID, MODEL, WIDTH, HEIGHT, PARAMS[]\n")
        f.write(f"1 PINHOLE {width} {height} {fl:.6f} {fl:.6f} {cx:.6f} {cy:.6f}\n")
    # images.txt + per-scene PNG files.
    # Only slices whose timestamp has a pose entry are exported.
    matched = [(ts, arr) for ts, arr in scenes if ts in poses]
    print(f" {len(matched)} of {len(scenes)} slices have pose entries")
    with open(sparse_dir / "images.txt", "w") as f:
        f.write("# Image list with two lines of data per image:\n")
        f.write("# IMAGE_ID, QW, QX, QY, QZ, TX, TY, TZ, CAMERA_ID, NAME\n")
        f.write("# POINTS2D[] as (X, Y, POINT3D_ID)\n")
        for img_id, (ts, arr) in enumerate(matched, start=1):
            x, y, z, az, tilt, roll = poses[ts]
            q, t = pose_to_colmap(x, y, z, az, tilt, roll)
            img_name = f"{ts}.png"
            f.write(
                f"{img_id} "
                f"{q[0]:.9f} {q[1]:.9f} {q[2]:.9f} {q[3]:.9f} "
                f"{t[0]:.9f} {t[1]:.9f} {t[2]:.9f} "
                f"1 {img_name}\n"
            )
            f.write("\n")  # empty 2D-points line required by COLMAP
            _save_image_uint16(arr, images_dir / img_name, global_min, global_max)
    # points3D.txt — empty placeholder (required by COLMAP readers)
    with open(sparse_dir / "points3D.txt", "w") as f:
        f.write("# 3D point list — empty for Phase 0 (no sparse seed)\n")
    print(f" Wrote {len(matched)} images to {output_dir}")
    print(f" cameras.txt: PINHOLE fl={fl:.1f}px cx={cx:.1f} cy={cy:.1f}")
# -----------------------------------------------------------------------------
# pose_convert.py — Elphel ATR pose → COLMAP / OpenMVS extrinsics
#
# Copyright (C) 2026 Elphel, Inc.
# -----------------------------------------------------------------------------
#
# pose_convert.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
"""
Convert Elphel (x, y, z, azimuth, tilt, roll) scene poses to the COLMAP /
OpenMVS camera extrinsic convention.
Elphel coordinate convention
-----------------------------
Camera frame:
X = image-plane right
Y = image-plane up
Z < 0 along the optical axis (camera looks in −Z direction, OpenGL-style)
Stored pose (x, y, z, az, tilt, roll):
(x, y, z) – camera centre in the reference-scene world frame (metres)
(az, tilt, roll) – Euler angles (radians) from the reference orientation
Rotation convention — mirrors GeometryCorrection.getCommonRotMatrix():
R_elphel = R_roll(roll) @ R_tilt(tilt) @ R_az(azimuth)
where each factor is the TRANSPOSED standard rotation matrix, i.e.:
R_az(a) = [[cos a, 0, −sin a], [0, 1, 0], [sin a, 0, cos a]] (R_Y^T)
R_tilt(t) = [[1, 0, 0], [0, cos t, sin t], [0, −sin t, cos t]] (R_X^T)
R_roll(r) = [[cos r, sin r, 0], [−sin r, cos r, 0], [0, 0, 1]] (R_Z^T)
Combined: R_elphel transforms world → Elphel-camera coords.
COLMAP / OpenMVS convention
----------------------------
Camera frame:
X = right, Y = down, Z = forward (looks along +Z)
The flip between the two camera frames is:
F = diag(1, −1, −1) (flip Y and Z)
so the world-to-COLMAP-camera rotation is:
R_w2c = F @ R_elphel
and the COLMAP translation vector is:
t = R_w2c @ (−C) where C = [x, y, z] is the camera centre.
NOTE: Phase 3 smoke test will validate sign / axis conventions.
If the Blender re-render is mirrored, negate a column of F.
"""
import numpy as np
def _rotation_matrix(azimuth, tilt, roll):
"""
Build the Elphel world-to-camera rotation matrix.
Mirrors GeometryCorrection.getCommonRotMatrix() exactly.
"""
ca, sa = np.cos(azimuth), np.sin(azimuth)
ct, st = np.cos(tilt), np.sin(tilt)
cr, sr = np.cos(roll), np.sin(roll)
# Each factor is already the transposed (inverse) standard rotation
R_az = np.array([[ca, 0., -sa],
[0., 1., 0.],
[sa, 0., ca]])
R_tilt = np.array([[1., 0., 0.],
[0., ct, st],
[0., -st, ct]])
R_roll = np.array([[ cr, sr, 0.],
[-sr, cr, 0.],
[ 0., 0., 1.]])
return R_roll @ R_tilt @ R_az # world → Elphel-camera
# Elphel camera frame (Y up, looks along -Z) → COLMAP camera frame
# (Y down, looks along +Z): flip the Y and Z axes.
_FLIP = np.diag([1., -1., -1.])


def pose_to_colmap(x, y, z, azimuth, tilt, roll):
    """
    Convert an Elphel scene pose to COLMAP world-to-camera extrinsics.

    Parameters
    ----------
    x, y, z : float
        Camera centre in the reference-scene world frame (metres).
    azimuth, tilt, roll : float
        Orientation angles (radians).

    Returns
    -------
    q : ndarray (4,)   world-to-camera quaternion (qw, qx, qy, qz)
    t : ndarray (3,)   translation vector t = R_w2c @ (−C)
    """
    world_to_cam = _FLIP @ _rotation_matrix(azimuth, tilt, roll)
    centre = np.array([x, y, z])
    translation = world_to_cam @ (-centre)
    return rotation_matrix_to_quaternion(world_to_cam), translation
def rotation_matrix_to_quaternion(R):
    """
    Convert a 3×3 rotation matrix to a unit quaternion (qw, qx, qy, qz).

    Branch selection follows the numerically-stable scheme: use the trace
    when it is positive, otherwise pivot on the largest diagonal element
    so the divisor never gets close to zero.
    """
    tr = R[0, 0] + R[1, 1] + R[2, 2]
    if tr > 0:
        inv = 0.5 / np.sqrt(tr + 1.0)
        quat = (0.25 / inv,
                (R[2, 1] - R[1, 2]) * inv,
                (R[0, 2] - R[2, 0]) * inv,
                (R[1, 0] - R[0, 1]) * inv)
    elif R[0, 0] > R[1, 1] and R[0, 0] > R[2, 2]:
        root = 2.0 * np.sqrt(1.0 + R[0, 0] - R[1, 1] - R[2, 2])
        quat = ((R[2, 1] - R[1, 2]) / root,
                0.25 * root,
                (R[0, 1] + R[1, 0]) / root,
                (R[0, 2] + R[2, 0]) / root)
    elif R[1, 1] > R[2, 2]:
        root = 2.0 * np.sqrt(1.0 + R[1, 1] - R[0, 0] - R[2, 2])
        quat = ((R[0, 2] - R[2, 0]) / root,
                (R[0, 1] + R[1, 0]) / root,
                0.25 * root,
                (R[1, 2] + R[2, 1]) / root)
    else:
        root = 2.0 * np.sqrt(1.0 + R[2, 2] - R[0, 0] - R[1, 1])
        quat = ((R[1, 0] - R[0, 1]) / root,
                (R[0, 2] + R[2, 0]) / root,
                (R[1, 2] + R[2, 1]) / root,
                0.25 * root)
    return np.array(quat)
# -----------------------------------------------------------------------------
# read_interframe_xml.py — Parse *-INTERFRAME.corr-xml for per-scene poses
#
# Copyright (C) 2026 Elphel, Inc.
# -----------------------------------------------------------------------------
#
# read_interframe_xml.py is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
"""
Parse Elphel *-INTERFRAME.corr-xml files.
Relevant keys (all under the EYESIS_DCT_AUX namespace):
scenes_<ts> 6-value CSV: X Y Z azimuth tilt roll
Position (m) and orientation (rad) of scene <ts>
relative to the reference scene.
scenes_<ts>_dt First time-derivative — velocity. Not used here.
scenes_<ts>_d2t Second time-derivative — acceleration. Not used here.
timestamp_reference Timestamp string of the reference scene (pose = 0).
Coordinate convention (Elphel):
X = image-plane right
Y = image-plane up
Z = 0 at the camera centre, Z < 0 along the view direction (OpenGL-style)
All values are relative to the reference scene, which has pose (0,0,0,0,0,0).
"""
import xml.etree.ElementTree as ET
from pathlib import Path
_PREFIX = "EYESIS_DCT_AUX."
_SCENE_KEY = _PREFIX + "scenes_"
_REF_KEY = _PREFIX + "timestamp_reference"


def parse(xml_path):
    """
    Parse an Elphel *-INTERFRAME.corr-xml file.

    Returns
    -------
    poses : dict
        { timestamp_str -> (x, y, z, azimuth, tilt, roll) }
        Only base pose entries (without the _dt / _d2t derivative suffix).
        x/y/z in metres, angles in radians.
    reference_ts : str
        Timestamp string of the reference scene (pose == 0), or "" when
        the file has no timestamp_reference entry.
    """
    tree = ET.parse(Path(xml_path))
    root = tree.getroot()
    entries = {e.get("key"): e.text for e in root.findall("entry")}
    # An <entry> with empty body has text=None — guard before .strip(),
    # which previously raised AttributeError.
    reference_ts = (entries.get(_REF_KEY) or "").strip()
    poses = {}
    for key, value in entries.items():
        # key is None when an <entry> lacks the "key" attribute entirely.
        if not key or not key.startswith(_SCENE_KEY):
            continue
        ts = key[len(_SCENE_KEY):]
        # Skip velocity / acceleration derivative entries.
        if ts.endswith("_dt") or ts.endswith("_d2t"):
            continue
        try:
            # value is None for empty entries → AttributeError on .split(),
            # which the original (TypeError, ValueError) handler missed.
            vals = [float(v) for v in value.split(",")]
        except (AttributeError, TypeError, ValueError):
            continue
        if len(vals) != 6:
            continue
        poses[ts] = tuple(vals)  # (x, y, z, azimuth, tilt, roll)
    return poses, reference_ts
# -----------------------------------------------------------------------------
# read_tiff_stack.py — Read 32-bit float slices from Elphel multi-frame TIFFs
#
# Copyright (C) 2026 Elphel, Inc.
# -----------------------------------------------------------------------------
#
# read_tiff_stack.py is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# -----------------------------------------------------------------------------
"""
Read per-scene 32-bit float image slices from Elphel multi-frame TIFF stacks.
File format
-----------
Multi-frame float32 TIFF written by ImageJ. Per-slice labels are stored in
TIFF tag 50839 as a UTF-16-BE byte stream, with lengths in tag 50838.
The first entry in the label array is a fixed ImageJ header ("IJIJlabl…")
and is skipped.
Slice labels fall into two categories:
"average" – composite average slice; skip
"average-<n>:<m>" – another average slice; skip
"<ts>-<n>" – per-scene slice where <ts> is the scene timestamp
(digits and "_" only, e.g. "1763232146_700470")
and <n> is a small integer suffix (pass index).
Only timestamp-bearing slices are returned. The timestamp key is the part
before the "-<n>" suffix, e.g. "1763232146_700470".
"""
from pathlib import Path
import numpy as np
from PIL import Image
_TAG_LABEL_LENGTHS = 50838
_TAG_LABEL_STRINGS = 50839
def _decode_labels(tif):
"""Decode per-slice UTF-16-BE labels from ImageJ custom TIFF tags."""
tag = tif.tag_v2
if _TAG_LABEL_LENGTHS not in tag or _TAG_LABEL_STRINGS not in tag:
return []
lengths = tag[_TAG_LABEL_LENGTHS]
raw = tag[_TAG_LABEL_STRINGS]
pos = 0
decoded = []
for length in lengths:
chunk = raw[pos: pos + length]
pos += length
decoded.append(chunk.decode("utf-16-be", errors="replace").rstrip("\x00"))
# First entry is the fixed ImageJ "IJIJlabl…" header — drop it
return decoded[1:]
def _is_timestamp_label(label):
"""True if the label is a per-scene timestamp slice (not an average)."""
return "_" in label and not label.startswith("average")
def _timestamp_from_label(label):
"""
Strip the trailing "-<n>" pass-index suffix.
"1763232146_700470-0" → "1763232146_700470"
"""
return label.rsplit("-", 1)[0]
def read_scene_slices(tiff_path):
    """
    Load every per-scene float32 image slice from an Elphel TERRAIN TIFF stack.

    Parameters
    ----------
    tiff_path : str or Path

    Returns
    -------
    slices : list of (timestamp_str, numpy float32 array of shape (H, W))
        In file order; "average" composite slices are skipped.
    global_min : float
    global_max : float
        Min/max advertised in the ImageJ description header — useful for
        normalising the whole sequence consistently.
    """
    stack = Image.open(Path(tiff_path))
    labels = _decode_labels(stack)
    # Global scaling bounds live in the ImageJ description (TIFF tag 270).
    description = stack.tag_v2.get(270, b"")
    if isinstance(description, bytes):
        description = description.decode("utf-8", errors="replace")
    global_min, global_max = _parse_imagej_minmax(description)
    slices = []
    for frame_idx, label in enumerate(labels):
        if _is_timestamp_label(label):
            stack.seek(frame_idx)
            slices.append((_timestamp_from_label(label),
                           np.array(stack, dtype=np.float32)))
    stack.close()
    return slices, global_min, global_max
def _parse_imagej_minmax(desc):
"""Extract min= and max= from ImageJ TIFF description string."""
gmin, gmax = float("nan"), float("nan")
for line in desc.splitlines():
line = line.strip()
if line.startswith("min="):
try:
gmin = float(line.split("=", 1)[1])
except ValueError:
pass
elif line.startswith("max="):
try:
gmax = float(line.split("=", 1)[1])
except ValueError:
pass
return gmin, gmax
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment