#!/usr/bin/env python3
import email
import imaplib
import os
import re
from email.header import decode_header
from pathlib import Path

# Connection settings; every value can be overridden through the environment.
IMAP_HOST = os.getenv("ELPHEL_IMAP_HOST", "mail.elphel.com")
IMAP_PORT = int(os.getenv("ELPHEL_IMAP_PORT", "993"))
IMAP_USER = os.getenv("ELPHEL_IMAP_USER", "codex@elphel.com")
# No default for the password: stays None when the variable is unset.
IMAP_PASS = os.getenv("ELPHEL_IMAP_PASS")
# Mailbox to open, directory the messages are written to, and the IMAP
# search criterion used to pick messages.
MAILBOX = os.getenv("ELPHEL_IMAP_BOX", "INBOX")
OUT_DIR = Path(os.getenv("EMAIL_OUT_DIR", "attic/email_inbox"))
SEARCH = os.getenv("EMAIL_SEARCH", "ALL")


def _decode(value):
    if value is None:
        return ""
    parts = decode_header(value)
    out = ""
    for text, enc in parts:
        if isinstance(text, bytes):
            out += text.decode(enc or "utf-8", errors="replace")
        else:
            out += text
    return out


def _safe_subject(subject):
    return "".join(c if c.isalnum() or c in ("-", "_") else "_" for c in subject)[:80] or "message"


def _write_index(index_path, values):
    line = ",".join(v.replace(",", " ") for v in values)
    index_path.parent.mkdir(parents=True, exist_ok=True)
    with index_path.open("a", encoding="utf-8") as f:
        f.write(line + "\n")


def _decoded_text_parts(msg):
    parts = []
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            payload = part.get_payload(decode=True)
            if payload is None:
                continue
            charset = part.get_content_charset() or "utf-8"
            parts.append(payload.decode(charset, errors="replace"))
    else:
        payload = msg.get_payload(decode=True)
        if payload is not None:
            charset = msg.get_content_charset() or "utf-8"
            parts.append(payload.decode(charset, errors="replace"))
    return "\n".join(parts)


def _extract_original_headers(text):
    if not text:
        return "", "", ""
    # Find forwarded blocks and keep the deepest match.
    pattern = re.compile(
        r"Forwarded message.*?From:\s*(?P<from>.+?)\r?\n"
        r"(?:To:\s*.+?\r?\n)?"
        r"Date:\s*(?P<date>.+?)\r?\n"
        r"Subject:\s*(?P<subject>.+?)\r?\n",
        re.IGNORECASE | re.DOTALL,
    )
    matches = list(pattern.finditer(text))
    if not matches:
        return "", "", ""
    last = matches[-1]
    original_from = " ".join(last.group("from").split())
    original_date = " ".join(last.group("date").split())
    original_subject = " ".join(last.group("subject").split())
    return original_from, original_subject, original_date


def _date_dir_name(date):
    """Derive a single, safe directory name from a decoded Date header."""
    name = date[:10].replace(" ", "_")
    # The header is untrusted input: neutralise path separators and NULs so
    # the name can never point outside the output directory.
    for unsafe in ("/", "\\", "\0"):
        name = name.replace(unsafe, "_")
    # Empty, "." and ".." (or any dots-only name) are not usable directories.
    if not name.strip("."):
        return "unknown-date"
    return name


def fetch():
    """Download messages from the configured mailbox into ``OUT_DIR``.

    For every message id returned by the ``SEARCH`` criterion this writes,
    inside a per-date subdirectory of ``OUT_DIR``:

    * ``NNNN_<subject>.eml`` - the raw message bytes,
    * ``NNNN_meta.txt`` - decoded headers plus the headers extracted from a
      forwarded block in the body, when one is found,

    and appends one row to ``OUT_DIR/index.csv``. ``NNNN`` is the 1-based
    position of the message in the search result. Messages whose fetch does
    not succeed or returns no literal data are skipped.

    Raises:
        SystemExit: If ``IMAP_PASS`` is not set, the mailbox cannot be
            selected, or the search fails.
    """
    if not IMAP_PASS:
        raise SystemExit("Set ELPHEL_IMAP_PASS")
    with imaplib.IMAP4_SSL(IMAP_HOST, IMAP_PORT) as imap:
        imap.login(IMAP_USER, IMAP_PASS)
        # select() reports a refused mailbox through its status instead of
        # raising; check it so the failure is not hidden behind a later
        # "SEARCH illegal in state AUTH" error.
        status, _ = imap.select(MAILBOX)
        if status != "OK":
            raise SystemExit(f"IMAP select failed for {MAILBOX}")
        status, data = imap.search(None, SEARCH)
        if status != "OK":
            raise SystemExit("IMAP search failed")
        ids = data[0].split()
        # NOTE: per RFC 3501, fetching RFC822 from a read-write mailbox sets
        # the \Seen flag on the fetched messages.
        for idx, msg_id in enumerate(ids, start=1):
            status, msg_data = imap.fetch(msg_id, "(RFC822)")
            if status != "OK":
                continue
            # The response may carry no literal for this id (for example a
            # None entry); skip it rather than crash on indexing.
            if not msg_data or not isinstance(msg_data[0], tuple):
                continue
            raw = msg_data[0][1]
            msg = email.message_from_bytes(raw)
            subject = _decode(msg.get("Subject"))
            date = _decode(msg.get("Date"))
            from_ = _decode(msg.get("From"))
            to_ = _decode(msg.get("To"))
            cc_ = _decode(msg.get("Cc"))
            text = _decoded_text_parts(msg)
            original_from, original_subject, original_date = _extract_original_headers(text)
            date_dir = OUT_DIR / _date_dir_name(date)
            date_dir.mkdir(parents=True, exist_ok=True)
            seq = f"{idx:04d}"
            fname = f"{seq}_{_safe_subject(subject)}.eml"
            out_path = date_dir / fname
            out_path.write_bytes(raw)
            meta_path = date_dir / f"{seq}_meta.txt"
            meta_path.write_text(
                f"From: {from_}\n"
                f"To: {to_}\n"
                f"Cc: {cc_}\n"
                f"Date: {date}\n"
                f"Subject: {subject}\n"
                f"Original-From: {original_from}\n"
                f"Original-Date: {original_date}\n"
                f"Original-Subject: {original_subject}\n",
                encoding="utf-8",
            )
            _write_index(
                OUT_DIR / "index.csv",
                [date, from_, to_, cc_, subject, original_from, original_date, original_subject, str(out_path)],
            )


# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    fetch()
