#!/usr/bin/env python3
"""Populate KRB Webflow News Posts main-image field from WordPress featured/OG image dry-run map.

Secret handling: retrieves Webflow token from 1Password at runtime and never prints it.
"""
import hashlib
import json
import mimetypes
import os
import re
import subprocess
import time
from pathlib import Path
from urllib.parse import urlparse, unquote

import requests

# Webflow site whose asset library receives the uploaded images.
SITE_ID = "6a1e37436b332da28ecc3001"
# Webflow CMS collection whose items are listed and patched by this script.
NEWS_COLLECTION_ID = "6a1e37436b332da28ecc3016"
# 1Password item and vault queried through the `op` CLI for the Webflow token.
OP_ITEM_ID = "bqfhhqbsg5dmhoxof2l5c64xsa"
OP_VAULT = "Vault for Iggy (IGNITE OpenClaw Bot)"
# Input: dry-run map (JSON with a "results" list) read at the start of main().
DRY_RUN_PATH = Path("/Users/iggy/.hermes/profiles/ignite_team/outbound/krb-news-main-image-dry-run.json")
# Output: JSON report rewritten after every processed row and once at the end.
REPORT_PATH = Path("/Users/iggy/.hermes/profiles/ignite_team/outbound/krb-news-main-image-update-report.json")
# Base URL prefixed to every Webflow API path.
WF_BASE = "https://api.webflow.com/v2"
# User-Agent header sent on Webflow API calls and on image downloads.
UA = "Hermes KRB News Image Fix/1.0"


def read_env_var(path: Path, key: str) -> "str | None":
    """Look up ``key`` in a dotenv-style file and return its value.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. A
    leading ``export `` is accepted. The first matching assignment wins.

    Args:
        path: Location of the env file. A missing path (or one that is not a
            regular file) yields ``None`` instead of raising.
        key: Variable name to look for (compared after stripping whitespace).

    Returns:
        The value with surrounding whitespace removed and one pair of
        *matching* surrounding quotes stripped, or ``None`` when the key is
        not present.
    """
    if not path.is_file():
        return None
    # Decode explicitly as UTF-8 so the result does not depend on the locale;
    # undecodable bytes are replaced rather than aborting the lookup.
    for line in path.read_text(encoding="utf-8", errors="replace").splitlines():
        entry = line.strip()
        if not entry or entry.startswith("#"):
            continue
        if entry.startswith("export "):
            entry = entry[len("export "):].strip()
        name, sep, value = entry.partition("=")
        if not sep or name.strip() != key:
            continue
        value = value.strip()
        # Only remove quotes when the same quote character wraps the value;
        # stripping each quote type independently would eat quote characters
        # that are part of the value itself (e.g. KEY="abc'").
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        return value
    return None


def get_webflow_token():
    """Fetch the Webflow API token from 1Password via the ``op`` CLI.

    The 1Password service-account token is taken from the process environment
    or, failing that, from the profile ``.env`` file. The item is then read
    with ``op item get`` and the secret is picked from its fields: a
    non-empty CONCEALED field labelled ``credential`` is preferred, otherwise
    the first non-empty CONCEALED field is used.

    Returns:
        The secret value as a string. It is never printed.

    Raises:
        SystemExit: No service-account token is available, or the item has no
            usable concealed field.
        subprocess.CalledProcessError: ``op`` exited with a non-zero status.
    """
    service_token = os.environ.get("OP_SERVICE_ACCOUNT_TOKEN")
    if not service_token:
        service_token = read_env_var(
            Path("/Users/iggy/.hermes/profiles/ignite_team/.env"),
            "OP_SERVICE_ACCOUNT_TOKEN",
        )
    if not service_token:
        raise SystemExit("OP_SERVICE_ACCOUNT_TOKEN not available")

    # Hand the service-account token to the child process only.
    child_env = dict(os.environ, OP_SERVICE_ACCOUNT_TOKEN=service_token)
    command = ["op", "item", "get", OP_ITEM_ID, "--vault", OP_VAULT, "--format", "json", "--reveal"]
    completed = subprocess.run(command, env=child_env, text=True, capture_output=True, check=True)
    fields = json.loads(completed.stdout).get("fields", [])

    fallback = None
    for field in fields:
        if field.get("type") != "CONCEALED" or not field.get("value"):
            continue
        if field.get("label") == "credential":
            return field["value"]
        if fallback is None:
            # Remember the first concealed value in case no field is
            # labelled "credential".
            fallback = field["value"]
    if fallback is not None:
        return fallback
    raise SystemExit("KRB Webflow credential field not found")


class Webflow:
    """Small client for the Webflow API endpoints this script needs.

    All calls go through :meth:`request`, which adds the base URL, applies a
    default timeout, retries rate-limited responses and turns HTTP errors into
    ``RuntimeError``.
    """

    # How many times a rate-limited (HTTP 429) call is retried before the 429
    # is reported as an error like any other failure.
    MAX_RATE_LIMIT_RETRIES = 5
    # Seconds to wait when a 429 response has no usable Retry-After header.
    DEFAULT_RETRY_AFTER = 10.0
    # Upper bound, in seconds, on a single rate-limit wait.
    MAX_RETRY_AFTER = 120.0

    def __init__(self, token):
        """Create a session that sends ``token`` as a bearer credential."""
        self.s = requests.Session()
        self.s.headers.update({"Authorization": f"Bearer {token}", "accept": "application/json", "User-Agent": UA})

    def _retry_delay(self, response):
        """Return how long to wait (seconds) before retrying a 429 response."""
        raw = (getattr(response, "headers", None) or {}).get("Retry-After")
        try:
            delay = float(raw)
        except (TypeError, ValueError):
            delay = self.DEFAULT_RETRY_AFTER
        if not delay >= 0.0:  # also rejects NaN
            delay = self.DEFAULT_RETRY_AFTER
        return min(delay, self.MAX_RETRY_AFTER)

    def request(self, method, path, **kwargs):
        """Send one API request and return the decoded JSON body.

        Args:
            method: HTTP verb, e.g. ``"GET"``.
            path: Path (and optional query string) appended to ``WF_BASE``.
            **kwargs: Passed through to ``requests.Session.request``. A
                ``timeout`` of 90 seconds is used unless the caller sets one.

        Returns:
            The parsed JSON response, or ``{}`` when the body is empty.

        Raises:
            RuntimeError: The final response has a status code >= 400
                (including a 429 that persisted through all retries).
        """
        url = WF_BASE + path
        if "json" in kwargs:
            # Copy so the caller's headers mapping is never modified.
            headers = dict(kwargs.pop("headers", None) or {})
            headers.setdefault("content-type", "application/json")
            kwargs["headers"] = headers
        # setdefault avoids a duplicate-keyword TypeError if a caller passes
        # its own timeout.
        kwargs.setdefault("timeout", 90)

        attempt = 0
        while True:
            r = self.s.request(method, url, **kwargs)
            if r.status_code != 429 or attempt >= self.MAX_RATE_LIMIT_RETRIES:
                break
            # Rate limited: wait as instructed by the server, then try again
            # instead of failing the row outright.
            attempt += 1
            time.sleep(self._retry_delay(r))

        if r.status_code >= 400:
            raise RuntimeError(f"Webflow {method} {path} failed {r.status_code}: {r.text[:1000]}")
        if not r.text.strip():
            return {}
        return r.json()

    def get_all_items(self, collection_id):
        """Return every item of ``collection_id``, following offset pagination.

        Pages of up to 100 items are requested until the number collected
        reaches ``pagination.total`` or a page comes back empty.
        """
        out = []
        offset = 0
        while True:
            data = self.request("GET", f"/collections/{collection_id}/items?limit=100&offset={offset}")
            items = data.get("items", [])
            out.extend(items)
            pag = data.get("pagination") or {}
            # Without a reported total, treat what we have as the total so the
            # loop stops after this page.
            total = pag.get("total", len(out))
            if len(out) >= total or not items:
                return out
            offset += len(items)

    def create_asset_metadata(self, file_name, file_hash):
        """Register an asset on the site and return the API response.

        ``file_name`` is capped at 99 characters before being sent.
        """
        return self.request("POST", f"/sites/{SITE_ID}/assets", json={"fileName": file_name[:99], "fileHash": file_hash})

    def update_item_image(self, item_id, image_obj):
        """Set the ``main-image`` field of one News collection item."""
        payload = {"items": [{"id": item_id, "fieldData": {"main-image": image_obj}}]}
        return self.request("PATCH", f"/collections/{NEWS_COLLECTION_ID}/items", json=payload)


def filename_from_url(url):
    """Derive a safe upload file name (at most 99 characters) from ``url``.

    The last path segment is percent-decoded and every run of characters
    outside ``A-Z a-z 0-9 . _ -`` is replaced by a single ``-``. An empty
    segment becomes ``image.jpg`` and a name without any dot gets ``.jpg``
    appended.

    When the name is too long, the stem is shortened and the extension is
    kept, so the result still carries its file-type suffix. Only if the
    extension itself leaves no room for a stem is the name cut hard.
    """
    max_len = 99
    path = unquote(urlparse(url).path)
    name = path.rsplit("/", 1)[-1] or "image.jpg"
    name = re.sub(r"[^A-Za-z0-9._-]+", "-", name)
    if "." not in name:
        name += ".jpg"
    if len(name) <= max_len:
        return name
    stem, dot, ext = name.rpartition(".")
    suffix = dot + ext
    if not stem or len(suffix) >= max_len:
        # No sensible stem/extension split fits; fall back to a plain cut.
        return name[:max_len]
    return stem[: max_len - len(suffix)] + suffix


def download_image(url):
    """Download ``url`` and return ``(body_bytes, content_type)``.

    The content type comes from the response header when present, otherwise
    from a guess based on the URL, and finally defaults to
    ``application/octet-stream``.

    Raises:
        requests.HTTPError: The server answered with an error status.
    """
    response = requests.get(url, headers={"User-Agent": UA}, timeout=90)
    response.raise_for_status()
    body = response.content
    content_type = response.headers.get("content-type")
    if not content_type:
        content_type = mimetypes.guess_type(url)[0]
    if not content_type:
        content_type = "application/octet-stream"
    return body, content_type


def upload_asset(wf: Webflow, url, cache):
    """Upload the image at ``url`` as a Webflow asset and return its reference.

    Args:
        wf: Client used to register the asset metadata.
        url: Source image URL; also the cache key.
        cache: Mapping of already-processed URLs to their results. It is
            consulted first and updated on success.

    Returns:
        A dict with ``fileId`` and, when the API response included one, the
        asset ``url``.

    Raises:
        RuntimeError: The file upload was rejected, or the metadata response
            carried no asset id.
    """
    try:
        return cache[url]
    except KeyError:
        pass

    body, mime = download_image(url)
    digest = hashlib.md5(body).hexdigest()
    upload_name = filename_from_url(url)
    meta = wf.create_asset_metadata(upload_name, digest)

    # The id/URL may sit at the top level or under an "asset" object.
    nested = meta.get("asset", {})
    asset_id = meta.get("id") or nested.get("id")
    asset_url = (
        meta.get("hostedUrl")
        or meta.get("url")
        or meta.get("assetUrl")
        or nested.get("hostedUrl")
        or nested.get("url")
    )

    target = meta.get("uploadUrl")
    form_fields = meta.get("uploadDetails") or {}
    if target and form_fields:
        # Multipart form POST: the provided upload fields plus the file part.
        resp = requests.post(
            target,
            data=form_fields,
            files={"file": (upload_name, body, mime)},
            timeout=180,
        )
        if resp.status_code not in (200, 201, 204):
            raise RuntimeError(f"S3 upload failed {resp.status_code}: {resp.text[:500]}")

    if not asset_id:
        raise RuntimeError(f"No asset id in response keys {list(meta.keys())}")

    image_ref = {"fileId": asset_id}
    if asset_url:
        image_ref["url"] = asset_url
    cache[url] = image_ref
    return image_ref


def _utc_timestamp():
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())


def _write_report(report):
    """Write ``report`` to REPORT_PATH as indented JSON, encoded as UTF-8."""
    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # must be pinned rather than left to the locale default.
    REPORT_PATH.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")


def _field_data(item):
    """Return an item's ``fieldData`` mapping, or ``{}`` if missing or null."""
    return item.get("fieldData") or {}


def main():
    """Fill the ``main-image`` field of News items from the dry-run map.

    Steps:
      1. Load the dry-run map and keep rows that have a source image URL and
         were recorded as lacking a main image.
      2. Re-check those rows against the collection's current items, so rows
         whose item is gone or already has an image are skipped.
      3. For each remaining row, upload the image as an asset and patch the
         item. Failures are recorded per row and do not stop the run.
      4. Re-read the collection and write a final summary to the report.

    The report file is rewritten after every row so partial progress is
    preserved if the run is interrupted.
    """
    dry = json.loads(DRY_RUN_PATH.read_text(encoding="utf-8"))
    candidates = [
        r for r in dry.get("results", [])
        if not r.get("current_has_main_image") and r.get("source_image_url")
    ]
    token = get_webflow_token()
    wf = Webflow(token)
    before_by_id = {it.get("id"): it for it in wf.get_all_items(NEWS_COLLECTION_ID)}
    rows = [
        r for r in candidates
        if r.get("webflow_id") in before_by_id
        and not _field_data(before_by_id[r["webflow_id"]]).get("main-image")
    ]

    report = {
        "started_at": _utc_timestamp(),
        "target_count": len(rows),
        "updated": [],
        "errors": [],
        "asset_cache_count": 0,
    }
    asset_cache = {}
    print(f"Preparing to update {len(rows)} KRB News main images", flush=True)

    for idx, r in enumerate(rows, 1):
        try:
            image_obj = upload_asset(wf, r["source_image_url"], asset_cache)
            wf.update_item_image(r["webflow_id"], image_obj)
            report["updated"].append({
                "slug": r.get("slug"),
                "name": r.get("name"),
                "webflow_id": r.get("webflow_id"),
                "source_image_url": r.get("source_image_url"),
                "fileId": image_obj.get("fileId"),
                "has_url": bool(image_obj.get("url")),
            })
            print(f"[{idx}/{len(rows)}] updated {r.get('slug')} image=yes", flush=True)
        except Exception as e:
            # Per-row boundary: record the failure and carry on with the rest.
            report["errors"].append({
                "slug": r.get("slug"),
                "webflow_id": r.get("webflow_id"),
                "source_image_url": r.get("source_image_url"),
                "error": str(e),
            })
            print(f"[{idx}/{len(rows)}] FAILED {r.get('slug')}: {e}", flush=True)
        report["asset_cache_count"] = len(asset_cache)
        _write_report(report)
        # Brief pause between rows (presumably to go easy on the API).
        time.sleep(0.15)

    final = wf.get_all_items(NEWS_COLLECTION_ID)
    report["finished_at"] = _utc_timestamp()
    report["final_total"] = len(final)
    report["final_with_main_image"] = sum(bool(_field_data(it).get("main-image")) for it in final)
    report["final_missing_main_image"] = [
        {"id": it.get("id"), "slug": _field_data(it).get("slug"), "name": _field_data(it).get("name")}
        for it in final if not _field_data(it).get("main-image")
    ]
    _write_report(report)
    print("DONE", json.dumps({
        "target_count": report["target_count"],
        "updated": len(report["updated"]),
        "errors": len(report["errors"]),
        "final_total": report["final_total"],
        "final_with_main_image": report["final_with_main_image"],
        "final_missing_main_image": len(report["final_missing_main_image"]),
    }, indent=2), flush=True)


# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
