#!/usr/bin/env python3
"""Generate KRB Webflow Designer import JS from the live-scrape JSON contract.

This deliberately reuses the proven About Us v7 importer and replaces its SOURCE,
GALLERY_MANIFEST and EXPECTED constants with data produced by
krb_live_block_scraper.py. It also patches reporting/context strings so source
child numbers and page-slot child numbers show up in import reports.
"""
from __future__ import annotations

import argparse
import json
import re
import zipfile
from pathlib import Path
from typing import Any, Dict

# Directory from which main() derives the default paths of every input and
# output artefact. NOTE: this is a hard-coded, machine-specific absolute path;
# pass explicit CLI arguments when running anywhere else.
OUT_DIR = Path("/Users/iggy/.hermes/profiles/ignite_team/outbound")
# Importer template that is read and rewritten; default value of --base-js.
BASE_JS = OUT_DIR / "krb-about-us-full-page-group-import-v7.js"


def load_json(path: Path) -> Dict[str, Any]:
    """Return the JSON document stored at *path*, decoding the file as UTF-8."""
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)


def normalise_for_v7(data: Dict[str, Any]) -> Dict[str, Any]:
    """Convert scraper contract to the v7 importer's expected SOURCE shape.

    Args:
        data: Mapping of page name to the page dict produced by the scraper.

    Returns:
        A new mapping with one shallow-copied page dict per input page where:

        * ``accordions`` is always present and holds one flat entry per
          accordion item (``groupHeading``, ``title``, ``html``, ``text``,
          ``sourceChildNumber``) instead of grouped blocks with ``items``;
        * every gateway link without a truthy ``pageId`` carries an
          ``attention`` note (an existing ``attention`` value is kept).

    The input is not modified: gateway link dicts are copied before being
    annotated. Other nested values are shared with the input.
    """
    out: Dict[str, Any] = {}
    for page_name, page in data.items():
        p = dict(page)
        # v7 expects accordion entries like {title, html, groupHeading}. The scraper
        # stores grouped accordion blocks with items[]. Flatten those for the current
        # Employment fallback behaviour while retaining raw source in unmatched/report.
        flattened = []
        for group in p.get("accordions") or []:
            group_heading = group.get("heading") or "Accordion"
            for item in group.get("items") or []:
                flattened.append({
                    "groupHeading": group_heading,
                    "title": item.get("heading") or group_heading,
                    "html": item.get("html") or item.get("text") or "",
                    "text": item.get("text") or "",
                    "sourceChildNumber": item.get("sourceChildNumber") or group.get("sourceChildNumber"),
                })
        p["accordions"] = flattened
        # Make sure all page links use Designer API shape-friendly page IDs where known.
        links = p.get("gatewayLinks")
        if links:
            # dict(page) is only a shallow copy, so the link dicts are still the
            # caller's objects; copy them before adding the attention marker.
            copied_links = [dict(link) for link in links]
            for link in copied_links:
                if not link.get("pageId"):
                    link.setdefault("attention", "unresolved internal page id")
            p["gatewayLinks"] = copied_links
        out[page_name] = p
    return out


def expected_from_source(source: Dict[str, Any]) -> Dict[str, Dict[str, int]]:
    """Derive the per-page expected component counts from normalised SOURCE.

    For every page the result maps a component name to the number of instances
    the source data calls for. Only components whose source list is non-empty
    appear; a page with no known content maps to an empty dict.
    """
    # (source key, component name, how to turn the source list into a count)
    simple_rules = (
        ("twoColumn", "Section / Two Column Text & Image", len),
        ("gatewayLinks", "Section / Gateway CTA", lambda _links: 1),
        ("downloads", "Section / Downloads", len),
        # Most migrated static pages use Next Pages. Keep expected if source provides any.
        ("nextPages", "Section / Next Pages", lambda _pages: 1),
    )

    def _tally(page_name: str, page: Dict[str, Any]) -> Dict[str, int]:
        tally: Dict[str, int] = {}
        text_blocks = page.get("textContent")
        if text_blocks:
            # Employment gets one extra Text Content section when it has accordions.
            bonus = 1 if page_name == "Employment" and page.get("accordions") else 0
            tally["Section / Text Content"] = len(text_blocks) + bonus
        for key, component, measure in simple_rules:
            entries = page.get(key)
            if entries:
                tally[component] = measure(entries)
        return tally

    return {page_name: _tally(page_name, page) for page_name, page in source.items()}


def gallery_manifest_from_source(source: Dict[str, Any]) -> Dict[str, Any]:
    """Build the GALLERY_MANIFEST mapping from normalised SOURCE.

    Args:
        source: Mapping of page name to page dict.

    Returns:
        A mapping holding an entry only for pages with at least one gallery
        that has images. Each entry carries ``pageId`` and ``path`` (read from
        the page's ``meta``; ``None`` when unavailable) plus ``galleries``, a
        list of ``{heading, images, sourceChildNumber}`` dicts. A gallery with
        no heading is labelled ``"Gallery"``.
    """
    manifest: Dict[str, Any] = {}
    for name, p in source.items():
        # Preserve true galleries if scraper found them. Two-column images remain in SOURCE.twoColumn[].images
        # and are handled by sectionImageSpec fallback in the v7 importer.
        galleries = [
            {
                "heading": g.get("heading") or "Gallery",
                "images": g["images"],
                "sourceChildNumber": g.get("sourceChildNumber"),
            }
            for g in p.get("galleries") or []
            if g.get("images")
        ]
        if galleries:
            # `or {}` also covers an explicit JSON null for "meta", which a
            # plain .get("meta", {}) default would pass through as None.
            meta = p.get("meta") or {}
            manifest[name] = {"pageId": meta.get("id"), "path": meta.get("path"), "galleries": galleries}
    return manifest


def replace_const(js: str, name: str, value: Any) -> str:
    """Swap the initialiser of ``const <name> = ...;`` in *js* for *value*.

    *value* is serialised as compact JSON (non-ASCII characters kept as-is).
    Only the first matching declaration is rewritten. The declaration's end is
    located first as a ``;`` followed by a newline and another ``const``; if no
    such match exists, the first ``;`` followed by a newline is used instead.

    Raises:
        RuntimeError: if no declaration of *name* can be located.
    """
    payload = json.dumps(value, ensure_ascii=False, separators=(",", ":"))

    def _splice(match: "re.Match[str]") -> str:
        # A function (not a template string) keeps re.subn from interpreting
        # JSON backslashes such as \n in rich HTML as replacement escapes.
        return f"{match.group(1)}{payload}{match.group(2)}"

    declaration = rf"(\n\s*const {re.escape(name)} = ).*?"
    # Preferred terminator first; the looser one covers a const that is not
    # followed by another const (e.g. the last one before config).
    for terminator in (r"(;\n\s*const )", r"(;\n)"):
        updated, hits = re.subn(declaration + terminator, _splice, js, count=1, flags=re.S)
        if hits:
            return updated
    raise RuntimeError(f"Could not replace const {name}")


def patch_importer(js: str) -> str:
    """Apply the live-scrape text patches to the importer source *js*.

    The patches run in a fixed order and each one rewrites at most the first
    occurrence of its target. A target that is not present in *js* is left
    alone without any error, so the result may be only partially patched.
    Returns the patched source.
    """
    # Rebrand the generated-file header.
    js = js.replace(
        "// Generated KRB About Us full page-group import v7",
        "// Generated KRB live-scrape JSON import v1 (based on proven About Us v7 importer)",
        1,
    )

    # Keep safer batch defaults: a teammate can intentionally set dryRun false + autoRunAllPages true in Code Lab.
    config_rewrites = (
        (r"dryRun:\s*false", "dryRun: true"),
        (r"resultPrefix:\s*'KRB_ABOUT_FULL_PAGE_GROUP_IMPORT_RESULT'", "resultPrefix: 'KRB_LIVE_SCRAPE_IMPORT_RESULT'"),
        (r"allPagesResultPrefix:\s*'KRB_ABOUT_FULL_PAGE_GROUP_AUTO_RESULT'", "allPagesResultPrefix: 'KRB_LIVE_SCRAPE_AUTO_RESULT'"),
    )
    for pattern, replacement in config_rewrites:
        js = re.sub(pattern, replacement, js, count=1)

    # Ordered (existing snippet, patched snippet) pairs, matched as exact text.
    snippet_swaps = (
        # Add target/source location helper after counts().
        (
            "function counts(rows) { const c = {}; for (const r of rows) c[r.componentName] = (c[r.componentName] || 0) + 1; return c; }",
            "function counts(rows) { const c = {}; for (const r of rows) c[r.componentName] = (c[r.componentName] || 0) + 1; return c; }\n"
            "  function pageSlotChildNumber(el, rows) {\n"
            "    const top = rows.filter(r => r.kind === 'element' && r.componentName && !r.viaSlot && r.depth <= 2);\n"
            "    const idx = top.findIndex(r => r.el === el);\n"
            "    return idx >= 0 ? idx + 1 : null;\n"
            "  }\n"
            "  function locCtx(base, rows, el, block) {\n"
            "    const pageSlotChild = pageSlotChildNumber(el, rows);\n"
            "    const bits = [];\n"
            "    if (pageSlotChild) bits.push(`page slot child ${pageSlotChild}`);\n"
            "    if (block?.sourceChildNumber) bits.push(`source child ${block.sourceChildNumber}`);\n"
            "    return bits.length ? `${base} (${bits.join('; ')})` : base;\n"
            "  }",
        ),
        # Add source attention/unmatched report fields immediately after source resolution.
        (
            "const source = SOURCE[pageName];\n    let rows = await componentRows(report);",
            "const source = SOURCE[pageName];\n"
            "    report.sourceAttention = (source.attention || []).map(a => ({ ...a, note: a.issue || a.note || 'source attention' }));\n"
            "    report.unmatchedSourceSections = (source.unmatchedSections || []).map(s => ({\n"
            "      issue: 'unmatched source section',\n"
            "      location: `source child ${s.sourceChildNumber || '?'}`,\n"
            "      sourceChildNumber: s.sourceChildNumber || null,\n"
            "      classes: s.classes || [],\n"
            "      text: (s.text || '').slice(0, 240)\n"
            "    }));\n"
            "    for (const a of report.sourceAttention) report.manual.push(`${a.location || 'source'}: ${a.issue || a.note || 'source attention'}`);\n"
            "    for (const s of report.unmatchedSourceSections) report.manual.push(`${s.location}: ${s.issue}${s.classes?.length ? ' [' + s.classes.join(' ') + ']' : ''}`);\n"
            "    let rows = await componentRows(report);",
        ),
        # Include page-slot/source-child details in component population contexts.
        (
            "for (let i=0; i<Math.min(textEls.length, textBlocks.length); i++) await populateText(textEls[i], textBlocks[i], report, `${pageName} Text Content ${i+1}`);",
            "for (let i=0; i<Math.min(textEls.length, textBlocks.length); i++) await populateText(textEls[i], textBlocks[i], report, locCtx(`${pageName} Text Content ${i+1}`, rows, textEls[i], textBlocks[i]));",
        ),
        (
            "await populateTwoCol(twoEls[i], source.twoColumn[i], report, `${pageName} Two Column ${i+1}`);",
            "await populateTwoCol(twoEls[i], source.twoColumn[i], report, locCtx(`${pageName} Two Column ${i+1}`, rows, twoEls[i], source.twoColumn[i]));",
        ),
        (
            "await setComponentImageProp(twoEls[i], sectionImageSpec(pageName, i), report, `${pageName} Two Column ${i+1} section image`);",
            "await setComponentImageProp(twoEls[i], sectionImageSpec(pageName, i), report, locCtx(`${pageName} Two Column ${i+1} section image`, rows, twoEls[i], source.twoColumn[i]));",
        ),
        # The gateway patch also adds a warning for links that could not be placed.
        (
            "if (gw[0] && source.gatewayLinks?.length) await populateGateway(gw[0], source.gatewayLinks, report, `${pageName} Gateway CTA`);",
            "if (gw[0] && source.gatewayLinks?.length) await populateGateway(gw[0], source.gatewayLinks, report, locCtx(`${pageName} Gateway CTA`, rows, gw[0], source.gatewayLinks[0]));\n"
            "    else if (source.gatewayLinks?.length) report.warnings.push(`${pageName}: Gateway links not placed; first link from source child ${source.gatewayLinks[0]?.sourceChildNumber || '?'}`);",
        ),
        (
            "for (let i=0; i<Math.min(dlEls.length, (source.downloads || []).length); i++) await populateDownloads(dlEls[i], source.downloads[i], report, `${pageName} Downloads ${i+1}`);",
            "for (let i=0; i<Math.min(dlEls.length, (source.downloads || []).length); i++) await populateDownloads(dlEls[i], source.downloads[i], report, locCtx(`${pageName} Downloads ${i+1}`, rows, dlEls[i], source.downloads[i]));",
        ),
        (
            "if (next[0]) await populateNext(next[0], source.nextPages || [], report, `${pageName} Next Pages`);",
            "if (next[0]) await populateNext(next[0], source.nextPages || [], report, locCtx(`${pageName} Next Pages`, rows, next[0], null));",
        ),
    )
    for existing, patched in snippet_swaps:
        js = js.replace(existing, patched, 1)
    return js


def forbidden_scan(js: str) -> Dict[str, Any]:
    """Search *js* case-insensitively for operations the importer must never perform.

    Returns ``{"ok": bool, "hits": [...]}`` where ``ok`` is true only when
    nothing matched. Each hit records the ``pattern`` that matched, the match
    start ``index`` and a ``snippet`` of surrounding text (up to 60 characters
    before the match start and 120 from it onward). Hits are grouped by
    pattern, in pattern order.
    """
    # Allowed: this importer mentions insert and delete in comments/guards, but should never publish/delete.
    banned = (r"\.publish\s*\(", r"deletePage\s*\(", r"remove\s*\(", r"unregister", r"redirect")
    hits = [
        {
            "pattern": pattern,
            "index": found.start(),
            "snippet": js[max(0, found.start() - 60):found.start() + 120],
        }
        for pattern in banned
        for found in re.finditer(pattern, js, re.I)
    ]
    return {"ok": not hits, "hits": hits}


def main() -> int:
    """Command-line entry point: build the import JS, its notes file and a zip.

    Steps, in order: load and normalise the source JSON, derive the EXPECTED
    and GALLERY_MANIFEST data, substitute the three constants into the base
    importer, apply the text patches, and run the forbidden-operation scan.
    If the scan passes, the JS and a Markdown notes file are written, both are
    zipped together with the source JSON, and a JSON summary is printed.

    Returns:
        0 on success.

    Raises:
        SystemExit: with the scan report as message when the forbidden-operation
            scan finds a hit; nothing has been written at that point.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("source_json", nargs="?", default=str(OUT_DIR / "krb-about-us-scrape-normalized-v1.json"))
    parser.add_argument("--base-js", default=str(BASE_JS))
    parser.add_argument("--out-js", default=str(OUT_DIR / "krb-live-scrape-json-import-v1.js"))
    parser.add_argument("--notes", default=str(OUT_DIR / "krb-live-scrape-json-import-v1-notes.md"))
    parser.add_argument("--zip", default=str(OUT_DIR / "krb-live-scrape-json-import-v1.zip"))
    opts = parser.parse_args()

    source_path = Path(opts.source_json)
    source = normalise_for_v7(load_json(source_path))
    expected = expected_from_source(source)
    galleries = gallery_manifest_from_source(source)

    # Fill the template's data constants, then apply the reporting patches.
    js = Path(opts.base_js).read_text(encoding="utf-8")
    for const_name, const_value in (
        ("SOURCE", source),
        ("GALLERY_MANIFEST", galleries),
        ("EXPECTED", expected),
    ):
        js = replace_const(js, const_name, const_value)
    js = patch_importer(js)

    # Refuse to emit anything if the generated script trips the safety scan.
    scan = forbidden_scan(js)
    if not scan["ok"]:
        raise SystemExit(f"Forbidden operation scan failed: {json.dumps(scan, indent=2)}")

    out_js = Path(opts.out_js)
    out_js.write_text(js, encoding="utf-8")

    notes = Path(opts.notes)
    notes_parts = [
        "# KRB live-scrape JSON import v1\n\n",
        f"Source JSON: `{opts.source_json}`\n\n",
        f"Base importer: `{opts.base_js}`\n\n",
        "## Defaults\n",
        "- `CONFIG.dryRun: true` by default. Set to `false` only when ready to write.\n",
        "- `CONFIG.autoRunAllPages: false` by default. Set to `true` for a page-group batch after dry-run review.\n",
        "- No publish/delete/redirect/global style operations.\n\n",
        "## Report behaviour\n",
        "- Known blocks are mapped into existing v7 component population logic.\n",
        "- Unmatched source sections are preserved in `report.unmatchedSourceSections` and copied into `manual`.\n",
        "- Context strings include `source child N` and, when discoverable, `page slot child N`.\n",
        "- Rich-content images stripped from text are reported via `sourceAttention`.\n\n",
        "## Expected components\n",
        "```json\n",
        json.dumps(expected, indent=2, ensure_ascii=False),
        "\n```\n",
    ]
    notes.write_text("".join(notes_parts), encoding="utf-8")

    # Bundle the outputs with the source JSON, stored flat under their base names.
    with zipfile.ZipFile(opts.zip, "w", zipfile.ZIP_DEFLATED) as archive:
        for member in (out_js, notes, source_path):
            archive.write(member, member.name)

    summary = {
        "outJs": str(out_js),
        "notes": str(notes),
        "zip": opts.zip,
        "pages": list(source.keys()),
        "expected": expected,
    }
    print(json.dumps(summary, indent=2, ensure_ascii=False))
    return 0


# Run the generator only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
