#!/usr/bin/env python3
from __future__ import annotations

import csv
import re
import shutil
import urllib.parse
from pathlib import Path

# Hard-coded working directory for this one-off job; every other path hangs off it.
BASE = Path('/Users/iggy/.hermes/profiles/ignite_team/outbound/krb-image-grab')
# Directory that holds the image files and receives the renamed files.
PAGE_DIR = BASE / 'webflow-import-assets' / 'Page images'
# Active manifest: read at startup and overwritten with the prefixed version at the end.
MANIFEST = BASE / 'krb-webflow-page-images-import-set.csv'
# Manifest written with the updated filenames and the extra bookkeeping columns.
OUT_MANIFEST = BASE / 'krb-webflow-page-images-import-set-prefixed.csv'
# Copy of MANIFEST taken before it is overwritten; only created if it does not exist yet.
BACKUP_MANIFEST = BASE / 'krb-webflow-page-images-import-set.before-page-prefix.csv'

# Matches a leading ``page__<segment>(__<segment>)*__`` where each segment is
# letters, digits or hyphens (case-insensitive). The repeated group is greedy,
# so for a name such as ``page__about__hero__2.jpg`` the match extends through
# ``hero__`` as well, not just the page slug.
PREFIX_RE = re.compile(r'^page__[a-z0-9-]+(?:__[a-z0-9-]+)*__', re.I)


def page_slug(sample_page_urls: str) -> str:
    """Derive a filename-safe page slug from the first URL in *sample_page_urls*.

    The argument is a ``' | '``-separated list of URLs; only the first entry is
    used. Each non-empty path segment is reduced to lowercase ASCII letters,
    digits and hyphens, and the segments are joined with ``__`` so child pages
    stay distinguishable from their parents. No length limit is applied.

    Every segment is sanitised on its own (underscores included) so that
    ``__`` only ever appears as the segment separator. This keeps the resulting
    ``page__<slug>__`` filename prefix in the ``[a-z0-9-]+(__[a-z0-9-]+)*``
    shape, which is what lets a rerun recognise an already-applied prefix.

    Args:
        sample_page_urls: One or more URLs separated by ``' | '``; may be empty
            or ``None``.

    Returns:
        The slug, or ``'home'`` when there is no URL, the path is empty, or no
        segment contains a usable character.
    """
    first = (sample_page_urls or '').split(' | ')[0].strip()
    path = urllib.parse.urlparse(first).path if first else ''

    segments = []
    for raw in path.split('/'):
        # Collapse every run of disallowed characters (including '_') to a
        # single hyphen, then trim hyphens left dangling at the edges.
        cleaned = re.sub(r'[^a-zA-Z0-9]+', '-', raw).strip('-').lower()
        if cleaned:
            segments.append(cleaned)

    return '__'.join(segments) or 'home'


def unique_path(path: Path) -> Path:
    """Return *path* if it is free, otherwise the first unused numbered sibling.

    Siblings are named ``<stem>__<n><suffix>`` in the same directory, with
    ``n`` starting at 2 and increasing by one until a name that does not exist
    is found. Only existence is checked; nothing is created.
    """
    candidate = path
    counter = 1
    while candidate.exists():
        counter += 1
        candidate = path.with_name(f'{path.stem}__{counter}{path.suffix}')
    return candidate


def _strip_page_prefix(name: str, *slugs: str) -> str:
    """Return *name* without a leading ``page__<slug>__`` prefix.

    The exact prefix built from each of *slugs* is tried first. Only when none
    matches does the generic ``PREFIX_RE`` apply; that pattern is greedy and
    would otherwise swallow ``__``-separated parts of the real filename (for
    example the ``__2`` collision suffix added by ``unique_path``).
    """
    lowered = name.lower()
    for slug in slugs:
        if not slug:
            continue
        prefix = f'page__{slug}__'
        if lowered.startswith(prefix.lower()):
            return name[len(prefix):]
    return PREFIX_RE.sub('', name)


def main() -> None:
    """Prefix every page image with its page slug and rewrite the manifest.

    Steps:
      1. Verify that the manifest and the page-images directory exist, and
         take a one-time backup of the manifest.
      2. For each manifest row, locate the image (by ``download_path``, then
         by ``filename`` inside ``PAGE_DIR``), rename it to
         ``page__<slug>__<name>`` inside ``PAGE_DIR`` and update the row.
         Rows whose file cannot be found are marked ``missing``.
      3. Write the updated rows to ``OUT_MANIFEST`` and copy that file over
         ``MANIFEST`` so it becomes the active manifest.

    The function is meant to be safe to rerun: a file that already carries
    the expected prefix is reported as ``already-prefixed`` and left alone.

    Raises:
        SystemExit: If the manifest or image directory is missing, or the
            manifest contains no rows.
    """
    if not MANIFEST.exists():
        raise SystemExit(f'Missing manifest: {MANIFEST}')
    if not PAGE_DIR.exists():
        raise SystemExit(f'Missing Page images dir: {PAGE_DIR}')

    if not BACKUP_MANIFEST.exists():
        shutil.copy2(MANIFEST, BACKUP_MANIFEST)

    # Read inside a context manager so the handle is closed, and keep the
    # header as read from disk so the output columns do not depend on which
    # keys happen to have been added to the first row.
    with MANIFEST.open(newline='') as src:
        reader = csv.DictReader(src)
        fieldnames = list(reader.fieldnames or [])
        rows = list(reader)
    if not rows:
        raise SystemExit('No rows found')

    renamed = 0
    missing = 0
    for row in rows:
        filename = row['filename'] or ''
        original_filename = row.get('original_filename') or filename

        # is_file() rather than exists(): an empty download_path becomes
        # Path('.'), which exists but must never be treated as an image.
        old_path = Path(row['download_path'] or '')
        if not old_path.is_file():
            # If manifest already points at a previous path, try by filename in Page images.
            old_path = PAGE_DIR / filename
        if not old_path.is_file():
            row['rename_status'] = 'missing'
            row['original_filename'] = original_filename
            missing += 1
            continue

        slug = page_slug(row.get('sample_page_urls', ''))
        # Slugs whose prefix may already be on the file: the one computed now
        # and the one recorded by a previous run, if any.
        known_slugs = (slug, row.get('page_prefix') or '')

        # Prevent double-prefixing if this script is rerun.
        unprefixed = _strip_page_prefix(old_path.name, *known_slugs)
        original_filename = _strip_page_prefix(original_filename, *known_slugs)

        target = PAGE_DIR / f'page__{slug}__{unprefixed}'
        if old_path.resolve() == target.resolve():
            row['rename_status'] = 'already-prefixed'
        else:
            # Never overwrite a different file that already holds the name.
            target = unique_path(target)
            old_path.rename(target)
            row['rename_status'] = 'renamed'
            renamed += 1

        row['original_filename'] = original_filename
        row['filename'] = target.name
        row['download_path'] = str(target)
        row['asset_folder'] = 'Page images'
        row['page_prefix'] = slug

    # Every key this script may add must be a known column, otherwise
    # DictWriter rejects the row; rows lacking a key are written as ''.
    for extra in ('rename_status', 'original_filename', 'asset_folder', 'page_prefix'):
        if extra not in fieldnames:
            fieldnames.append(extra)
    with OUT_MANIFEST.open('w', newline='') as dst:
        writer = csv.DictWriter(dst, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
    # Also make the prefixed manifest the active page-images manifest for import.
    shutil.copy2(OUT_MANIFEST, MANIFEST)

    print('DONE')
    print(f'page image files found in manifest: {len(rows)}')
    print(f'renamed: {renamed}')
    print(f'missing: {missing}')
    print(f'active manifest: {MANIFEST}')
    print(f'prefixed manifest: {OUT_MANIFEST}')
    print(f'backup manifest: {BACKUP_MANIFEST}')


# Run the rename job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
