#!/usr/bin/env python3
from __future__ import annotations

import csv
import hashlib
import json
import mimetypes
import os
import sys
import time
from pathlib import Path

import requests

# Webflow site whose assets and asset folders are managed by this script.
SITE_ID = '6a1e37436b332da28ecc3001'
# Base URL that every Webflow API path below is appended to.
API_BASE = 'https://api.webflow.com/v2'
# Local directory whose regular files are uploaded.
SOURCE_DIR = Path('/Users/iggy/.hermes/profiles/ignite_team/outbound/krb-image-grab/webflow-upload/Page images')
# Directory that receives the manifest and JSON artifacts written by main().
OUT = Path('/Users/iggy/.hermes/profiles/ignite_team/outbound/krb-image-grab')
# CSV with one row per attempted upload (success or error).
UPLOAD_MANIFEST = OUT / 'krb-webflow-page-images-uploaded-to-webflow.csv'
# JSON dump of the asset folder record the uploads are placed in.
FOLDER_JSON = OUT / 'krb-webflow-page-images-folder.json'


def api_headers(token: str) -> dict[str, str]:
    """Build the base headers for an authenticated API call.

    Args:
        token: API token, sent as a Bearer credential.

    Returns:
        A fresh dict containing the ``Authorization`` and ``accept`` headers.
    """
    headers = dict(
        Authorization='Bearer {}'.format(token),
        accept='application/json',
    )
    return headers


def json_headers(token: str) -> dict[str, str]:
    """Return the base API headers plus a JSON ``content-type`` header.

    Args:
        token: API token forwarded to ``api_headers``.

    Returns:
        A new dict with the headers from ``api_headers`` and
        ``content-type: application/json`` added.
    """
    return {**api_headers(token), 'content-type': 'application/json'}


def request_with_retry(method: str, url: str, **kwargs) -> requests.Response:
    """Send an HTTP request, retrying on 429 and 5xx responses.

    At most five attempts are made. Between attempts the function sleeps for
    the response's ``Retry-After`` value when that header is a positive whole
    number of seconds; otherwise it sleeps ``2 ** attempt`` seconds, capped
    at 30. No sleep happens after the final attempt.

    Args:
        method: HTTP verb handed to ``requests.request``.
        url: Target URL.
        **kwargs: Forwarded unchanged to ``requests.request`` on every
            attempt. ``timeout`` defaults to 90 seconds when not supplied.
            Payload streams are not rewound between attempts, so callers
            that need retry-safe bodies should pass bytes rather than an
            open file object.

    Returns:
        The first response that is neither 429 nor 5xx, or the last response
        received when every attempt was retryable.
    """
    max_attempts = 5
    kwargs.setdefault('timeout', 90)
    for attempt in range(1, max_attempts + 1):
        r = requests.request(method, url, **kwargs)
        if r.status_code != 429 and r.status_code < 500:
            return r
        if attempt == max_attempts:
            # Out of attempts: hand the failing response back without a pointless sleep.
            break
        try:
            wait = int(r.headers.get('Retry-After') or 0)
        except ValueError:
            # Retry-After may also be an HTTP-date or otherwise non-integer;
            # fall back to exponential backoff instead of crashing.
            wait = 0
        if wait <= 0:
            wait = min(2 ** attempt, 30)
        print(f'  retry {attempt} after {wait}s for {method} {url} status={r.status_code}', flush=True)
        time.sleep(wait)
    return r


def list_folders(token: str) -> list[dict]:
    """Fetch every asset folder of the site, following offset pagination.

    Args:
        token: API token used to build the request headers.

    Returns:
        The concatenated ``assetFolders`` entries from all pages.

    Raises:
        RuntimeError: If any page request returns a status other than 200.
    """
    folders = []
    offset = 0
    while True:
        r = request_with_retry(
            'GET',
            f'{API_BASE}/sites/{SITE_ID}/asset_folders',
            headers=api_headers(token),
            params={'limit': 100, 'offset': offset},
        )
        if r.status_code != 200:
            raise RuntimeError(f'List folders failed: {r.status_code} {r.text}')
        data = r.json()
        page = data.get('assetFolders') or []
        folders.extend(page)
        pag = data.get('pagination') or {}
        # A missing, null or zero limit would leave the offset stuck; use the requested size.
        limit = pag.get('limit') or 100
        total = pag.get('total')
        if total is None:
            total = len(folders)
        # An empty page means no further progress is possible, whatever
        # "total" claims; stop instead of looping forever.
        if not page or offset + limit >= total:
            break
        offset += limit
    return folders


def get_or_create_page_folder(token: str) -> dict:
    """Return the top-level 'Page images' asset folder, creating it if absent.

    Existing folders are searched first; a match needs the display name
    'Page images' and a ``parentFolder`` that is ``None`` or empty. When no
    folder matches, a new one is created through the API.

    Args:
        token: API token used for both the listing and the creation request.

    Returns:
        The matching folder record, or the parsed JSON body of the create
        response.

    Raises:
        RuntimeError: If the create request does not return 200, 201 or 202.
    """
    existing = next(
        (
            folder
            for folder in list_folders(token)
            if folder.get('displayName') == 'Page images'
            and folder.get('parentFolder') in (None, '')
        ),
        None,
    )
    if existing is not None:
        return existing

    endpoint = f'{API_BASE}/sites/{SITE_ID}/asset_folders'
    payload = {'displayName': 'Page images', 'parentFolder': None}
    r = request_with_retry('POST', endpoint, headers=json_headers(token), json=payload)
    if r.status_code in (200, 201, 202):
        return r.json()
    raise RuntimeError(f'Create folder failed: {r.status_code} {r.text}')


def md5_file(path: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in 1 MiB pieces so it is never held in memory whole.

    Args:
        path: File to hash; opened in binary mode.

    Returns:
        The digest as a lowercase hex string.
    """
    chunk_size = 1024 * 1024
    digest = hashlib.md5()
    with path.open('rb') as stream:
        while block := stream.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()


def upload_asset(token: str, folder_id: str, path: Path) -> dict:
    """Register ``path`` as a site asset and upload its contents.

    The asset is first created through the API with the file's name, MD5
    hash and parent folder. If the response carries an ``uploadUrl``, the
    file is then POSTed there as multipart form data together with the
    returned ``uploadDetails`` fields.

    Args:
        token: API token used for the create-asset request.
        folder_id: Id of the asset folder the asset is placed in.
        path: Local file to upload.

    Returns:
        The parsed create-asset response, with ``fileName``, ``fileHash``
        and ``localBytes`` added.

    Raises:
        RuntimeError: If the create request does not return 200/201/202, or
            the upload does not return 200/201/202/204.
    """
    file_hash = md5_file(path)
    create_body = {'fileName': path.name, 'fileHash': file_hash, 'parentFolder': folder_id}
    r = request_with_retry('POST', f'{API_BASE}/sites/{SITE_ID}/assets', headers=json_headers(token), json=create_body)
    if r.status_code not in (200, 201, 202):
        raise RuntimeError(f'Create asset failed for {path.name}: {r.status_code} {r.text}')
    data = r.json()
    upload_url = data.get('uploadUrl')
    upload_details = data.get('uploadDetails') or {}
    if upload_url:
        # Webflow/S3 usually includes content-type in uploadDetails; keep it consistent.
        content_type = (
            data.get('contentType')
            or upload_details.get('content-type')
            or mimetypes.guess_type(path.name)[0]
            or 'application/octet-stream'
        )
        # Send bytes rather than an open handle: request_with_retry may issue
        # the POST several times, and a handle consumed by the first attempt
        # would leave later attempts with nothing to send.
        payload = path.read_bytes()
        files = {'file': (path.name, payload, content_type)}
        sr = request_with_retry('POST', upload_url, data=dict(upload_details), files=files)
        if sr.status_code not in (200, 201, 202, 204):
            raise RuntimeError(f'S3 upload failed for {path.name}: {sr.status_code} {sr.text[:500]}')
    data['fileName'] = path.name
    data['fileHash'] = file_hash
    data['localBytes'] = path.stat().st_size
    return data


def list_assets_in_folder(token: str, folder_id: str) -> list[dict]:
    """Fetch the site's assets for a folder, following offset pagination.

    Args:
        token: API token used to build the request headers.
        folder_id: Folder id sent as the ``folderId`` query parameter.

    Returns:
        The concatenated ``assets`` entries from all pages.

    Raises:
        RuntimeError: If any page request returns a status other than 200.
    """
    assets = []
    offset = 0
    while True:
        # NOTE(review): the result is only folder-scoped if the endpoint
        # honours the folderId filter; confirm against the Webflow API docs.
        r = request_with_retry(
            'GET',
            f'{API_BASE}/sites/{SITE_ID}/assets',
            headers=api_headers(token),
            params={'folderId': folder_id, 'limit': 100, 'offset': offset},
        )
        if r.status_code != 200:
            raise RuntimeError(f'List assets failed: {r.status_code} {r.text}')
        data = r.json()
        page = data.get('assets') or []
        assets.extend(page)
        pag = data.get('pagination') or {}
        # A missing, null or zero limit would leave the offset stuck; use the requested size.
        limit = pag.get('limit') or 100
        total = pag.get('total')
        if total is None:
            total = len(assets)
        # An empty page means no further progress is possible, whatever
        # "total" claims; stop instead of looping forever.
        if not page or offset + limit >= total:
            break
        offset += limit
    return assets


def main() -> None:
    """Upload every file in SOURCE_DIR to the 'Page images' folder and record the results.

    Steps:
      1. Validate the WEBFLOW_TOKEN env var, the source directory and the
         expected file count (exactly 65).
      2. Find or create the target asset folder and dump it to FOLDER_JSON.
      3. Upload each file, collecting one manifest row per file; a failed
         upload is logged to stderr and recorded as an error row instead of
         stopping the run.
      4. Write the CSV manifest, read the folder's assets back from the API
         into a JSON file, and print a summary.

    Raises:
        SystemExit: If the token is missing, the source directory does not
            exist, or the file count is not 65.
        RuntimeError: If listing/creating the folder or the final asset
            read-back fails.
    """
    token = os.environ.get('WEBFLOW_TOKEN')
    if not token:
        raise SystemExit('WEBFLOW_TOKEN env var required')
    if not SOURCE_DIR.exists():
        raise SystemExit(f'Missing source dir: {SOURCE_DIR}')
    files = sorted(p for p in SOURCE_DIR.iterdir() if p.is_file())
    # Sanity check: this batch is expected to hold exactly 65 files.
    if len(files) != 65:
        raise SystemExit(f'Expected 65 files, found {len(files)}')

    folder = get_or_create_page_folder(token)
    FOLDER_JSON.write_text(json.dumps(folder, indent=2))
    folder_id = folder['id']
    print(f'Using folder Page images: {folder_id}')

    rows = []
    for i, path in enumerate(files, 1):
        print(f'[{i}/{len(files)}] uploading {path.name}', flush=True)
        try:
            result = upload_asset(token, folder_id, path)
        except Exception as e:
            # Best-effort batch: record the failure and carry on with the next file.
            # The failure may itself be an unreadable file, so the hash and
            # size are gathered defensively; a second error here must not
            # abort the run before the manifest is written.
            file_hash = ''
            local_bytes = ''
            try:
                file_hash = md5_file(path)
                local_bytes = path.stat().st_size
            except OSError:
                pass
            rows.append({
                'filename': path.name,
                'asset_id': '',
                'parent_folder': folder_id,
                'upload_url_present': '',
                'content_type': '',
                'file_hash': file_hash,
                'local_bytes': local_bytes,
                'status': f'error: {e}',
            })
            print(f'  ERROR {e}', file=sys.stderr, flush=True)
        else:
            rows.append({
                'filename': path.name,
                'asset_id': result.get('id', ''),
                'parent_folder': result.get('parentFolder', folder_id),
                'upload_url_present': 'yes' if result.get('uploadUrl') else 'no',
                'content_type': result.get('contentType', ''),
                'file_hash': result.get('fileHash', ''),
                'local_bytes': result.get('localBytes', ''),
                'status': 'uploaded',
            })

    fieldnames = ['filename', 'asset_id', 'parent_folder', 'upload_url_present', 'content_type', 'file_hash', 'local_bytes', 'status']
    # Explicit UTF-8 so file names and error text are written the same way on every platform.
    with UPLOAD_MANIFEST.open('w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        w.writerows(rows)

    assets = list_assets_in_folder(token, folder_id)
    (OUT / 'krb-webflow-page-images-assets-readback.json').write_text(json.dumps(assets, indent=2))
    print('DONE')
    print(f'folder_id: {folder_id}')
    print(f'attempted: {len(rows)}')
    print(f'uploaded rows: {sum(1 for r in rows if r["status"] == "uploaded")}')
    print(f'readback assets in folder: {len(assets)}')
    print(f'manifest: {UPLOAD_MANIFEST}')


# Run the upload only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
