import json, os, subprocess, random, re, pathlib, io, ftplib, time, collections, ssl
from pathlib import Path
from PIL import Image, ImageOps
# Working directory for this audit: the inventory is read from here and the
# JSON report produced at the end of the script is written here.
OUT = Path('/Users/iggy/.hermes/profiles/ignite_team/outbound/clarendon_media_audit')
INV = json.loads((OUT / 'file_inventory.json').read_text())
files = INV['files']
summ = INV['summary']
UP = summ['uploads_path']

# A "variant" is an image whose file name ends in "-<W>x<H>" (optionally
# followed by "@<N>x") immediately before one of the image extensions.
var_re = re.compile(r'-\d+x\d+(?:@[0-9]+x)?(?=\.(?:jpe?g|png|webp|gif)$)', re.I)
img_ext = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

variants = []
for entry in files:
    entry_path = pathlib.Path(entry['path'])
    if entry_path.suffix.lower() not in img_ext:
        continue
    if var_re.search(entry_path.name):
        variants.append(entry)

# Fixed seed so repeated runs draw the same sample.
random.seed(42)
by_ext = collections.defaultdict(list)
for entry in variants:
    by_ext[pathlib.Path(entry['path']).suffix.lower()].append(entry)

# extension -> (number of size bins, max files drawn per bin).
# Extensions not listed here get a single bin capped at 999 files.
sampling_plan = {'.jpg': (10, 35), '.jpeg': (10, 20), '.png': (10, 30)}

selected = []
strata_info = {}
for ext, group in by_ext.items():
    # Sort by size so that each bin is a contiguous size range.
    arr = sorted(group, key=lambda item: item['size'])
    n = len(arr)
    bins, per_bin = sampling_plan.get(ext, (1, 999))
    for b in range(bins):
        lo = b * n // bins
        hi = (b + 1) * n // bins
        part = arr[lo:hi]
        if not part:
            continue
        if len(part) > per_bin:
            sample = random.sample(part, per_bin)
        else:
            # Bin is small enough to take in full (no random draw happens).
            sample = part
        selected.extend(sample)
        strata_info[f'{ext}:bin{b+1}'] = {
            'count': len(part),
            'bytes': sum(item['size'] for item in part),
            'sample_paths': {item['path'] for item in sample},
        }

# Additionally pull in the 80 largest variants that were not already drawn.
# These extras are appended to `selected` only; they are not recorded in any
# stratum's 'sample_paths'.
seen = {item['path'] for item in selected}
largest_first = sorted(variants, key=lambda item: item['size'], reverse=True)
for x in largest_first[:80]:
    if x['path'] in seen:
        continue
    selected.append(x)
    seen.add(x['path'])
# 1Password vault name and item id handed to `op item get`.
VAULT = 'Vault for Iggy (IGNITE OpenClaw Bot)'
ITEM = 'emtrkfztclkv4vtumb6nsa7a4e'
# Name of the environment variable / .env key that carries the token for `op`.
OP_ENV_KEY = 'OP_SERVICE_ACCOUNT_TOKEN'
# WSDL URL given to the zeep SOAP client.
WSDL = 'https://api.synergywholesale.com/?wsdl'
def load_token(paths=None, key=None):
    """Return the value of *key* from the first .env file that defines it.

    Each file is scanned line by line. Blank lines and ``#`` comment lines
    are ignored, a leading ``export `` is dropped, and surrounding single or
    double quotes are stripped from the value. Only the first ``=`` splits
    the line, so values may themselves contain ``=``.

    An empty assignment (``KEY=``) is treated as "not set here": the search
    continues with later lines and later files instead of returning ``''``
    and hiding a real value further down the list.

    Args:
        paths: .env files to search, in priority order. Defaults to the
            profile-level file followed by the user-level file.
        key: variable name to look for. Defaults to ``OP_ENV_KEY``.

    Returns:
        The first non-empty value found in the files; otherwise the value of
        the process environment variable *key*, or ``None`` if that is unset.
    """
    if key is None:
        key = OP_ENV_KEY
    if paths is None:
        paths = [Path('/Users/iggy/.hermes/profiles/ignite_team/.env'), Path('/Users/iggy/.hermes/.env')]
    for p in paths:
        p = Path(p)
        if not p.is_file():
            continue
        for line in p.read_text(errors='replace').splitlines():
            s = line.strip()
            if not s or s.startswith('#'):
                continue
            if s.startswith('export '):
                s = s[7:].strip()
            name, sep, raw = s.partition('=')
            if not sep or name.strip() != key:
                continue
            value = raw.strip().strip('"').strip("'")
            if value:
                return value
    # Files take precedence; the process environment is only the fallback.
    return os.environ.get(key)
def op_fields():
    """Read the configured 1Password item and return ``{label: value}``.

    Runs ``op item get`` for ``ITEM`` in ``VAULT`` with JSON output and
    ``--reveal``. The child process gets a copy of the current environment;
    when ``load_token()`` yields a token it is placed in that copy under
    ``OP_ENV_KEY``.

    Returns:
        dict mapping each field's ``label`` to its ``value``; a missing or
        falsy value becomes ``''``.

    Raises:
        subprocess.CalledProcessError: if ``op`` exits with a non-zero status
            (``check=True``).
    """
    child_env = os.environ.copy()
    token = load_token()
    if token:
        child_env[OP_ENV_KEY] = token

    command = ['op', 'item', 'get', ITEM, '--vault', VAULT, '--format', 'json', '--reveal']
    completed = subprocess.run(command, env=child_env, text=True, capture_output=True, check=True)

    item = json.loads(completed.stdout)
    by_label = {}
    for field in item.get('fields', []):
        by_label[field.get('label')] = field.get('value') or ''
    return by_label
import requests, urllib3
from zeep import Client, Settings
from zeep.helpers import serialize_object
from zeep.transports import Transport
# Look up the hosting record for the audited domain through the reseller SOAP
# API, then open an FTP-over-TLS session to that hosting account.
TARGET_DOMAIN = 'clarendon.vic.edu.au'

urllib3.disable_warnings()
f = op_fields()  # credential fields from 1Password, keyed by field label
client = Client(
    WSDL,
    settings=Settings(strict=False, xml_huge_tree=True),
    transport=Transport(session=requests.Session(), timeout=60),
)

# Page through listHosting 100 records at a time and stop at the first short
# page. At most 19 pages are requested, so a listing longer than that is
# silently truncated.
records = []
for page in range(1, 20):
    resp = serialize_object(client.service.listHosting({
        'resellerID': f['username'],
        'apiKey': f['credential'],
        'page': page,
        'limit': 100,
    }))
    batch = resp.get('hoidList') or []
    records.extend(batch)
    if len(batch) < 100:
        break

# Fail with a message that names the domain instead of an anonymous
# "list index out of range" when the account is not in the listing.
matches = [x for x in records if x.get('domain') == TARGET_DOMAIN]
if not matches:
    raise LookupError(
        f'no hosting record for {TARGET_DOMAIN!r} among {len(records)} records returned by listHosting'
    )
rec = matches[0]

# NOTE(review): this context performs no certificate or hostname verification
# and is created through a private ssl helper - confirm this is intentional
# for the hosting server.
ctx = ssl._create_unverified_context()
ftp = ftplib.FTP_TLS(context=ctx, timeout=60)
ftp.connect(rec['server'], 21)
ftp.auth()    # secure the control connection before credentials are sent
ftp.prot_p()  # secure the data connection as well
ftp.login(rec['username'], rec['password'])
def fetch(path, conn=None, home_prefix='/home/clarendo/'):
    """Download one file over FTP and return its content as bytes.

    The path is made relative to the FTP login directory by removing
    *home_prefix* from its start. Only a leading prefix is removed; a path
    that merely contains the same text further along is sent unchanged
    rather than having that inner segment cut out.

    Args:
        path: server-side file path (presumably absolute, as stored in the
            inventory - verify against the inventory producer).
        conn: object providing ``retrbinary(cmd, callback)``. Defaults to the
            module-level ``ftp`` connection.
        home_prefix: leading directory to strip from *path*.

    Returns:
        The complete file content as ``bytes``.

    Raises:
        Whatever ``conn.retrbinary`` raises (for the default connection,
        ftplib errors and socket errors).
    """
    if conn is None:
        conn = ftp
    rel = path[len(home_prefix):] if path.startswith(home_prefix) else path
    bio = io.BytesIO()
    conn.retrbinary('RETR ' + rel, bio.write)
    return bio.getvalue()
def optimise_bytes(data, ext):
    """Re-encode image bytes with the script's optimisation settings.

    The image is always opened first, so undecodable data raises regardless
    of *ext*. EXIF orientation is applied on a best-effort basis; a failure
    there is ignored and the image is used as opened.

    Args:
        data: encoded image content.
        ext: file extension including the dot; compared case-insensitively.

    Returns:
        The re-encoded bytes, or ``None`` when the extension is not one of
        ``.jpg``/``.jpeg``/``.png``/``.webp`` or the WebP image reports
        ``is_animated``.
    """
    image = Image.open(io.BytesIO(data))
    try:
        image = ImageOps.exif_transpose(image)
    except Exception:
        pass

    kind = ext.lower()
    buf = io.BytesIO()

    if kind in ('.jpg', '.jpeg'):
        # Anything other than RGB or greyscale is converted to RGB before saving.
        if image.mode not in ('RGB', 'L'):
            image = image.convert('RGB')
        image.save(buf, format='JPEG', quality=85, optimize=True, progressive=True)
        return buf.getvalue()

    if kind == '.png':
        image.save(buf, format='PNG', optimize=True, compress_level=9)
        return buf.getvalue()

    if kind == '.webp' and not getattr(image, 'is_animated', False):
        image.save(buf, format='WEBP', quality=82, method=6)
        return buf.getvalue()

    # Unsupported extension, or an animated WebP that is deliberately skipped.
    return None
# Download and re-encode every selected file, collecting per-file results.
results = []
errors = []
start = time.time()
for done, entry in enumerate(selected, 1):
    ext = pathlib.Path(entry['path']).suffix.lower()
    try:
        data = fetch(entry['path'])
        opt = optimise_bytes(data, ext)
        if opt is not None:
            results.append({
                'path': entry['path'],
                'ext': ext,
                'orig_size': len(data),
                'inventory_size': entry['size'],
                'opt_size': len(opt),
                # A re-encode that comes out larger counts as zero saving.
                'saving': max(0, len(data) - len(opt)),
            })
    except Exception as exc:
        # One bad file must not stop the run; keep a truncated repr for the report.
        errors.append({'path': entry['path'], 'error': repr(exc)[:300]})
    if done % 100 == 0:
        print('sampled', done, 'ok', len(results), 'errors', len(errors), flush=True)

# Closing the session is best-effort.
try:
    ftp.quit()
except Exception:
    pass

# Scale each stratum's observed saving ratio up to that stratum's total bytes.
res_by_path = {row['path']: row for row in results}
est_total = 0
est_by_stratum = {}
for key, info in strata_info.items():
    hits = [res_by_path[p] for p in info['sample_paths'] if p in res_by_path]
    if not hits:
        # Nothing from this stratum was optimised, so it contributes no estimate.
        continue
    orig = sum(row['orig_size'] for row in hits)
    sav = sum(row['saving'] for row in hits)
    ratio = sav / orig if orig else 0
    est = ratio * info['bytes']
    est_total += est
    est_by_stratum[key] = {
        'population_count': info['count'],
        'population_bytes': info['bytes'],
        'sample_count': len(hits),
        'sample_orig_bytes': orig,
        'sample_saving_ratio': ratio,
        'estimated_saving_bytes': est,
    }

# Per-extension totals over every successfully optimised file.
by_ext_res = collections.defaultdict(lambda: {'count': 0, 'orig': 0, 'opt': 0, 'saving': 0})
for row in results:
    totals = by_ext_res[row['ext']]
    totals['count'] += 1
    totals['orig'] += row['orig_size']
    totals['opt'] += row['opt_size']
    totals['saving'] += row['saving']

population_bytes = sum(v['size'] for v in variants)
summary = {
    'method': 'Stratified sample of WordPress-generated image variants; JPEG q85 optimize/progressive, PNG lossless Pillow optimize, WebP q82. Estimates weighted by extension+size decile strata.',
    'variant_population_files': len(variants),
    'variant_population_bytes': population_bytes,
    'sample_selected': len(selected),
    'sample_success': len(results),
    'sample_errors': len(errors),
    'sample_original_bytes': sum(row['orig_size'] for row in results),
    'sample_optimised_bytes': sum(row['opt_size'] for row in results),
    'sample_saving_bytes': sum(row['saving'] for row in results),
    'estimated_saving_bytes': round(est_total),
    'estimated_post_optimisation_variant_bytes': round(population_bytes - est_total),
    'by_ext_sample': by_ext_res,
    'by_stratum_estimate': est_by_stratum,
    'errors': errors[:20],
    'duration_sec': round(time.time() - start, 1),
    'largest_sample_savings': sorted(results, key=lambda row: row['saving'], reverse=True)[:50],
}

report_path = OUT / 'optimisation_sample_estimate.json'
report_path.write_text(json.dumps(summary, indent=2))

# Console recap: headline numbers, then the per-extension totals.
headline_keys = [
    'variant_population_files',
    'variant_population_bytes',
    'sample_success',
    'sample_errors',
    'sample_original_bytes',
    'sample_saving_bytes',
    'estimated_saving_bytes',
    'estimated_post_optimisation_variant_bytes',
    'duration_sec',
]
print(json.dumps({name: summary[name] for name in headline_keys}, indent=2))
for ext, d in by_ext_res.items():
    print(ext, d)
print('saved', OUT / 'optimisation_sample_estimate.json')
