"""One-time fix: remove duplicate company_websites for Waterm (company_id=12)""" import os from sqlalchemy import create_engine, text engine = create_engine(os.environ['DATABASE_URL']) with engine.connect() as conn: rows = conn.execute(text( "SELECT id, url, website_type, source, is_primary FROM company_websites " "WHERE company_id = 12 ORDER BY id" )).fetchall() print("Before cleanup:") for r in rows: print(f" id={r[0]} url={r[1]} type={r[2]} source={r[3]} primary={r[4]}") # Keep only the latest record per URL (prefer manual_edit) keep = conn.execute(text( "SELECT DISTINCT ON (url) id FROM company_websites " "WHERE company_id = 12 " "ORDER BY url, CASE WHEN source = 'manual_edit' THEN 0 ELSE 1 END, id DESC" )).fetchall() keep_ids = [r[0] for r in keep] print(f"Keeping ids: {keep_ids}") if keep_ids: placeholders = ",".join(str(i) for i in keep_ids) conn.execute(text( f"DELETE FROM company_websites WHERE company_id = 12 AND id NOT IN ({placeholders})" )) conn.commit() rows2 = conn.execute(text( "SELECT id, url, website_type, source, is_primary FROM company_websites " "WHERE company_id = 12 ORDER BY id" )).fetchall() print("After cleanup:") for r in rows2: print(f" id={r[0]} url={r[1]} type={r[2]} source={r[3]} primary={r[4]}")