#!/usr/bin/env python3 """ Update Social Media profiles in NordaBiz database. Reads from social_media_found.json and updates company_social_media table. Usage: python update_social_media.py [--dry-run] """ import os import json import sys from datetime import datetime # Database connection: Use environment variable DATABASE_URL # WARNING: DATABASE_URL must be set before running this script. # Example: export DATABASE_URL='postgresql://nordabiz_app:PASSWORD@localhost:5432/nordabiz' # The database module will fall back to a safe placeholder if not set. # NEVER commit real credentials to version control (CWE-798). from database import SessionLocal, Company, CompanySocialMedia from sqlalchemy import func PLATFORMS = ['facebook', 'instagram', 'youtube', 'linkedin', 'tiktok', 'twitter'] def load_social_media_data(): """Load social media data from JSON file.""" with open('social_media_found.json', 'r') as f: data = json.load(f) return data['results'] def update_social_media(dry_run=False): """Update social media profiles in database.""" db = SessionLocal() try: results = load_social_media_data() stats = { 'companies_processed': 0, 'profiles_added': 0, 'profiles_updated': 0, 'profiles_unchanged': 0, 'errors': 0 } changes = [] for item in results: company_id = item['company_id'] company_name = item['name'] stats['companies_processed'] += 1 # Check if company exists company = db.query(Company).filter(Company.id == company_id).first() if not company: print(f" WARNING: Company ID {company_id} ({company_name}) not found in database") stats['errors'] += 1 continue for platform in PLATFORMS: url = item.get(platform) if not url: continue # Check if profile already exists existing = db.query(CompanySocialMedia).filter( CompanySocialMedia.company_id == company_id, CompanySocialMedia.platform == platform ).first() if existing: if existing.url != url: old_url = existing.url if not dry_run: existing.url = url existing.verified_at = datetime.now() existing.source = 'web_search_2025-12-29' changes.append({ 'company_id': company_id, 'company_name': company_name, 'platform': platform, 'action': 'updated', 'old_url': old_url, 'new_url': url }) stats['profiles_updated'] += 1 else: stats['profiles_unchanged'] += 1 else: if not dry_run: new_profile = CompanySocialMedia( company_id=company_id, platform=platform, url=url, verified_at=datetime.now(), source='web_search_2025-12-29', is_valid=True ) db.add(new_profile) changes.append({ 'company_id': company_id, 'company_name': company_name, 'platform': platform, 'action': 'added', 'url': url }) stats['profiles_added'] += 1 if not dry_run: db.commit() print("\n=== Changes committed to database ===") else: print("\n=== DRY RUN - No changes made ===") # Print statistics print(f"\n=== Statistics ===") print(f"Companies processed: {stats['companies_processed']}") print(f"Profiles added: {stats['profiles_added']}") print(f"Profiles updated: {stats['profiles_updated']}") print(f"Profiles unchanged: {stats['profiles_unchanged']}") print(f"Errors: {stats['errors']}") # Print changes summary if changes: print(f"\n=== Changes Summary ===") for change in changes: if change['action'] == 'added': print(f" + {change['company_name']} | {change['platform']}: {change['url']}") else: print(f" ~ {change['company_name']} | {change['platform']}: {change['old_url']} -> {change['new_url']}") return stats, changes except Exception as e: db.rollback() print(f"ERROR: {e}") raise finally: db.close() def print_summary_table(changes): """Print a summary table of all social media by platform.""" print("\n" + "="*80) print("SOCIAL MEDIA SUMMARY - NordaBiz 2025-12-29") print("="*80) with open('social_media_found.json', 'r') as f: data = json.load(f) results = data['results'] # Count by platform platform_counts = {p: 0 for p in PLATFORMS} for item in results: for platform in PLATFORMS: if item.get(platform): platform_counts[platform] += 1 print("\nPlatform Statistics:") print("-" * 40) for platform, count in sorted(platform_counts.items(), key=lambda x: -x[1]): bar = "#" * (count // 2) print(f" {platform:12} | {count:2} | {bar}") total_profiles = sum(platform_counts.values()) companies_with_any = sum(1 for r in results if any(r.get(p) for p in PLATFORMS)) companies_without = len(results) - companies_with_any print(f"\nTotal profiles found: {total_profiles}") print(f"Companies with social media: {companies_with_any}") print(f"Companies without social media: {companies_without}") if __name__ == '__main__': dry_run = '--dry-run' in sys.argv print("="*60) print("NordaBiz Social Media Update") print("="*60) if dry_run: print("MODE: Dry run (no changes will be made)") else: print("MODE: Live update") stats, changes = update_social_media(dry_run=dry_run) print_summary_table(changes)