#!/usr/bin/env python3
"""
Import CEIDG data (PKD, owner, start date) to database.

Reads cached CEIDG JSON files and updates company records with:
- PKD code and description
- Business start date
- Owner name (for JDG without KRS)

Usage:
    python scripts/import_ceidg_to_db.py                    # Dry run
    python scripts/import_ceidg_to_db.py --apply            # Apply changes
    python scripts/import_ceidg_to_db.py --nip 5881571773   # Single company
"""

import os
import sys
import json
import argparse
from pathlib import Path
from datetime import date, datetime
from typing import Optional

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))

# Load environment
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent.parent / '.env')

from database import SessionLocal, Company


def find_latest_ceidg_json() -> Optional[Path]:
    """Return the most recently modified CEIDG JSON file.

    Looks for ``ceidg_data_*.json`` in ``<project root>/data/ceidg_json``.

    Returns:
        Path of the newest matching file (by modification time), or None
        when the directory does not exist or contains no matching file.
    """
    json_dir = Path(__file__).parent.parent / "data" / "ceidg_json"
    if not json_dir.exists():
        return None

    json_files = list(json_dir.glob("ceidg_data_*.json"))
    if not json_files:
        return None

    # Newest file by modification time.
    return max(json_files, key=lambda f: f.stat().st_mtime)


def load_ceidg_data(json_path: Path) -> list:
    """Load and return the decoded content of a CEIDG JSON file.

    Args:
        json_path: File to read (decoded as UTF-8).

    Returns:
        Whatever ``json.load`` produces for the file; the importer iterates
        over it and calls ``.get`` on each element.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _title_or_empty(value) -> str:
    """Return *value* stripped and title-cased, or '' if it is not a usable string."""
    if not isinstance(value, str):
        # Covers a missing key as well as an explicit JSON null.
        return ''
    return value.strip().title()


def _extract_owner_names(owner) -> tuple:
    """Return ``(first_name, last_name)`` from the record's owner entry.

    Both names are '' when *owner* is not a dict or the field is absent/null,
    so callers can rely on plain truthiness checks.
    """
    if not isinstance(owner, dict):
        return '', ''
    return _title_or_empty(owner.get('imie')), _title_or_empty(owner.get('nazwisko'))


def _parse_start_date(raw) -> Optional[date]:
    """Parse a ``YYYY-MM-DD`` string into a date; return None if empty or invalid."""
    if not raw:
        return None
    try:
        return datetime.strptime(raw, '%Y-%m-%d').date()
    except (ValueError, TypeError):
        # ValueError: wrong format; TypeError: value is not a string.
        return None


def import_ceidg_to_db(apply: bool = False, target_nip: Optional[str] = None):
    """
    Import CEIDG data to database.

    Loads the newest CEIDG JSON file, looks up each record's company by NIP
    and reports (and, with ``apply``, writes) differences in PKD code, PKD
    description, business start date and owner name. Empty values in the
    CEIDG record never overwrite existing database values.

    Args:
        apply: If True, actually apply changes. If False, dry run.
        target_nip: If set, only process this NIP.

    Returns:
        None. Progress and a summary are printed to stdout; warnings about
        unparseable start dates go to stderr.
    """
    json_path = find_latest_ceidg_json()
    if not json_path:
        print("ERROR: No CEIDG JSON files found in data/ceidg_json/")
        return

    print(f"Loading CEIDG data from: {json_path.name}")
    ceidg_records = load_ceidg_data(json_path)
    print(f"Found {len(ceidg_records)} CEIDG records")

    db = SessionLocal()
    updated = 0
    skipped = 0
    not_found = 0

    try:
        for record in ceidg_records:
            nip = record.get('nip')
            if not nip:
                continue

            # Filter by target NIP if specified
            if target_nip and nip != target_nip:
                continue

            # Find company by NIP
            company = db.query(Company).filter(Company.nip == nip).first()
            if not company:
                not_found += 1
                if target_nip:
                    print(f"  NOT FOUND: NIP {nip}")
                continue

            # Extract CEIDG data
            pkd_code = record.get('pkd_glowny')
            pkd_description = record.get('pkd_opis')
            start_date_str = record.get('data_rozpoczecia')
            owner_first, owner_last = _extract_owner_names(record.get('wlasciciel'))

            # Parse start date; a bad value is reported instead of being
            # silently dropped, but does not abort the import.
            business_start_date = _parse_start_date(start_date_str)
            if start_date_str and business_start_date is None:
                print(
                    f"  WARNING: NIP {nip}: invalid data_rozpoczecia "
                    f"{start_date_str!r}, start date skipped",
                    file=sys.stderr,
                )

            # Check if there are any changes
            changes = []
            if pkd_code and company.pkd_code != pkd_code:
                changes.append(f"PKD: {company.pkd_code} → {pkd_code}")
            if pkd_description and company.pkd_description != pkd_description:
                changes.append(f"PKD opis: {'set' if not company.pkd_description else 'update'}")
            if business_start_date and company.business_start_date != business_start_date:
                changes.append(f"Data rozpoczęcia: {company.business_start_date} → {business_start_date}")

            # Either name differing counts as an owner change; checking only
            # the first name would miss last-name-only updates.
            first_changed = bool(owner_first) and company.owner_first_name != owner_first
            last_changed = bool(owner_last) and company.owner_last_name != owner_last
            if first_changed or last_changed:
                changes.append(f"Właściciel: {owner_first} {owner_last}")

            if not changes:
                skipped += 1
                continue

            # Print changes
            print(f"\n{company.name} (NIP: {nip}):")
            for change in changes:
                print(f"  • {change}")

            if apply:
                # Apply updates; only non-empty CEIDG values are written.
                if pkd_code:
                    company.pkd_code = pkd_code
                if pkd_description:
                    company.pkd_description = pkd_description
                if business_start_date:
                    company.business_start_date = business_start_date
                if owner_first:
                    company.owner_first_name = owner_first
                if owner_last:
                    company.owner_last_name = owner_last

            # Counted in dry-run mode too: it is the number of companies
            # that would be (or were) updated.
            updated += 1

        if apply:
            db.commit()
            print(f"\n✅ Zaktualizowano {updated} firm")
        else:
            print(f"\n🔍 DRY RUN: {updated} firm do aktualizacji")
            print("   Użyj --apply aby zapisać zmiany")

        print(f"   Pominięto (bez zmian): {skipped}")
        print(f"   Nie znaleziono w bazie: {not_found}")

    finally:
        db.close()


def main():
    """Parse command-line options and run the CEIDG import."""
    parser = argparse.ArgumentParser(description='Import CEIDG data to database')
    parser.add_argument('--apply', action='store_true', help='Apply changes (default: dry run)')
    parser.add_argument('--nip', type=str, help='Process only this NIP')
    args = parser.parse_args()

    print("=" * 60)
    print("CEIDG → Database Import")
    print("=" * 60)

    import_ceidg_to_db(apply=args.apply, target_nip=args.nip)


if __name__ == '__main__':
    main()