nordabiz/scripts/import_ceidg_to_db.py
Maciej Pienczyn abe1cd38a1 feat: Add PKD codes and CEIDG owner data to company profiles
- Add pkd_code, pkd_description columns for business activity classification
- Add business_start_date column from CEIDG
- Add owner_first_name, owner_last_name for JDG companies
- Create import script scripts/import_ceidg_to_db.py
- Add PKD card display in company profile template
- Add owner section for JDG companies without KRS
- Track SQL migrations in git (database/migrations/*.sql)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-13 16:07:03 +01:00

174 lines
5.5 KiB
Python

#!/usr/bin/env python3
"""
Import CEIDG data (PKD, owner, start date) to database.
Reads cached CEIDG JSON files and updates company records with:
- PKD code and description
- Business start date
- Owner name (for JDG without KRS)
Usage:
python scripts/import_ceidg_to_db.py # Dry run
python scripts/import_ceidg_to_db.py --apply # Apply changes
python scripts/import_ceidg_to_db.py --nip 5881571773 # Single company
"""
import os
import sys
import json
import argparse
from pathlib import Path
from datetime import datetime
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent.parent))
# Load environment
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent.parent / '.env')
from database import SessionLocal, Company
def find_latest_ceidg_json() -> "Path | None":
    """Return the most recently modified CEIDG JSON dump, if any.

    Looks in ``data/ceidg_json`` under the directory two levels above this
    file for files matching ``ceidg_data_*.json``.

    Returns:
        Path of the matching file with the newest modification time, or
        ``None`` when the directory is missing or holds no matching file.
    """
    json_dir = Path(__file__).parent.parent / "data" / "ceidg_json"
    if not json_dir.exists():
        return None
    # max() picks the newest file in a single pass instead of sorting the
    # whole listing; default=None covers a directory without matching files.
    return max(
        json_dir.glob("ceidg_data_*.json"),
        key=lambda f: f.stat().st_mtime,
        default=None,
    )
def load_ceidg_data(json_path: Path) -> list:
    """Read the UTF-8 encoded file at *json_path* and return its decoded JSON content."""
    with open(json_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def import_ceidg_to_db(apply: bool = False, target_nip: str = None):
    """Import cached CEIDG data into company records.

    Loads the newest CEIDG JSON dump, matches each record to a ``Company``
    row by NIP and reports (or, with ``apply=True``, writes) differences in
    PKD code, PKD description, business start date and owner name. Values
    that are missing or empty in the CEIDG record never overwrite what is
    already stored on the company.

    Args:
        apply: If True, write the changes and commit once after all records
            were processed. If False (default), only print what would
            change (dry run).
        target_nip: If set, only records whose NIP equals this value are
            processed.
    """
    json_path = find_latest_ceidg_json()
    if not json_path:
        print("ERROR: No CEIDG JSON files found in data/ceidg_json/")
        return

    print(f"Loading CEIDG data from: {json_path.name}")
    ceidg_records = load_ceidg_data(json_path)
    print(f"Found {len(ceidg_records)} CEIDG records")

    db = SessionLocal()
    updated = 0
    skipped = 0
    not_found = 0
    try:
        for record in ceidg_records:
            nip = record.get('nip')
            if not nip:
                continue

            # Filter by target NIP if specified
            if target_nip and nip != target_nip:
                continue

            # Find company by NIP
            company = db.query(Company).filter(Company.nip == nip).first()
            if not company:
                not_found += 1
                if target_nip:
                    print(f" NOT FOUND: NIP {nip}")
                continue

            # Extract CEIDG data
            pkd_code = record.get('pkd_glowny')
            pkd_description = record.get('pkd_opis')
            business_start_date = _parse_ceidg_date(record.get('data_rozpoczecia'), nip)

            # The owner entry may be absent, an explicit JSON null, or
            # malformed; treat anything that is not a dict as "no owner".
            owner = record.get('wlasciciel')
            if not isinstance(owner, dict):
                owner = {}
            owner_first = _clean_owner_name(owner.get('imie'))
            owner_last = _clean_owner_name(owner.get('nazwisko'))

            # Check if there are any changes
            changes = []
            if pkd_code and company.pkd_code != pkd_code:
                changes.append(f"PKD: {company.pkd_code} → {pkd_code}")
            if pkd_description and company.pkd_description != pkd_description:
                changes.append(f"PKD opis: {'set' if not company.pkd_description else 'update'}")
            if business_start_date and company.business_start_date != business_start_date:
                changes.append(f"Data rozpoczęcia: {company.business_start_date} → {business_start_date}")
            # Compare both name parts so that a change in the last name alone
            # is not silently skipped.
            owner_changed = (
                (owner_first and company.owner_first_name != owner_first)
                or (owner_last and company.owner_last_name != owner_last)
            )
            if owner_changed:
                owner_display = " ".join(part for part in (owner_first, owner_last) if part)
                changes.append(f"Właściciel: {owner_display}")

            if not changes:
                skipped += 1
                continue

            # Print changes
            print(f"\n{company.name} (NIP: {nip}):")
            for change in changes:
                print(change)

            if apply:
                # Apply updates; only non-empty CEIDG values are written.
                if pkd_code:
                    company.pkd_code = pkd_code
                if pkd_description:
                    company.pkd_description = pkd_description
                if business_start_date:
                    company.business_start_date = business_start_date
                if owner_first:
                    company.owner_first_name = owner_first
                if owner_last:
                    company.owner_last_name = owner_last

            # Counted in dry-run mode too: it feeds the "to be updated" summary.
            updated += 1

        if apply:
            db.commit()
            print(f"\n✅ Zaktualizowano {updated} firm")
        else:
            print(f"\n🔍 DRY RUN: {updated} firm do aktualizacji")
            print(" Użyj --apply aby zapisać zmiany")
        print(f" Pominięto (bez zmian): {skipped}")
        print(f" Nie znaleziono w bazie: {not_found}")
    finally:
        db.close()


def _parse_ceidg_date(value, nip):
    """Parse a ``YYYY-MM-DD`` date string; return None if absent or invalid.

    Invalid values are reported on stdout (with the record's NIP) instead of
    being dropped silently, so bad source data is visible in the run log.
    """
    if not value:
        return None
    try:
        return datetime.strptime(value, '%Y-%m-%d').date()
    except (TypeError, ValueError):
        print(f" WARNING: invalid data_rozpoczecia {value!r} for NIP {nip}")
        return None


def _clean_owner_name(value):
    """Return *value* stripped and title-cased, or '' if it is not a string."""
    if not isinstance(value, str):
        return ''
    return value.strip().title()
def main():
    """Parse the command-line options, print the banner and run the import."""
    cli = argparse.ArgumentParser(description='Import CEIDG data to database')
    cli.add_argument(
        '--apply',
        action='store_true',
        help='Apply changes (default: dry run)',
    )
    cli.add_argument(
        '--nip',
        type=str,
        help='Process only this NIP',
    )
    options = cli.parse_args()

    # Banner: title framed by two 60-character rules.
    rule = "=" * 60
    for banner_line in (rule, "CEIDG → Database Import", rule):
        print(banner_line)

    import_ceidg_to_db(apply=options.apply, target_nip=options.nip)
# Run the importer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()