nordabiz/ceidg_api_service.py
Maciej Pienczyn a67d069f81
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
fix: Fix CEIDG enrichment - two-phase API fetch with correct field mapping
CEIDG enrichment was broken due to key mismatches (expected adres_ulica but API
returns adresDzialalnosci.ulica), writes to non-existent columns (address_building,
address_postal_code), and missing saves for ceidg_id/status/owner/PKD fields.

Now fetches full details via /firma/{id} endpoint (Phase 2) for complete data
including PKD list, correspondence address, and succession manager.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-18 14:58:41 +01:00

264 lines
9.2 KiB
Python

#!/usr/bin/env python3
"""
CEIDG API Service
==================
Service module for fetching company data from CEIDG (Centralna Ewidencja
i Informacja o Działalności Gospodarczej) using the official API v3 at
dane.biznes.gov.pl.
Two-phase fetching:
Phase 1: GET /firmy?nip=X → find company ID
Phase 2: GET /firma/{id} → full details (PKD list, correspondence address, etc.)
"""
import os
import logging
import requests
from typing import Optional, Dict, Any
from dotenv import load_dotenv
# Load environment variables via python-dotenv before the os.getenv() lookup below
load_dotenv()
# Module-level logger named after this module
logger = logging.getLogger(__name__)
# API configuration
# Base URL of the CEIDG v3 API; endpoint paths (/firmy, /firma/{id}) are appended to it
CEIDG_API_BASE = "https://dane.biznes.gov.pl/api/ceidg/v3"
# API key read from the environment; None when the variable is unset,
# in which case fetch_ceidg_by_nip() logs a warning and returns None
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
CEIDG_TIMEOUT = 15 # seconds; passed as timeout= to the requests.get calls in this module
def _fetch_firma_detail(firma_id: str, headers: dict) -> Optional[Dict[str, Any]]:
    """Phase 2: Fetch full company details from the /firma/{id} endpoint.

    This is a best-effort call: every failure is logged as a warning and
    reported as ``None`` so the caller can fall back to the Phase 1 summary.

    Args:
        firma_id: Company identifier obtained from the Phase 1 search result.
        headers: HTTP headers to send with the request.

    Returns:
        The decoded JSON object when the endpoint answers with HTTP 200, or
        ``None`` when the request fails, the status is not 200, the body is
        not valid JSON, or the decoded payload is not a JSON object.
    """
    url = f"{CEIDG_API_BASE}/firma/{firma_id}"
    try:
        response = requests.get(url, headers=headers, timeout=CEIDG_TIMEOUT)
        if response.status_code != 200:
            logger.warning(f"CEIDG detail endpoint returned {response.status_code} for id={firma_id}")
            return None
        payload = response.json()
    except Exception as e:
        # Deliberately broad: a failed detail fetch must never abort the
        # lookup, because the caller still holds usable summary data.
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: {e}")
        return None

    # The caller reads the result with dict.get(); a list/str/number payload
    # would raise there and discard the summary data as well.
    if not isinstance(payload, dict):
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: unexpected payload type {type(payload)}")
        return None
    return payload
def _ceidg_as_dict(value: Any) -> Dict[str, Any]:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _ceidg_first_summary(data: Any, nip: str) -> Optional[Dict[str, Any]]:
    """Pick the first company record out of a Phase 1 (/firmy) response.

    Accepts a bare list, a dict with a 'firmy' list, or a dict that is the
    record itself. Returns None (after logging) when nothing was found or
    the payload has an unexpected shape.
    """
    if isinstance(data, list):
        if not data:
            logger.info(f"NIP {nip} not found in CEIDG (empty list)")
            return None
        summary = data[0]
    elif isinstance(data, dict):
        if 'firmy' in data:
            firmy = data.get('firmy') or []
            if not firmy:
                logger.info(f"NIP {nip} not found in CEIDG")
                return None
            summary = firmy[0]
        else:
            summary = data
    else:
        logger.error(f"Unexpected CEIDG response format: {type(data)}")
        return None

    # Everything downstream reads the record with dict.get().
    if not isinstance(summary, dict):
        logger.error(f"Unexpected CEIDG response format: {type(summary)}")
        return None
    return summary


def _ceidg_resolve_detail(summary: Dict[str, Any], headers: dict) -> tuple:
    """Phase 2: return ``(record, firma_id)``, preferring the detail record.

    The id comes from the summary's 'id' field or, failing that, from the
    path segment after '/firma/' in its 'link' field. The summary itself is
    returned as the record whenever the detail fetch yields nothing usable.
    """
    firma_id = summary.get('id')
    if not firma_id:
        # 'link' may be missing or null; only a string can carry the id.
        link = summary.get('link')
        if isinstance(link, str) and '/firma/' in link:
            firma_id = link.split('/firma/')[-1].strip('/')
    if not firma_id:
        return summary, firma_id

    logger.info(f"CEIDG Phase 2: fetching details for id={firma_id}")
    detail = _fetch_firma_detail(firma_id, headers)
    if isinstance(detail, dict) and detail:
        logger.info(f"CEIDG Phase 2: got full details for id={firma_id}")
        return detail, firma_id
    logger.info("CEIDG Phase 2: detail unavailable, using summary data")
    return summary, firma_id


def _ceidg_address_core(adres: Dict[str, Any]) -> Dict[str, Any]:
    """Map the street/building/unit/postcode/city fields of an address dict."""
    return {
        'ulica': adres.get('ulica'),
        'budynek': adres.get('budynek') or adres.get('nrDomu') or adres.get('nrBudynku'),
        'lokal': adres.get('lokal') or adres.get('nrLokalu'),
        'kod': adres.get('kodPocztowy') or adres.get('kod'),
        'miasto': adres.get('miejscowosc') or adres.get('miasto'),
    }


def _ceidg_build_result(firma: Dict[str, Any], nip: str, firma_id: Any) -> Dict[str, Any]:
    """Build the normalized result dict from a summary or detail record."""
    wlasciciel = _ceidg_as_dict(firma.get('wlasciciel'))
    adres = _ceidg_as_dict(firma.get('adresDzialalnosci'))
    adres_koresp = _ceidg_as_dict(firma.get('adresKorespondencyjny'))
    zarzadca = _ceidg_as_dict(firma.get('zarzadcaSukcesyjny'))

    # PKD - main code and full list
    pkd_glowny = firma.get('pkdGlowny') or {}
    pkd_lista = firma.get('pkd') or []
    if not pkd_lista:
        # Some responses carry the additional codes under 'pkdPozostale';
        # keep them even when no main code is present.
        pkd_pozostale = firma.get('pkdPozostale') or []
        if not isinstance(pkd_pozostale, list):
            pkd_pozostale = []
        pkd_lista = ([pkd_glowny] if pkd_glowny else []) + pkd_pozostale

    adres_dzialalnosci = _ceidg_address_core(adres)
    adres_dzialalnosci.update({
        'wojewodztwo': adres.get('wojewodztwo'),
        'powiat': adres.get('powiat'),
        'gmina': adres.get('gmina'),
    })

    has_zarzadca = bool(zarzadca.get('imie') or zarzadca.get('nazwisko'))

    return {
        # Identity
        'firma': firma.get('nazwa') or firma.get('firma') or firma.get('nazwaSkrocona'),
        'nip': firma.get('nip') or nip,
        'regon': firma.get('regon') or wlasciciel.get('regon'),
        'ceidg_id': firma_id or firma.get('id'),
        'status': firma.get('status'),
        # Owner
        'wlasciciel': {
            'imie': wlasciciel.get('imie'),
            'nazwisko': wlasciciel.get('nazwisko'),
        } if wlasciciel else {},
        'obywatelstwa': firma.get('obywatelstwa') or wlasciciel.get('obywatelstwa') or [],
        # Dates
        'dataRozpoczecia': (
            firma.get('dataRozpoczeciaDzialalnosci')
            or firma.get('dataWpisuDoCeidg')
        ),
        # Business address
        'adresDzialalnosci': adres_dzialalnosci,
        # Correspondence address
        'adresKorespondencyjny': _ceidg_address_core(adres_koresp) if adres_koresp else {},
        # PKD
        'pkdGlowny': pkd_glowny,
        'pkd': pkd_lista,
        # Contact
        'email': firma.get('email') or firma.get('adresEmail'),
        'telefon': firma.get('telefon'),
        'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),
        # Succession manager (None when neither name part is present)
        'zarzadcaSukcesyjny': {
            'imie': zarzadca.get('imie'),
            'nazwisko': zarzadca.get('nazwisko'),
        } if has_zarzadca else None,
        # Electronic delivery address
        'adresDoreczenElektronicznych': firma.get('adresDoreczenElektronicznych'),
        # Raw API response for template access
        'raw': firma,
    }


def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
    """
    Fetch company data from CEIDG API by NIP (two-phase).

    Phase 1: Search by NIP via /firmy?nip=X
    Phase 2: Get full details via /firma/{id}; when that fails, the Phase 1
             summary record is used instead.

    Args:
        nip: Tax identifier; surrounding whitespace, dashes and spaces are
            removed before validation. After cleaning it must be exactly
            10 ASCII digits.

    Returns:
        Normalized dict with the keys 'firma', 'nip', 'regon', 'ceidg_id',
        'status', 'wlasciciel', 'obywatelstwa', 'dataRozpoczecia',
        'adresDzialalnosci', 'adresKorespondencyjny', 'pkdGlowny', 'pkd',
        'email', 'telefon', 'stronaWWW', 'zarzadcaSukcesyjny',
        'adresDoreczenElektronicznych' and 'raw' (the record the values were
        read from). Returns None when the API key is not configured, the NIP
        is invalid, the company is not found, or any request/parsing error
        occurs. Errors are logged, never raised.
    """
    if not CEIDG_API_KEY:
        logger.warning("CEIDG_API_KEY not configured - CEIDG lookup disabled")
        return None

    # Clean NIP. None is treated as empty so it is rejected below, not raised on.
    nip = "" if nip is None else str(nip)
    nip = nip.strip().replace('-', '').replace(' ', '')
    # Explicit ASCII check: str.isdigit() also accepts other Unicode digits.
    if len(nip) != 10 or any(ch not in "0123456789" for ch in nip):
        logger.warning(f"Invalid NIP format: {nip}")
        return None

    headers = {
        "Authorization": f"Bearer {CEIDG_API_KEY}",
        "Accept": "application/json"
    }

    try:
        # --- Phase 1: Search by NIP ---
        logger.info(f"CEIDG Phase 1: searching for NIP {nip}")
        response = requests.get(
            f"{CEIDG_API_BASE}/firmy",
            params={"nip": nip},
            headers=headers,
            timeout=CEIDG_TIMEOUT
        )
        if response.status_code == 401:
            logger.error("CEIDG API authentication failed - check CEIDG_API_KEY")
            return None
        if response.status_code == 404:
            logger.info(f"NIP {nip} not found in CEIDG")
            return None
        if response.status_code != 200:
            logger.error(f"CEIDG API error: {response.status_code} - {response.text[:200]}")
            return None

        firma_summary = _ceidg_first_summary(response.json(), nip)
        if firma_summary is None:
            return None

        # --- Phase 2: Fetch full details (falls back to the summary) ---
        firma, firma_id = _ceidg_resolve_detail(firma_summary, headers)

        # --- Build normalized result ---
        result = _ceidg_build_result(firma, nip, firma_id)
        logger.info(f"CEIDG data found for NIP {nip}: {result['firma']} (PKD codes: {len(result['pkd'])})")
        return result
    except requests.exceptions.Timeout:
        logger.error(f"CEIDG API timeout for NIP {nip}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"CEIDG API request error for NIP {nip}: {e}")
        return None
    except Exception as e:
        # Top-level boundary: callers expect None rather than an exception.
        # logger.exception keeps the traceback for unexpected payload shapes.
        logger.exception(f"Error fetching CEIDG data for NIP {nip}: {e}")
        return None
# Manual check: run this module directly with a NIP as the only argument.
if __name__ == '__main__':
    import json
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python ceidg_api_service.py <NIP>")
        print("Example: python ceidg_api_service.py 5881571773")
        sys.exit(1)

    requested_nip = cli_args[0]
    separator = "=" * 60
    print(f"Pobieranie danych z CEIDG API dla NIP: {requested_nip}")
    print(separator)

    company = fetch_ceidg_by_nip(requested_nip)
    if not company:
        print(f"Nie znaleziono firmy o NIP {requested_nip} w CEIDG")
    else:
        # Dump the whole normalized record, then summarize the PKD codes.
        print(json.dumps(company, indent=2, ensure_ascii=False, default=str))
        print(separator)
        pkd_codes = company.get('pkd', [])
        total = len(pkd_codes)
        print(f"PKD codes: {total}")
        # Only the first five codes are listed; the rest are counted.
        for entry in pkd_codes[:5]:
            code = entry.get('kod', '?')
            label = entry.get('nazwa', '?')
            print(f" - {code}: {label}")
        hidden = total - 5
        if hidden > 0:
            print(f" ... i {hidden} więcej")