Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Detail endpoint uses 'dataRozpoczecia' (short), not 'dataRozpoczeciaDzialalnosci'. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
276 lines
9.7 KiB
Python
276 lines
9.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CEIDG API Service
|
|
==================
|
|
|
|
Service module for fetching company data from CEIDG (Centralna Ewidencja
|
|
i Informacja o Działalności Gospodarczej) using the official API v3 at
|
|
dane.biznes.gov.pl.
|
|
|
|
Two-phase fetching:
|
|
Phase 1: GET /firmy?nip=X → find company ID
|
|
Phase 2: GET /firma/{id} → full details (PKD list, correspondence address, etc.)
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
import requests
|
|
from typing import Optional, Dict, Any
|
|
from dotenv import load_dotenv
|
|
|
|
# Load environment variables (python-dotenv) before the configuration below
# reads them with os.getenv().
load_dotenv()

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# API Configuration
# Base URL that the /firmy (search) and /firma/{id} (detail) paths are appended to.
CEIDG_API_BASE = "https://dane.biznes.gov.pl/api/ceidg/v3"
# Bearer token sent in the Authorization header; when it is unset,
# fetch_ceidg_by_nip() logs a warning and returns None.
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
# Passed as the `timeout` argument of every requests.get() call in this module.
CEIDG_TIMEOUT = 15  # seconds
|
|
|
|
|
|
def _fetch_firma_detail(firma_id: str, headers: dict) -> Optional[Dict[str, Any]]:
    """Phase 2: Fetch full company details from the /firma/{id} endpoint.

    The API returns {"firma": [{...}]} — unwrap to get the actual firm dict.
    A dict payload without a 'firma' key is returned unchanged.

    Args:
        firma_id: Company identifier obtained from the Phase 1 search result.
        headers: HTTP headers sent with the request (authorization, accept).

    Returns:
        The company dict, or None when the request fails, the endpoint
        answers with a non-200 status, or the payload contains no usable
        company record. Failures are logged as warnings and never raised,
        so the caller can fall back to the Phase 1 summary data.
    """
    url = f"{CEIDG_API_BASE}/firma/{firma_id}"
    try:
        response = requests.get(url, headers=headers, timeout=CEIDG_TIMEOUT)
        if response.status_code != 200:
            logger.warning(f"CEIDG detail endpoint returned {response.status_code} for id={firma_id}")
            return None
        data = response.json()
    except Exception as e:
        # Deliberately broad: the detail lookup is best-effort and must not
        # abort the whole NIP lookup (network errors, invalid JSON, ...).
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: {e}")
        return None

    if not isinstance(data, dict):
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: unexpected payload type {type(data)}")
        return None

    if 'firma' not in data:
        # Payload is not wrapped — hand it back as-is.
        return data

    # Unwrap: /firma/{id} returns {"firma": [{...company data...}]}
    firmy = data['firma']
    if isinstance(firmy, list) and firmy and isinstance(firmy[0], dict):
        return firmy[0]
    if isinstance(firmy, dict) and firmy:
        return firmy

    # An empty or malformed 'firma' entry must not be returned: the wrapper
    # dict is truthy and would replace the caller's usable summary data.
    logger.warning(f"CEIDG detail fetch failed for id={firma_id}: empty 'firma' entry in response")
    return None
|
|
|
|
|
|
def _as_dict(value: Any) -> Dict[str, Any]:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _extract_firma_summary(data: Any, nip: str) -> Optional[Dict[str, Any]]:
    """Pick the first company entry out of a Phase 1 (/firmy) payload.

    Accepts a bare list, a dict with a 'firmy' list, or a dict that is the
    company record itself. Returns None (after logging) when nothing usable
    is found.
    """
    if isinstance(data, list):
        if not data:
            logger.info(f"NIP {nip} not found in CEIDG (empty list)")
            return None
        candidate = data[0]
    elif isinstance(data, dict):
        if 'firmy' in data:
            firmy = data.get('firmy') or []
            if not firmy:
                logger.info(f"NIP {nip} not found in CEIDG")
                return None
            candidate = firmy[0]
        else:
            candidate = data
    else:
        logger.error(f"Unexpected CEIDG response format: {type(data)}")
        return None

    if not isinstance(candidate, dict):
        logger.error(f"Unexpected CEIDG response format: {type(candidate)}")
        return None
    return candidate


def _normalize_ceidg_address(adres: Dict[str, Any], include_region: bool) -> Dict[str, Any]:
    """Map an address dict onto the normalized key set.

    Both the business and the correspondence address go through the same
    fallbacks for alternative field names. *include_region* adds the
    wojewodztwo/powiat/gmina keys used by the business address.
    """
    normalized = {
        'ulica': adres.get('ulica'),
        'budynek': adres.get('budynek') or adres.get('nrDomu') or adres.get('nrBudynku'),
        'lokal': adres.get('lokal') or adres.get('nrLokalu'),
        'kod': adres.get('kodPocztowy') or adres.get('kod'),
        'miasto': adres.get('miejscowosc') or adres.get('miasto'),
    }
    if include_region:
        normalized['wojewodztwo'] = adres.get('wojewodztwo')
        normalized['powiat'] = adres.get('powiat')
        normalized['gmina'] = adres.get('gmina')
    return normalized


def _build_ceidg_result(firma: Dict[str, Any], firma_id: Any, nip: str) -> Dict[str, Any]:
    """Build the normalized result dict from a company record."""
    # Nested sections; anything that is not a dict (None, flat string) is
    # treated as missing.
    wlasciciel = _as_dict(firma.get('wlasciciel'))
    adres = _as_dict(firma.get('adresDzialalnosci'))
    adres_koresp = _as_dict(firma.get('adresKorespondencyjny'))
    zarzadca = _as_dict(firma.get('zarzadcaSukcesyjny'))

    # PKD - main and full list
    pkd_glowny = firma.get('pkdGlowny') or {}
    pkd_lista = firma.get('pkd') or []
    if not pkd_lista:
        # Some responses use 'pkdPozostale' for additional PKD codes. Keep
        # those codes even when the main code is absent.
        pkd_pozostale = firma.get('pkdPozostale') or []
        if not isinstance(pkd_pozostale, list):
            pkd_pozostale = []
        pkd_lista = ([pkd_glowny] if pkd_glowny else []) + pkd_pozostale

    has_zarzadca = bool(zarzadca.get('imie') or zarzadca.get('nazwisko'))

    return {
        # Identity
        'firma': firma.get('nazwa') or firma.get('firma') or firma.get('nazwaSkrocona'),
        'nip': firma.get('nip') or nip,
        'regon': firma.get('regon') or wlasciciel.get('regon'),
        'ceidg_id': firma_id or firma.get('id'),
        'status': firma.get('status'),

        # Owner
        'wlasciciel': {
            'imie': wlasciciel.get('imie'),
            'nazwisko': wlasciciel.get('nazwisko'),
        } if wlasciciel else {},
        'obywatelstwa': firma.get('obywatelstwa', []) or wlasciciel.get('obywatelstwa', []) or [],

        # Dates - the detail endpoint uses the short 'dataRozpoczecia' key
        'dataRozpoczecia': (
            firma.get('dataRozpoczecia')
            or firma.get('dataRozpoczeciaDzialalnosci')
            or firma.get('dataWpisuDoCeidg')
        ),

        # Business address (always present, values may be None)
        'adresDzialalnosci': _normalize_ceidg_address(adres, include_region=True),

        # Correspondence address (empty dict when not provided)
        'adresKorespondencyjny': (
            _normalize_ceidg_address(adres_koresp, include_region=False) if adres_koresp else {}
        ),

        # PKD
        'pkdGlowny': pkd_glowny,
        'pkd': pkd_lista,

        # Contact
        'email': firma.get('email') or firma.get('adresEmail'),
        'telefon': firma.get('telefon'),
        'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),

        # Succession
        'zarzadcaSukcesyjny': {
            'imie': zarzadca.get('imie'),
            'nazwisko': zarzadca.get('nazwisko'),
        } if has_zarzadca else None,

        # Electronic delivery address
        'adresDoreczenElektronicznych': firma.get('adresDoreczenElektronicznych'),

        # Raw API response for template access
        'raw': firma,
    }


def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
    """
    Fetch company data from CEIDG API by NIP (two-phase).

    Phase 1: Search by NIP via /firmy?nip=X
    Phase 2: Get full details via /firma/{id}; when the detail request
             yields nothing, the Phase 1 summary record is used instead.

    Args:
        nip: Tax identification number; dashes and spaces are ignored.
            After cleaning it must consist of exactly 10 digits.

    Returns:
        Normalized dict with all available CEIDG data, or None when the API
        key is not configured, the NIP is invalid, the company is not found,
        or the request fails. Errors are logged, never raised.
    """
    if not CEIDG_API_KEY:
        logger.warning("CEIDG_API_KEY not configured - CEIDG lookup disabled")
        return None

    # Clean NIP. str() keeps None / non-string input from raising here,
    # outside of the error handling below.
    nip = str(nip if nip is not None else '').strip().replace('-', '').replace(' ', '')
    if len(nip) != 10 or not nip.isdigit():
        logger.warning(f"Invalid NIP format: {nip}")
        return None

    headers = {
        "Authorization": f"Bearer {CEIDG_API_KEY}",
        "Accept": "application/json",
    }

    try:
        # --- Phase 1: Search by NIP ---
        logger.info(f"CEIDG Phase 1: searching for NIP {nip}")
        response = requests.get(
            f"{CEIDG_API_BASE}/firmy",
            params={"nip": nip},
            headers=headers,
            timeout=CEIDG_TIMEOUT,
        )

        if response.status_code == 401:
            logger.error("CEIDG API authentication failed - check CEIDG_API_KEY")
            return None
        if response.status_code == 404:
            logger.info(f"NIP {nip} not found in CEIDG")
            return None
        if response.status_code != 200:
            logger.error(f"CEIDG API error: {response.status_code} - {response.text[:200]}")
            return None

        # Handle response format - can be list or dict with 'firmy' key
        firma_summary = _extract_firma_summary(response.json(), nip)
        if firma_summary is None:
            return None

        # --- Phase 2: Fetch full details ---
        firma_id = firma_summary.get('id')
        if not firma_id:
            # Try extracting id from 'link' field. The key may be present
            # with a null value, hence `or ''` rather than a .get() default.
            link = firma_summary.get('link') or ''
            if isinstance(link, str) and '/firma/' in link:
                firma_id = link.split('/firma/')[-1].strip('/') or None

        firma = firma_summary  # fallback if detail fetch fails
        if firma_id:
            logger.info(f"CEIDG Phase 2: fetching details for id={firma_id}")
            detail = _fetch_firma_detail(firma_id, headers)
            if detail and isinstance(detail, dict):
                firma = detail
                logger.info(f"CEIDG Phase 2: got full details for id={firma_id}")
            else:
                logger.info("CEIDG Phase 2: detail unavailable, using summary data")

        # --- Build normalized result ---
        result = _build_ceidg_result(firma, firma_id, nip)

        logger.info(f"CEIDG data found for NIP {nip}: {result['firma']} (PKD codes: {len(result['pkd'])})")
        return result

    except requests.exceptions.Timeout:
        logger.error(f"CEIDG API timeout for NIP {nip}")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"CEIDG API request error for NIP {nip}: {e}")
        return None
    except Exception as e:
        # Top-level boundary: callers expect None on any failure. The
        # traceback is logged so unexpected payload shapes stay diagnosable.
        logger.exception(f"Error fetching CEIDG data for NIP {nip}: {e}")
        return None
|
|
|
|
|
|
# Manual check from the command line: python ceidg_api_service.py <NIP>
if __name__ == '__main__':
    import json
    import sys

    # Exactly one positional argument (the NIP) is required.
    if not sys.argv[1:]:
        print("Usage: python ceidg_api_service.py <NIP>")
        print("Example: python ceidg_api_service.py 5881571773")
        sys.exit(1)

    separator = "=" * 60
    requested_nip = sys.argv[1]
    print(f"Pobieranie danych z CEIDG API dla NIP: {requested_nip}")
    print(separator)

    company = fetch_ceidg_by_nip(requested_nip)

    if not company:
        print(f"Nie znaleziono firmy o NIP {requested_nip} w CEIDG")
    else:
        # Full normalized payload first, then a short PKD summary.
        print(json.dumps(company, indent=2, ensure_ascii=False, default=str))
        print(separator)

        pkd_codes = company.get('pkd', [])
        print(f"PKD codes: {len(pkd_codes)}")
        for entry in pkd_codes[:5]:
            print(f" - {entry.get('kod', '?')}: {entry.get('nazwa', '?')}")

        remaining = len(pkd_codes) - 5
        if remaining > 0:
            print(f" ... i {remaining} więcej")
|