fix: Fix CEIDG enrichment - two-phase API fetch with correct field mapping
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

CEIDG enrichment was broken due to key mismatches (expected adres_ulica but API
returns adresDzialalnosci.ulica), writes to non-existent columns (address_building,
address_postal_code), and missing saves for ceidg_id/status/owner/PKD fields.

Now fetches full details via /firma/{id} endpoint (Phase 2) for complete data
including PKD list, correspondence address, and succession manager.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-18 14:58:41 +01:00
parent 79de5521f3
commit a67d069f81
2 changed files with 236 additions and 55 deletions

View File

@ -10,7 +10,7 @@ import logging
import os import os
import re import re
import time import time
from datetime import datetime from datetime import datetime, date
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@ -450,36 +450,110 @@ def api_enrich_company_registry(company_id):
source = 'CEIDG' source = 'CEIDG'
updated_fields = [] updated_fields = []
if ceidg_data.get('nazwa') and not company.legal_name: # --- CEIDG identifiers & metadata ---
company.legal_name = ceidg_data['nazwa'] if ceidg_data.get('ceidg_id'):
company.ceidg_id = ceidg_data['ceidg_id']
updated_fields.append('CEIDG ID')
if ceidg_data.get('status'):
company.ceidg_status = ceidg_data['status']
updated_fields.append('status CEIDG')
company.ceidg_raw_data = ceidg_data.get('raw')
company.ceidg_fetched_at = datetime.now()
company.data_source = 'CEIDG API'
company.last_verified_at = datetime.now()
# --- Owner ---
wlasciciel = ceidg_data.get('wlasciciel', {})
if wlasciciel.get('imie'):
company.owner_first_name = wlasciciel['imie']
updated_fields.append('właściciel')
if wlasciciel.get('nazwisko'):
company.owner_last_name = wlasciciel['nazwisko']
if ceidg_data.get('obywatelstwa'):
company.owner_citizenships = ceidg_data['obywatelstwa']
# --- Legal name ---
if ceidg_data.get('firma') and (not company.legal_name or company.legal_name == company.name):
company.legal_name = ceidg_data['firma']
updated_fields.append('nazwa pełna') updated_fields.append('nazwa pełna')
if ceidg_data.get('adres_ulica'):
company.address_street = ceidg_data['adres_ulica'] # --- REGON ---
updated_fields.append('ulica') if not company.regon:
if ceidg_data.get('adres_budynek'): regon = ceidg_data.get('regon') or wlasciciel.get('regon')
company.address_building = ceidg_data['adres_budynek'] if regon:
updated_fields.append('nr budynku') company.regon = regon
if ceidg_data.get('adres_lokal'): updated_fields.append('REGON')
company.address_apartment = ceidg_data['adres_lokal']
updated_fields.append('nr lokalu') # --- Business start date ---
if ceidg_data.get('adres_kod'): if ceidg_data.get('dataRozpoczecia'):
company.address_postal_code = ceidg_data['adres_kod'] try:
updated_fields.append('kod pocztowy') d = ceidg_data['dataRozpoczecia']
if ceidg_data.get('adres_miasto'): if isinstance(d, str):
company.address_city = ceidg_data['adres_miasto'] company.business_start_date = date.fromisoformat(d)
updated_fields.append('miasto') updated_fields.append('data rozpoczęcia')
except (ValueError, TypeError):
pass
# --- Legal form ---
if not company.legal_form:
company.legal_form = 'JEDNOOSOBOWA DZIAŁALNOŚĆ GOSPODARCZA'
updated_fields.append('forma prawna')
# --- PKD (main) ---
pkd_gl = ceidg_data.get('pkdGlowny', {})
if pkd_gl and pkd_gl.get('kod'):
company.pkd_code = pkd_gl['kod']
company.pkd_description = pkd_gl.get('nazwa')
updated_fields.append(f'PKD główny ({pkd_gl["kod"]})')
# --- PKD (full list) ---
pkd_lista = ceidg_data.get('pkd', [])
if pkd_lista:
company.ceidg_pkd_list = pkd_lista
updated_fields.append(f'lista PKD ({len(pkd_lista)} kodów)')
# --- Business address ---
adres = ceidg_data.get('adresDzialalnosci', {})
ulica = adres.get('ulica', '')
budynek = adres.get('budynek', '')
lokal = adres.get('lokal', '')
if ulica or budynek:
street_parts = [ulica, budynek]
if lokal:
street_parts[-1] = (budynek + '/' + lokal) if budynek else lokal
company.address_street = ' '.join(p for p in street_parts if p)
updated_fields.append('adres')
if adres.get('kod') or adres.get('kodPocztowy'):
company.address_postal = adres.get('kod') or adres.get('kodPocztowy')
if adres.get('miasto') or adres.get('miejscowosc'):
company.address_city = adres.get('miasto') or adres.get('miejscowosc')
if company.address_street and company.address_postal and company.address_city:
company.address_full = f'{company.address_street}, {company.address_postal} {company.address_city}'
# --- Correspondence address ---
koresp = ceidg_data.get('adresKorespondencyjny', {})
if koresp and (koresp.get('ulica') or koresp.get('budynek')):
k_ulica = koresp.get('ulica', '')
k_budynek = koresp.get('budynek', '')
k_lokal = koresp.get('lokal', '')
k_parts = [k_ulica, k_budynek]
if k_lokal:
k_parts[-1] = (k_budynek + '/' + k_lokal) if k_budynek else k_lokal
company.correspondence_street = ' '.join(p for p in k_parts if p)
company.correspondence_postal = koresp.get('kod')
company.correspondence_city = koresp.get('miasto')
updated_fields.append('adres korespondencyjny')
# --- Contact (only if empty) ---
if ceidg_data.get('email') and not company.email: if ceidg_data.get('email') and not company.email:
company.email = ceidg_data['email'] company.email = ceidg_data['email']
updated_fields.append('email') updated_fields.append('email')
if ceidg_data.get('www') and not company.website: if ceidg_data.get('stronaWWW') and not company.website:
company.website = ceidg_data['www'] company.website = ceidg_data['stronaWWW']
updated_fields.append('strona www') updated_fields.append('strona www')
if ceidg_data.get('telefon') and not company.phone: if ceidg_data.get('telefon') and not company.phone:
company.phone = ceidg_data['telefon'] company.phone = ceidg_data['telefon']
updated_fields.append('telefon') updated_fields.append('telefon')
if ceidg_data.get('regon') and not company.regon:
company.regon = ceidg_data['regon']
updated_fields.append('REGON')
details = {'updated_fields': updated_fields} details = {'updated_fields': updated_fields}
message_parts.append(f'zaktualizowano {len(updated_fields)} pól') message_parts.append(f'zaktualizowano {len(updated_fields)} pól')

View File

@ -4,10 +4,12 @@ CEIDG API Service
================== ==================
Service module for fetching company data from CEIDG (Centralna Ewidencja Service module for fetching company data from CEIDG (Centralna Ewidencja
i Informacja o Działalności Gospodarczej) using the official API at i Informacja o Działalności Gospodarczej) using the official API v3 at
dane.biznes.gov.pl. dane.biznes.gov.pl.
Provides fetch_ceidg_by_nip function for membership application workflow. Two-phase fetching:
Phase 1: GET /firmy?nip=X find company ID
Phase 2: GET /firma/{id} full details (PKD list, correspondence address, etc.)
""" """
import os import os
@ -22,20 +24,32 @@ load_dotenv()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# API Configuration # API Configuration
CEIDG_API_V3_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firmy" CEIDG_API_BASE = "https://dane.biznes.gov.pl/api/ceidg/v3"
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY") CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
CEIDG_TIMEOUT = 15 # seconds CEIDG_TIMEOUT = 15 # seconds
def _fetch_firma_detail(firma_id: str, headers: dict) -> Optional[Dict[str, Any]]:
    """Phase 2: Fetch full company details from /firma/{id} endpoint.

    Args:
        firma_id: CEIDG record identifier obtained from the Phase 1 search.
        headers: HTTP headers sent with the request.

    Returns:
        The detail record as a dict, or None when the request fails, the
        endpoint answers with a non-200 status, or the payload is not a JSON
        object. Failures are logged as warnings rather than raised, so the
        caller can fall back to the Phase 1 summary data.
    """
    url = f"{CEIDG_API_BASE}/firma/{firma_id}"
    try:
        response = requests.get(url, headers=headers, timeout=CEIDG_TIMEOUT)
        if response.status_code != 200:
            logger.warning(f"CEIDG detail endpoint returned {response.status_code} for id={firma_id}")
            return None
        payload = response.json()
    except Exception as e:
        # Deliberately broad: the detail fetch is best-effort and must not
        # abort the whole lookup (network errors, invalid JSON, ...).
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: {e}")
        return None

    if not isinstance(payload, dict):
        # The caller calls .get() on the result; a list/scalar payload would
        # break normalization, so treat it as "detail unavailable".
        logger.warning(
            f"CEIDG detail endpoint returned unexpected payload type "
            f"{type(payload).__name__} for id={firma_id}"
        )
        return None

    # NOTE(review): the v3 detail endpoint appears to wrap the record as
    # {"firma": [ {...} ]} - confirm against the API docs. Unwrapping only
    # triggers for that exact shape, so a flat record passes through untouched.
    wrapped = payload.get('firma')
    if isinstance(wrapped, list) and wrapped and isinstance(wrapped[0], dict):
        return wrapped[0]
    return payload
def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]: def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
""" """
Fetch company data from CEIDG API by NIP. Fetch company data from CEIDG API by NIP (two-phase).
Args: Phase 1: Search by NIP via /firmy?nip=X
nip: NIP number (10 digits, no dashes) Phase 2: Get full details via /firma/{id}
Returns: Returns normalized dict with all available CEIDG data, or None.
Dictionary with company data or None if not found
""" """
if not CEIDG_API_KEY: if not CEIDG_API_KEY:
logger.warning("CEIDG_API_KEY not configured - CEIDG lookup disabled") logger.warning("CEIDG_API_KEY not configured - CEIDG lookup disabled")
@ -53,10 +67,10 @@ def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
} }
try: try:
logger.info(f"Fetching CEIDG data for NIP {nip}") # --- Phase 1: Search by NIP ---
logger.info(f"CEIDG Phase 1: searching for NIP {nip}")
response = requests.get( response = requests.get(
CEIDG_API_V3_URL, f"{CEIDG_API_BASE}/firmy",
params={"nip": nip}, params={"nip": nip},
headers=headers, headers=headers,
timeout=CEIDG_TIMEOUT timeout=CEIDG_TIMEOUT
@ -65,62 +79,148 @@ def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
if response.status_code == 401: if response.status_code == 401:
logger.error("CEIDG API authentication failed - check CEIDG_API_KEY") logger.error("CEIDG API authentication failed - check CEIDG_API_KEY")
return None return None
if response.status_code == 404: if response.status_code == 404:
logger.info(f"NIP {nip} not found in CEIDG") logger.info(f"NIP {nip} not found in CEIDG")
return None return None
if response.status_code != 200: if response.status_code != 200:
logger.error(f"CEIDG API error: {response.status_code} - {response.text[:200]}") logger.error(f"CEIDG API error: {response.status_code} - {response.text[:200]}")
return None return None
data = response.json() data = response.json()
# Handle response format - can be list or dict # Handle response format - can be list or dict with 'firmy' key
if isinstance(data, list): if isinstance(data, list):
if not data: if not data:
logger.info(f"NIP {nip} not found in CEIDG (empty list)") logger.info(f"NIP {nip} not found in CEIDG (empty list)")
return None return None
firma = data[0] firma_summary = data[0]
elif isinstance(data, dict): elif isinstance(data, dict):
if 'firmy' in data: if 'firmy' in data:
firmy = data.get('firmy', []) firmy = data.get('firmy', [])
if not firmy: if not firmy:
logger.info(f"NIP {nip} not found in CEIDG") logger.info(f"NIP {nip} not found in CEIDG")
return None return None
firma = firmy[0] firma_summary = firmy[0]
else: else:
firma = data firma_summary = data
else: else:
logger.error(f"Unexpected CEIDG response format: {type(data)}") logger.error(f"Unexpected CEIDG response format: {type(data)}")
return None return None
# Extract address # --- Phase 2: Fetch full details ---
adres = firma.get('adresDzialalnosci', {}) or firma.get('adres', {}) or {} firma_id = firma_summary.get('id')
if isinstance(adres, str): firma = firma_summary # fallback if detail fetch fails
adres = {'full': adres}
if firma_id:
logger.info(f"CEIDG Phase 2: fetching details for id={firma_id}")
detail = _fetch_firma_detail(firma_id, headers)
if detail:
firma = detail
logger.info(f"CEIDG Phase 2: got full details for id={firma_id}")
else:
logger.info(f"CEIDG Phase 2: detail unavailable, using summary data")
else:
# Try extracting id from 'link' field
link = firma_summary.get('link', '')
if '/firma/' in link:
firma_id = link.split('/firma/')[-1].strip('/')
detail = _fetch_firma_detail(firma_id, headers)
if detail:
firma = detail
# --- Build normalized result ---
# Owner info
wlasciciel = firma.get('wlasciciel', {}) or {}
# Address - handle both nested and flat formats
adres = firma.get('adresDzialalnosci', {}) or {}
if isinstance(adres, str):
adres = {}
# Correspondence address
adres_koresp = firma.get('adresKorespondencyjny', {}) or {}
if isinstance(adres_koresp, str):
adres_koresp = {}
# PKD - main and full list
pkd_glowny = firma.get('pkdGlowny', {}) or {}
pkd_lista = firma.get('pkd', []) or []
# Some responses use 'pkdPozostale' for additional PKD codes
if not pkd_lista:
pkd_pozostale = firma.get('pkdPozostale', []) or []
if pkd_glowny and pkd_pozostale:
pkd_lista = [pkd_glowny] + pkd_pozostale
elif pkd_glowny:
pkd_lista = [pkd_glowny]
# Succession manager
zarzadca = firma.get('zarzadcaSukcesyjny', {}) or {}
# Build normalized result
result = { result = {
'firma': firma.get('nazwa') or firma.get('nazwaSkrocona'), # Identity
'nip': firma.get('nip'), 'firma': firma.get('nazwa') or firma.get('firma') or firma.get('nazwaSkrocona'),
'regon': firma.get('regon'), 'nip': firma.get('nip') or nip,
'regon': firma.get('regon') or wlasciciel.get('regon'),
'ceidg_id': firma_id or firma.get('id'),
'status': firma.get('status'),
# Owner
'wlasciciel': {
'imie': wlasciciel.get('imie'),
'nazwisko': wlasciciel.get('nazwisko'),
} if wlasciciel else {},
'obywatelstwa': firma.get('obywatelstwa', []) or wlasciciel.get('obywatelstwa', []) or [],
# Dates
'dataRozpoczecia': (
firma.get('dataRozpoczeciaDzialalnosci')
or firma.get('dataWpisuDoCeidg')
),
# Business address
'adresDzialalnosci': { 'adresDzialalnosci': {
'kodPocztowy': adres.get('kodPocztowy') or adres.get('kod'),
'miejscowosc': adres.get('miejscowosc') or adres.get('miasto'),
'ulica': adres.get('ulica'), 'ulica': adres.get('ulica'),
'budynek': adres.get('budynek') or adres.get('nrDomu') or adres.get('nrBudynku'), 'budynek': adres.get('budynek') or adres.get('nrDomu') or adres.get('nrBudynku'),
'lokal': adres.get('lokal') or adres.get('nrLokalu'), 'lokal': adres.get('lokal') or adres.get('nrLokalu'),
'kod': adres.get('kodPocztowy') or adres.get('kod'),
'miasto': adres.get('miejscowosc') or adres.get('miasto'),
'wojewodztwo': adres.get('wojewodztwo'),
'powiat': adres.get('powiat'),
'gmina': adres.get('gmina'),
}, },
# Correspondence address
'adresKorespondencyjny': {
'ulica': adres_koresp.get('ulica'),
'budynek': adres_koresp.get('budynek') or adres_koresp.get('nrDomu'),
'lokal': adres_koresp.get('lokal') or adres_koresp.get('nrLokalu'),
'kod': adres_koresp.get('kodPocztowy') or adres_koresp.get('kod'),
'miasto': adres_koresp.get('miejscowosc') or adres_koresp.get('miasto'),
} if adres_koresp else {},
# PKD
'pkdGlowny': pkd_glowny,
'pkd': pkd_lista,
# Contact
'email': firma.get('email') or firma.get('adresEmail'), 'email': firma.get('email') or firma.get('adresEmail'),
'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),
'telefon': firma.get('telefon'), 'telefon': firma.get('telefon'),
'dataRozpoczeciaDzialalnosci': firma.get('dataRozpoczeciaDzialalnosci') or firma.get('dataWpisuDoCeidg'), 'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),
'status': firma.get('status'),
'raw': firma # Succession
'zarzadcaSukcesyjny': {
'imie': zarzadca.get('imie'),
'nazwisko': zarzadca.get('nazwisko'),
} if zarzadca.get('imie') or zarzadca.get('nazwisko') else None,
# Electronic delivery address
'adresDoreczenElektronicznych': firma.get('adresDoreczenElektronicznych'),
# Raw API response for template access
'raw': firma,
} }
logger.info(f"CEIDG data found for NIP {nip}: {result['firma']}") logger.info(f"CEIDG data found for NIP {nip}: {result['firma']} (PKD codes: {len(pkd_lista)})")
return result return result
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
@ -152,5 +252,12 @@ if __name__ == '__main__':
if data: if data:
print(json.dumps(data, indent=2, ensure_ascii=False, default=str)) print(json.dumps(data, indent=2, ensure_ascii=False, default=str))
print("=" * 60)
pkd = data.get('pkd', [])
print(f"PKD codes: {len(pkd)}")
for p in pkd[:5]:
print(f" - {p.get('kod', '?')}: {p.get('nazwa', '?')}")
if len(pkd) > 5:
print(f" ... i {len(pkd) - 5} więcej")
else: else:
print(f"Nie znaleziono firmy o NIP {nip} w CEIDG") print(f"Nie znaleziono firmy o NIP {nip} w CEIDG")