fix: Fix CEIDG enrichment - two-phase API fetch with correct field mapping
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

CEIDG enrichment was broken by key mismatches (the code expected adres_ulica but the
API returns adresDzialalnosci.ulica), writes to non-existent columns (address_building,
address_postal_code), and missing saves for the ceidg_id/status/owner/PKD fields.

Now fetches full details via /firma/{id} endpoint (Phase 2) for complete data
including PKD list, correspondence address, and succession manager.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-18 14:58:41 +01:00
parent 79de5521f3
commit a67d069f81
2 changed files with 236 additions and 55 deletions

View File

@ -10,7 +10,7 @@ import logging
import os
import re
import time
from datetime import datetime
from datetime import datetime, date
import requests
from bs4 import BeautifulSoup
@ -450,36 +450,110 @@ def api_enrich_company_registry(company_id):
source = 'CEIDG'
updated_fields = []
if ceidg_data.get('nazwa') and not company.legal_name:
company.legal_name = ceidg_data['nazwa']
# --- CEIDG identifiers & metadata ---
if ceidg_data.get('ceidg_id'):
company.ceidg_id = ceidg_data['ceidg_id']
updated_fields.append('CEIDG ID')
if ceidg_data.get('status'):
company.ceidg_status = ceidg_data['status']
updated_fields.append('status CEIDG')
company.ceidg_raw_data = ceidg_data.get('raw')
company.ceidg_fetched_at = datetime.now()
company.data_source = 'CEIDG API'
company.last_verified_at = datetime.now()
# --- Owner ---
wlasciciel = ceidg_data.get('wlasciciel', {})
if wlasciciel.get('imie'):
company.owner_first_name = wlasciciel['imie']
updated_fields.append('właściciel')
if wlasciciel.get('nazwisko'):
company.owner_last_name = wlasciciel['nazwisko']
if ceidg_data.get('obywatelstwa'):
company.owner_citizenships = ceidg_data['obywatelstwa']
# --- Legal name ---
if ceidg_data.get('firma') and (not company.legal_name or company.legal_name == company.name):
company.legal_name = ceidg_data['firma']
updated_fields.append('nazwa pełna')
if ceidg_data.get('adres_ulica'):
company.address_street = ceidg_data['adres_ulica']
updated_fields.append('ulica')
if ceidg_data.get('adres_budynek'):
company.address_building = ceidg_data['adres_budynek']
updated_fields.append('nr budynku')
if ceidg_data.get('adres_lokal'):
company.address_apartment = ceidg_data['adres_lokal']
updated_fields.append('nr lokalu')
if ceidg_data.get('adres_kod'):
company.address_postal_code = ceidg_data['adres_kod']
updated_fields.append('kod pocztowy')
if ceidg_data.get('adres_miasto'):
company.address_city = ceidg_data['adres_miasto']
updated_fields.append('miasto')
# --- REGON ---
if not company.regon:
regon = ceidg_data.get('regon') or wlasciciel.get('regon')
if regon:
company.regon = regon
updated_fields.append('REGON')
# --- Business start date ---
if ceidg_data.get('dataRozpoczecia'):
try:
d = ceidg_data['dataRozpoczecia']
if isinstance(d, str):
company.business_start_date = date.fromisoformat(d)
updated_fields.append('data rozpoczęcia')
except (ValueError, TypeError):
pass
# --- Legal form ---
if not company.legal_form:
company.legal_form = 'JEDNOOSOBOWA DZIAŁALNOŚĆ GOSPODARCZA'
updated_fields.append('forma prawna')
# --- PKD (main) ---
pkd_gl = ceidg_data.get('pkdGlowny', {})
if pkd_gl and pkd_gl.get('kod'):
company.pkd_code = pkd_gl['kod']
company.pkd_description = pkd_gl.get('nazwa')
updated_fields.append(f'PKD główny ({pkd_gl["kod"]})')
# --- PKD (full list) ---
pkd_lista = ceidg_data.get('pkd', [])
if pkd_lista:
company.ceidg_pkd_list = pkd_lista
updated_fields.append(f'lista PKD ({len(pkd_lista)} kodów)')
# --- Business address ---
adres = ceidg_data.get('adresDzialalnosci', {})
ulica = adres.get('ulica', '')
budynek = adres.get('budynek', '')
lokal = adres.get('lokal', '')
if ulica or budynek:
street_parts = [ulica, budynek]
if lokal:
street_parts[-1] = (budynek + '/' + lokal) if budynek else lokal
company.address_street = ' '.join(p for p in street_parts if p)
updated_fields.append('adres')
if adres.get('kod') or adres.get('kodPocztowy'):
company.address_postal = adres.get('kod') or adres.get('kodPocztowy')
if adres.get('miasto') or adres.get('miejscowosc'):
company.address_city = adres.get('miasto') or adres.get('miejscowosc')
if company.address_street and company.address_postal and company.address_city:
company.address_full = f'{company.address_street}, {company.address_postal} {company.address_city}'
# --- Correspondence address ---
koresp = ceidg_data.get('adresKorespondencyjny', {})
if koresp and (koresp.get('ulica') or koresp.get('budynek')):
k_ulica = koresp.get('ulica', '')
k_budynek = koresp.get('budynek', '')
k_lokal = koresp.get('lokal', '')
k_parts = [k_ulica, k_budynek]
if k_lokal:
k_parts[-1] = (k_budynek + '/' + k_lokal) if k_budynek else k_lokal
company.correspondence_street = ' '.join(p for p in k_parts if p)
company.correspondence_postal = koresp.get('kod')
company.correspondence_city = koresp.get('miasto')
updated_fields.append('adres korespondencyjny')
# --- Contact (only if empty) ---
if ceidg_data.get('email') and not company.email:
company.email = ceidg_data['email']
updated_fields.append('email')
if ceidg_data.get('www') and not company.website:
company.website = ceidg_data['www']
if ceidg_data.get('stronaWWW') and not company.website:
company.website = ceidg_data['stronaWWW']
updated_fields.append('strona www')
if ceidg_data.get('telefon') and not company.phone:
company.phone = ceidg_data['telefon']
updated_fields.append('telefon')
if ceidg_data.get('regon') and not company.regon:
company.regon = ceidg_data['regon']
updated_fields.append('REGON')
details = {'updated_fields': updated_fields}
message_parts.append(f'zaktualizowano {len(updated_fields)} pól')

View File

@ -4,10 +4,12 @@ CEIDG API Service
==================
Service module for fetching company data from CEIDG (Centralna Ewidencja
i Informacja o Działalności Gospodarczej) using the official API at
i Informacja o Działalności Gospodarczej) using the official API v3 at
dane.biznes.gov.pl.
Provides fetch_ceidg_by_nip function for membership application workflow.
Two-phase fetching:
Phase 1: GET /firmy?nip=X - find the company ID
Phase 2: GET /firma/{id} - fetch full details (PKD list, correspondence address, etc.)
"""
import os
@ -22,20 +24,32 @@ load_dotenv()
logger = logging.getLogger(__name__)
# API Configuration
CEIDG_API_V3_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firmy"
CEIDG_API_BASE = "https://dane.biznes.gov.pl/api/ceidg/v3"
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
CEIDG_TIMEOUT = 15 # seconds
def _fetch_firma_detail(firma_id: str, headers: dict) -> Optional[Dict[str, Any]]:
    """Phase 2: Fetch full company details from the /firma/{id} endpoint.

    This is a best-effort lookup: every failure is logged as a warning and
    reported as ``None`` so the caller can fall back to the data it already
    has instead of aborting.

    Args:
        firma_id: Company identifier interpolated into the request path.
        headers: HTTP headers sent with the request.

    Returns:
        The decoded JSON object when the endpoint answers with HTTP 200 and
        the body is a JSON object; ``None`` on a non-200 status, a request
        failure, an undecodable body, or a body that is not a JSON object.
    """
    url = f"{CEIDG_API_BASE}/firma/{firma_id}"

    # Transport problems (timeout, DNS, connection reset, ...) all derive
    # from requests.RequestException.
    try:
        response = requests.get(url, headers=headers, timeout=CEIDG_TIMEOUT)
    except requests.RequestException as e:
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: {e}")
        return None

    if response.status_code != 200:
        logger.warning(f"CEIDG detail endpoint returned {response.status_code} for id={firma_id}")
        return None

    # A 200 response with a malformed body raises a ValueError subclass.
    try:
        payload = response.json()
    except ValueError as e:
        logger.warning(f"CEIDG detail fetch failed for id={firma_id}: {e}")
        return None

    # Honour the declared return type: anything other than a JSON object
    # (e.g. a list) is treated as "no detail available".
    # NOTE(review): this returns the object exactly as received - confirm
    # against the API docs that /firma/{id} is not wrapped in an envelope key.
    if not isinstance(payload, dict):
        logger.warning(
            f"CEIDG detail endpoint returned unexpected payload type "
            f"{type(payload).__name__} for id={firma_id}"
        )
        return None
    return payload
def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
"""
Fetch company data from CEIDG API by NIP.
Fetch company data from CEIDG API by NIP (two-phase).
Args:
nip: NIP number (10 digits, no dashes)
Phase 1: Search by NIP via /firmy?nip=X
Phase 2: Get full details via /firma/{id}
Returns:
Dictionary with company data or None if not found
Returns normalized dict with all available CEIDG data, or None.
"""
if not CEIDG_API_KEY:
logger.warning("CEIDG_API_KEY not configured - CEIDG lookup disabled")
@ -53,10 +67,10 @@ def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
}
try:
logger.info(f"Fetching CEIDG data for NIP {nip}")
# --- Phase 1: Search by NIP ---
logger.info(f"CEIDG Phase 1: searching for NIP {nip}")
response = requests.get(
CEIDG_API_V3_URL,
f"{CEIDG_API_BASE}/firmy",
params={"nip": nip},
headers=headers,
timeout=CEIDG_TIMEOUT
@ -65,62 +79,148 @@ def fetch_ceidg_by_nip(nip: str) -> Optional[Dict[str, Any]]:
if response.status_code == 401:
logger.error("CEIDG API authentication failed - check CEIDG_API_KEY")
return None
if response.status_code == 404:
logger.info(f"NIP {nip} not found in CEIDG")
return None
if response.status_code != 200:
logger.error(f"CEIDG API error: {response.status_code} - {response.text[:200]}")
return None
data = response.json()
# Handle response format - can be list or dict
# Handle response format - can be list or dict with 'firmy' key
if isinstance(data, list):
if not data:
logger.info(f"NIP {nip} not found in CEIDG (empty list)")
return None
firma = data[0]
firma_summary = data[0]
elif isinstance(data, dict):
if 'firmy' in data:
firmy = data.get('firmy', [])
if not firmy:
logger.info(f"NIP {nip} not found in CEIDG")
return None
firma = firmy[0]
firma_summary = firmy[0]
else:
firma = data
firma_summary = data
else:
logger.error(f"Unexpected CEIDG response format: {type(data)}")
return None
# Extract address
adres = firma.get('adresDzialalnosci', {}) or firma.get('adres', {}) or {}
if isinstance(adres, str):
adres = {'full': adres}
# --- Phase 2: Fetch full details ---
firma_id = firma_summary.get('id')
firma = firma_summary # fallback if detail fetch fails
if firma_id:
logger.info(f"CEIDG Phase 2: fetching details for id={firma_id}")
detail = _fetch_firma_detail(firma_id, headers)
if detail:
firma = detail
logger.info(f"CEIDG Phase 2: got full details for id={firma_id}")
else:
logger.info(f"CEIDG Phase 2: detail unavailable, using summary data")
else:
# Try extracting id from 'link' field
link = firma_summary.get('link', '')
if '/firma/' in link:
firma_id = link.split('/firma/')[-1].strip('/')
detail = _fetch_firma_detail(firma_id, headers)
if detail:
firma = detail
# --- Build normalized result ---
# Owner info
wlasciciel = firma.get('wlasciciel', {}) or {}
# Address - handle both nested and flat formats
adres = firma.get('adresDzialalnosci', {}) or {}
if isinstance(adres, str):
adres = {}
# Correspondence address
adres_koresp = firma.get('adresKorespondencyjny', {}) or {}
if isinstance(adres_koresp, str):
adres_koresp = {}
# PKD - main and full list
pkd_glowny = firma.get('pkdGlowny', {}) or {}
pkd_lista = firma.get('pkd', []) or []
# Some responses use 'pkdPozostale' for additional PKD codes
if not pkd_lista:
pkd_pozostale = firma.get('pkdPozostale', []) or []
if pkd_glowny and pkd_pozostale:
pkd_lista = [pkd_glowny] + pkd_pozostale
elif pkd_glowny:
pkd_lista = [pkd_glowny]
# Succession manager
zarzadca = firma.get('zarzadcaSukcesyjny', {}) or {}
# Build normalized result
result = {
'firma': firma.get('nazwa') or firma.get('nazwaSkrocona'),
'nip': firma.get('nip'),
'regon': firma.get('regon'),
# Identity
'firma': firma.get('nazwa') or firma.get('firma') or firma.get('nazwaSkrocona'),
'nip': firma.get('nip') or nip,
'regon': firma.get('regon') or wlasciciel.get('regon'),
'ceidg_id': firma_id or firma.get('id'),
'status': firma.get('status'),
# Owner
'wlasciciel': {
'imie': wlasciciel.get('imie'),
'nazwisko': wlasciciel.get('nazwisko'),
} if wlasciciel else {},
'obywatelstwa': firma.get('obywatelstwa', []) or wlasciciel.get('obywatelstwa', []) or [],
# Dates
'dataRozpoczecia': (
firma.get('dataRozpoczeciaDzialalnosci')
or firma.get('dataWpisuDoCeidg')
),
# Business address
'adresDzialalnosci': {
'kodPocztowy': adres.get('kodPocztowy') or adres.get('kod'),
'miejscowosc': adres.get('miejscowosc') or adres.get('miasto'),
'ulica': adres.get('ulica'),
'budynek': adres.get('budynek') or adres.get('nrDomu') or adres.get('nrBudynku'),
'lokal': adres.get('lokal') or adres.get('nrLokalu'),
'kod': adres.get('kodPocztowy') or adres.get('kod'),
'miasto': adres.get('miejscowosc') or adres.get('miasto'),
'wojewodztwo': adres.get('wojewodztwo'),
'powiat': adres.get('powiat'),
'gmina': adres.get('gmina'),
},
# Correspondence address
'adresKorespondencyjny': {
'ulica': adres_koresp.get('ulica'),
'budynek': adres_koresp.get('budynek') or adres_koresp.get('nrDomu'),
'lokal': adres_koresp.get('lokal') or adres_koresp.get('nrLokalu'),
'kod': adres_koresp.get('kodPocztowy') or adres_koresp.get('kod'),
'miasto': adres_koresp.get('miejscowosc') or adres_koresp.get('miasto'),
} if adres_koresp else {},
# PKD
'pkdGlowny': pkd_glowny,
'pkd': pkd_lista,
# Contact
'email': firma.get('email') or firma.get('adresEmail'),
'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),
'telefon': firma.get('telefon'),
'dataRozpoczeciaDzialalnosci': firma.get('dataRozpoczeciaDzialalnosci') or firma.get('dataWpisuDoCeidg'),
'status': firma.get('status'),
'raw': firma
'stronaWWW': firma.get('stronaWWW') or firma.get('www') or firma.get('strona'),
# Succession
'zarzadcaSukcesyjny': {
'imie': zarzadca.get('imie'),
'nazwisko': zarzadca.get('nazwisko'),
} if zarzadca.get('imie') or zarzadca.get('nazwisko') else None,
# Electronic delivery address
'adresDoreczenElektronicznych': firma.get('adresDoreczenElektronicznych'),
# Raw API response for template access
'raw': firma,
}
logger.info(f"CEIDG data found for NIP {nip}: {result['firma']}")
logger.info(f"CEIDG data found for NIP {nip}: {result['firma']} (PKD codes: {len(pkd_lista)})")
return result
except requests.exceptions.Timeout:
@ -152,5 +252,12 @@ if __name__ == '__main__':
if data:
print(json.dumps(data, indent=2, ensure_ascii=False, default=str))
print("=" * 60)
pkd = data.get('pkd', [])
print(f"PKD codes: {len(pkd)}")
for p in pkd[:5]:
print(f" - {p.get('kod', '?')}: {p.get('nazwa', '?')}")
if len(pkd) > 5:
print(f" ... i {len(pkd) - 5} więcej")
else:
print(f"Nie znaleziono firmy o NIP {nip} w CEIDG")