feat: Add CEIDG API name search and --missing-nip option

- Add search_ceidg_by_name() for API v3 name-based queries
- Add search_missing_nip_companies() to find NIP for companies without NIP
- Add --missing-nip flag to search for all companies missing NIP
- Add --apply-nip flag to save found NIPs to database
- Fix API endpoint: /api/ceidg/v3/firmy (not /firma)
- Correctly extract NIP from wlasciciel object in response

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-01 07:16:22 +01:00
parent 99f7a5a88a
commit 79a1a60621

View File

@ -1,13 +1,15 @@
#!/usr/bin/env python3
"""
CEIDG API v3 Client - pobiera dane właścicieli JDG
CEIDG API Client - pobiera dane właścicieli JDG
Używa oficjalnego API CEIDG v3 (dane.biznes.gov.pl) do pobierania
Używa oficjalnego API CEIDG (dane.biznes.gov.pl) do pobierania
danych o jednoosobowych działalnościach gospodarczych.
Usage:
python scripts/fetch_ceidg_api.py --nip 5881571773
python scripts/fetch_ceidg_api.py --name "Kancelaria Notarialna"
python scripts/fetch_ceidg_api.py --all # wszystkie JDG z bazy
python scripts/fetch_ceidg_api.py --missing-nip # firmy bez NIP
python scripts/fetch_ceidg_api.py --all --import # pobierz i importuj do bazy
"""
@ -30,7 +32,8 @@ from dotenv import load_dotenv
# Load environment variables from the .env file located one directory above
# the directory that contains this script.
load_dotenv(Path(__file__).parent.parent / '.env')
# API Configuration
# NOTE(review): this constant points at the singular `/firma` path, while
# search_ceidg_by_name queries CEIDG_API_V3_URL (`/firmy`) - confirm whether
# this constant is still needed.
CEIDG_API_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firma"
# API v3 - main endpoint for company queries (supports NIP, REGON, nazwa, etc.)
CEIDG_API_V3_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firmy"
# API key read from the CEIDG_API_KEY environment variable (None when unset);
# it is sent as a Bearer token by the request helpers.
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
# Output directory for JSON cache
@ -94,7 +97,7 @@ def fetch_ceidg_data(nip: str) -> Optional[CEIDGData]:
try:
response = requests.get(
CEIDG_API_URL,
CEIDG_API_V3_URL,
params={"nip": nip},
headers=headers,
timeout=30
@ -175,6 +178,166 @@ def fetch_ceidg_data(nip: str) -> Optional[CEIDGData]:
return None
def search_ceidg_by_name(nazwa: str, miasto: Optional[str] = None) -> List[dict]:
    """Search CEIDG (API v3) for companies by name.

    Args:
        nazwa: Company name to search for.
        miasto: Optional city used to narrow the results.

    Returns:
        The value stored under the ``firmy`` key of the JSON response, or an
        empty list when the API key is missing, the name is blank, the
        request fails, the HTTP status is not 200, the body is not valid
        JSON, or the response contains no results.
    """
    if not CEIDG_API_KEY:
        print(" [ERROR] Brak CEIDG_API_KEY w .env")
        return []

    # A blank name would reach the API as an empty filter (and requests drops
    # parameters whose value is None), so the query would not be restricted
    # by name at all. Refuse it up front instead of returning unrelated firms.
    if not nazwa or not nazwa.strip():
        print(" [ERROR] Pusta nazwa firmy - pomijam wyszukiwanie")
        return []

    print(f" [INFO] Wyszukiwanie w CEIDG: '{nazwa}'...")

    headers = {
        "Authorization": f"Bearer {CEIDG_API_KEY}",
        "Accept": "application/json",
    }
    params = {"nazwa": nazwa}
    if miasto:
        params["miasto"] = miasto

    # Only the network call lives in this try block, so transport failures
    # and malformed bodies are reported with their own, accurate messages.
    try:
        response = requests.get(
            CEIDG_API_V3_URL,
            params=params,
            headers=headers,
            timeout=30,
        )
    except requests.RequestException as e:
        print(f" [ERROR] Błąd połączenia: {e}")
        return []

    if response.status_code == 204:
        print(f" [WARN] Brak wyników dla '{nazwa}'")
        return []
    if response.status_code == 401:
        print(" [ERROR] Błąd autoryzacji - sprawdź CEIDG_API_KEY")
        return []
    if response.status_code != 200:
        print(f" [ERROR] HTTP {response.status_code}: {response.text[:200]}")
        return []

    # Every JSON decode error raised by response.json() is a ValueError
    # subclass; in recent requests releases it also subclasses
    # RequestException, which is why it must not share a try block with the
    # request itself.
    try:
        data = response.json()
    except ValueError as e:
        print(f" [ERROR] Błąd parsowania JSON: {e}")
        return []

    # Guard against payloads that are not JSON objects (e.g. null or a list).
    firmy = data.get("firmy") if isinstance(data, dict) else None
    if not firmy:
        print(f" [WARN] Brak wyników dla '{nazwa}'")
        return []

    print(f" [OK] Znaleziono {len(firmy)} wyników")
    return firmy
def _best_ceidg_match(company_name, company_city, candidates):
    """Return ``(best_candidate, best_score)`` for a list of CEIDG results.

    The score is the SequenceMatcher ratio of the lower-cased names, plus a
    0.2 bonus when the company's city is contained in the candidate's
    business-address city. Because of the bonus the score can exceed 1.0.
    ``best_candidate`` is None when no candidate scores above 0.0.
    """
    from difflib import SequenceMatcher

    wanted_name = (company_name or "").lower()
    wanted_city = (company_city or "").lower()

    best_match = None
    best_score = 0.0
    for candidate in candidates:
        if not isinstance(candidate, dict):
            continue

        # `or` fallbacks also cover keys that are present but null.
        candidate_name = candidate.get("nazwa") or ""
        score = SequenceMatcher(None, wanted_name, candidate_name.lower()).ratio()

        # Bonus for a matching city.
        candidate_city = (candidate.get("adresDzialalnosci") or {}).get("miasto")
        if wanted_city and candidate_city and wanted_city in candidate_city.lower():
            score += 0.2

        if score > best_score:
            best_score = score
            best_match = candidate

    return best_match, best_score


def _build_nip_match(company, candidates):
    """Build the result record for ``company`` from CEIDG search results.

    Prints the outcome and returns the record dict, or None when no candidate
    reaches the 0.6 score threshold or the best candidate carries no NIP.
    """
    best_match, best_score = _best_ceidg_match(
        company.name, company.address_city, candidates
    )

    if not best_match or best_score < 0.6:
        print(f" ✗ Brak dopasowania (najlepszy score: {best_score:.2f})")
        return None

    # NIP and REGON are nested in the `wlasciciel` object of the response.
    wlasciciel = best_match.get("wlasciciel") or {}
    nip = wlasciciel.get("nip")
    if not nip:
        print(f" ✗ Znaleziono firmę ale brak NIP w odpowiedzi")
        return None

    found_name = best_match.get("nazwa") or ""
    confidence = "high" if best_score >= 0.8 else "medium"

    print(f" ✓ Znaleziono: NIP {nip} ({confidence}, score: {best_score:.2f})")
    print(f"{found_name}")

    return {
        "company_id": company.id,
        "company_name": company.name,
        "found_nip": nip,
        "found_regon": wlasciciel.get("regon"),
        "found_name": found_name,
        "found_status": best_match.get("status", ""),
        "score": round(best_score, 2),
        "confidence": confidence,
    }


def search_missing_nip_companies() -> List[dict]:
    """Look up NIP numbers for database companies that have none.

    Each company whose ``nip`` is NULL or empty is searched in CEIDG by name
    (narrowed by its city), and the best fuzzy name match is kept when its
    score is at least 0.6.

    Returns:
        A list of dicts with the keys ``company_id``, ``company_name``,
        ``found_nip``, ``found_regon``, ``found_name``, ``found_status``,
        ``score`` and ``confidence`` ("high" for score >= 0.8, otherwise
        "medium").
    """
    from database import SessionLocal, Company

    db = SessionLocal()
    results = []
    try:
        # Fetch companies without a NIP.
        companies = db.query(Company).filter(
            (Company.nip.is_(None)) | (Company.nip == '')
        ).order_by(Company.name).all()

        print(f"\n=== Wyszukiwanie NIP dla {len(companies)} firm ===\n")

        for i, company in enumerate(companies):
            print(f"[{i+1}/{len(companies)}] {company.name}")

            search_results = search_ceidg_by_name(
                company.name,
                miasto=company.address_city
            )
            if search_results:
                found = _build_nip_match(company, search_results)
                if found is not None:
                    results.append(found)

            # Rate limiting: pause after every lookup, whatever its outcome,
            # so no code path can fire back-to-back API requests.
            time.sleep(0.5)
    finally:
        db.close()

    return results
def import_to_database(results: List[CEIDGData]) -> dict:
"""
Importuje dane właścicieli JDG do bazy danych.
@ -260,11 +423,16 @@ def import_to_database(results: List[CEIDGData]) -> dict:
def main():
parser = argparse.ArgumentParser(description="Fetch JDG owner data from CEIDG API v3")
parser = argparse.ArgumentParser(description="Fetch JDG owner data from CEIDG API")
parser.add_argument("--nip", type=str, help="Single NIP to fetch")
parser.add_argument("--name", type=str, help="Search by company name")
parser.add_argument("--city", type=str, help="City for name search (optional)")
parser.add_argument("--all", action="store_true", help="Fetch all JDG from database")
parser.add_argument("--missing-nip", action="store_true", help="Search NIP for companies without NIP")
parser.add_argument("--import", dest="do_import", action="store_true",
help="Import fetched data to database")
parser.add_argument("--apply-nip", action="store_true",
help="Apply found NIPs to database (with --missing-nip)")
parser.add_argument("--output", type=str, help="Output JSON file")
args = parser.parse_args()
@ -281,6 +449,69 @@ def main():
print(f" PKD: {data.pkd_glowny} - {data.pkd_opis}")
print(f" Adres: {data.adres_ulica}, {data.adres_kod} {data.adres_miasto}")
elif args.name:
# Search by name using API v3
search_results = search_ceidg_by_name(args.name, miasto=args.city)
if search_results:
print(f"\n=== Wyniki dla '{args.name}' ===\n")
for i, r in enumerate(search_results[:10]):
# NIP is in wlasciciel object
wlasciciel = r.get("wlasciciel", {})
nip = wlasciciel.get("nip", "?")
regon = wlasciciel.get("regon", "?")
nazwa = r.get("nazwa", "?")
status = r.get("status", "?")
adres = r.get("adresDzialalnosci", {})
miasto = adres.get("miasto", "")
print(f"{i+1}. NIP: {nip} | REGON: {regon}")
print(f" Nazwa: {nazwa}")
print(f" Status: {status}")
print(f" Miasto: {miasto}")
if wlasciciel.get("imie"):
print(f" Właściciel: {wlasciciel.get('imie')} {wlasciciel.get('nazwisko')}")
print()
elif args.missing_nip:
# Search NIP for companies without NIP
found_results = search_missing_nip_companies()
if found_results:
# Summary
high_conf = [r for r in found_results if r["confidence"] == "high"]
medium_conf = [r for r in found_results if r["confidence"] == "medium"]
print(f"\n=== PODSUMOWANIE ===")
print(f"Znaleziono: {len(found_results)} NIP")
print(f" - Wysoka pewność: {len(high_conf)}")
print(f" - Średnia pewność: {len(medium_conf)}")
# Save results
JSON_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
output_file = args.output or str(JSON_OUTPUT_DIR / f"missing_nip_search_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(found_results, f, ensure_ascii=False, indent=2)
print(f"\nWyniki zapisane: {output_file}")
# Apply NIPs if requested
if args.apply_nip and high_conf:
from database import SessionLocal, Company
db = SessionLocal()
try:
print(f"\n=== Zapisywanie {len(high_conf)} NIP (wysoka pewność) ===")
for r in high_conf:
company = db.query(Company).filter(Company.id == r["company_id"]).first()
if company:
company.nip = r["found_nip"]
print(f"{company.name} → NIP {r['found_nip']}")
db.commit()
print(f"\nZapisano {len(high_conf)} NIP do bazy")
except Exception as e:
db.rollback()
print(f" [ERROR] Błąd zapisu: {e}")
finally:
db.close()
return
elif args.all:
from database import SessionLocal, Company