- Add company logo display in search results cards
- Make logo clickable (links to company profile)
- Temporarily hide "Aktualności i wydarzenia" section on company profiles
- Add scripts for KRS PDF download/parsing and CEIDG API

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
343 lines
10 KiB
Python
343 lines
10 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CEIDG API v3 Client - pobiera dane właścicieli JDG
|
|
|
|
Używa oficjalnego API CEIDG v3 (dane.biznes.gov.pl) do pobierania
|
|
danych o jednoosobowych działalnościach gospodarczych.
|
|
|
|
Usage:
|
|
python scripts/fetch_ceidg_api.py --nip 5881571773
|
|
python scripts/fetch_ceidg_api.py --all # wszystkie JDG z bazy
|
|
python scripts/fetch_ceidg_api.py --all --import # pobierz i importuj do bazy
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import json
|
|
import time
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from dataclasses import dataclass, asdict
|
|
from typing import Optional, List
|
|
import requests
|
|
|
|
# Make the repository root importable so that project modules (imported
# lazily further down) resolve when this file is run directly as a script.
_PROJECT_ROOT = Path(__file__).parent.parent
sys.path.insert(0, str(_PROJECT_ROOT))

# Read settings from the project's .env file before any of them are used.
from dotenv import load_dotenv

load_dotenv(_PROJECT_ROOT / '.env')

# CEIDG API endpoint and credentials (the key comes from the environment).
CEIDG_API_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firma"
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")

# Directory where fetched results are cached as JSON files.
JSON_OUTPUT_DIR = _PROJECT_ROOT / "data" / "ceidg_json"
|
|
|
|
|
|
@dataclass
class CEIDGOwner:
    """Owner of a sole proprietorship (JDG) as reported by CEIDG."""

    imie: str        # first name
    nazwisko: str    # surname
    nip: str         # tax identification number (NIP)
    regon: str = ""  # REGON number; empty when not provided

    def to_dict(self) -> dict:
        """Return all fields of this owner as a plain dictionary."""
        serialized = asdict(self)
        return serialized
|
|
|
|
|
|
@dataclass
class CEIDGData:
    """Company record built from a CEIDG API v3 response."""

    id: str                                  # record identifier
    nazwa: str                               # company name
    nip: str                                 # tax identification number (NIP)
    regon: str = ""                          # REGON number
    wlasciciel: Optional[CEIDGOwner] = None  # owner, when one is available
    adres_miasto: str = ""                   # address: city
    adres_ulica: str = ""                    # address: street line
    adres_kod: str = ""                      # address: postal code
    pkd_glowny: str = ""                     # main PKD activity code
    pkd_opis: str = ""                       # description of the main PKD code
    data_rozpoczecia: str = ""               # business start date
    status: str = ""                         # registration status
    zrodlo: str = "dane.biznes.gov.pl"       # data source label
    pobrano: str = ""                        # time the record was fetched

    def to_dict(self) -> dict:
        """Return the record as a plain dict, with the owner nested as a dict.

        ``asdict`` already recurses into nested dataclass instances, so the
        owner needs no separate conversion; it stays ``None`` when absent.
        """
        return asdict(self)
|
|
|
|
|
|
def fetch_ceidg_data(nip: str) -> Optional[CEIDGData]:
    """Fetch company data for a single NIP from the CEIDG API v3.

    Sends one GET request to ``CEIDG_API_URL`` with the NIP as a query
    parameter and ``CEIDG_API_KEY`` as a bearer token, then maps the first
    entry of the ``firma`` list in the response onto ``CEIDGData``.
    Progress and failures are reported on stdout; errors from the HTTP call
    and from JSON decoding are caught and turned into a ``None`` result.

    Args:
        nip: Tax identification number (NIP) to look up.

    Returns:
        A populated ``CEIDGData``, or ``None`` when the API key is missing,
        the request fails, the server answers with anything other than
        HTTP 200, the body is not valid JSON, or no company is returned.
    """
    if not CEIDG_API_KEY:
        print(" [ERROR] Brak CEIDG_API_KEY w .env")
        return None

    print(f" [INFO] Pobieranie danych CEIDG dla NIP {nip}...")

    headers = {
        "Authorization": f"Bearer {CEIDG_API_KEY}",
        "Accept": "application/json",
    }

    try:
        response = requests.get(
            CEIDG_API_URL,
            params={"nip": nip},
            headers=headers,
            timeout=30,
        )
    except requests.RequestException as e:
        print(f" [ERROR] Błąd połączenia: {e}")
        return None

    if response.status_code == 204:
        print(f" [WARN] Brak danych w CEIDG dla NIP {nip}")
        return None

    if response.status_code == 401:
        print(" [ERROR] Błąd autoryzacji - sprawdź CEIDG_API_KEY")
        return None

    if response.status_code != 200:
        print(f" [ERROR] HTTP {response.status_code}: {response.text[:100]}")
        return None

    try:
        data = response.json()
    except ValueError as e:
        # Every JSON decode error requests can raise is a ValueError. Newer
        # versions raise one that is also a RequestException, so it must be
        # handled here rather than reported as a connection failure.
        print(f" [ERROR] Błąd parsowania JSON: {e}")
        return None

    companies = data.get("firma") if isinstance(data, dict) else None
    if not companies:
        print(" [WARN] Brak danych firmy w odpowiedzi")
        return None

    firma = companies[0]

    # Owner: only built when the API returned an object (a null is skipped).
    owner = None
    w = firma.get("wlasciciel")
    if isinstance(w, dict):
        owner = CEIDGOwner(
            imie=w.get("imie", ""),
            nazwisko=w.get("nazwisko", ""),
            nip=w.get("nip", nip),
            regon=w.get("regon", ""),
        )

    # Address: "<street> <building>/<flat>". A missing street still keeps the
    # building number (addresses without street names), without a leading
    # space; the flat is only meaningful together with a building.
    adres = firma.get("adresDzialalnosci") or {}
    adres_ulica = str(adres.get("ulica") or "")
    building = adres.get("budynek")
    if building:
        adres_ulica = f"{adres_ulica} {building}".strip()
        flat = adres.get("lokal")
        if flat:
            adres_ulica += f"/{flat}"

    # Main PKD activity; tolerate a null value.
    pkd_glowny = firma.get("pkdGlowny") or {}

    ceidg_data = CEIDGData(
        id=firma.get("id", ""),
        nazwa=firma.get("nazwa", ""),
        nip=nip,
        regon=owner.regon if owner else "",
        wlasciciel=owner,
        adres_miasto=adres.get("miasto", ""),
        adres_ulica=adres_ulica,
        adres_kod=adres.get("kod", ""),
        pkd_glowny=pkd_glowny.get("kod", ""),
        pkd_opis=pkd_glowny.get("nazwa", ""),
        data_rozpoczecia=firma.get("dataRozpoczecia", ""),
        status=firma.get("status", ""),
        pobrano=datetime.now().isoformat(),
    )

    if owner:
        print(f" [OK] {owner.imie} {owner.nazwisko} ({ceidg_data.status})")
    else:
        print(f" [OK] {ceidg_data.nazwa} ({ceidg_data.status})")

    return ceidg_data
|
|
|
|
|
|
def import_to_database(results: List[CEIDGData]) -> dict:
    """Import JDG owners from fetched CEIDG records into the database.

    For every record that carries a named owner, the matching ``Company`` is
    looked up by NIP, a ``Person`` is found or created by first name and
    surname, and a ``CompanyPerson`` link with role category
    ``wlasciciel_jdg`` is created or has its source refreshed. All changes
    are committed in a single transaction at the end; any exception rolls
    the whole batch back.

    Args:
        results: Records produced by the CEIDG fetch step.

    Returns:
        Dict with the counters ``imported``, ``updated``, ``skipped`` and
        ``errors``. After a rollback ``imported`` and ``updated`` are reset
        to zero, because none of those changes were persisted.
    """
    from database import SessionLocal, Company, Person, CompanyPerson

    source = "dane.biznes.gov.pl"
    db = SessionLocal()
    stats = {"imported": 0, "updated": 0, "skipped": 0, "errors": 0}

    try:
        for data in results:
            owner = data.wlasciciel
            if not owner:
                stats["skipped"] += 1
                continue

            # An owner without any name would create (or match) a blank
            # Person and attach it to the company, so skip it.
            if not (owner.imie or owner.nazwisko):
                print(f" [SKIP] Brak imienia i nazwiska właściciela dla NIP {data.nip}")
                stats["skipped"] += 1
                continue

            # Find company by NIP
            company = db.query(Company).filter(Company.nip == data.nip).first()
            if not company:
                print(f" [SKIP] Firma z NIP {data.nip} nie istnieje w bazie")
                stats["skipped"] += 1
                continue

            # Find or create the person. Matching is by name only because
            # no PESEL is available here.
            # NOTE(review): two different people with the same first name and
            # surname will share one Person row - confirm this is acceptable.
            person = db.query(Person).filter(
                Person.nazwisko == owner.nazwisko,
                Person.imiona == owner.imie
            ).first()

            if not person:
                person = Person(
                    imiona=owner.imie,
                    nazwisko=owner.nazwisko,
                    pesel=None
                )
                db.add(person)
                # Flush so the new row gets its id before it is referenced.
                db.flush()
                print(f" [NEW] Utworzono osobę: {owner.imie} {owner.nazwisko}")

            # Check if relationship already exists
            existing = db.query(CompanyPerson).filter(
                CompanyPerson.company_id == company.id,
                CompanyPerson.person_id == person.id,
                CompanyPerson.role_category == "wlasciciel_jdg"
            ).first()

            if existing:
                # Only re-stamp links that came from a different source.
                if existing.source != source:
                    existing.source = source
                    existing.fetched_at = datetime.now()
                    stats["updated"] += 1
                else:
                    stats["skipped"] += 1
            else:
                db.add(CompanyPerson(
                    company_id=company.id,
                    person_id=person.id,
                    role="WŁAŚCICIEL",
                    role_category="wlasciciel_jdg",
                    source=source,
                    fetched_at=datetime.now()
                ))
                stats["imported"] += 1
                print(f" [ADD] {owner.imie} {owner.nazwisko} → {company.name}")

        db.commit()

    except Exception as e:
        db.rollback()
        print(f" [ERROR] Błąd importu: {e}")
        stats["errors"] += 1
        # The rollback discarded every pending insert/update of this batch,
        # so do not report them as done.
        stats["imported"] = 0
        stats["updated"] = 0
    finally:
        db.close()

    return stats
|
|
|
|
|
|
def main():
    """Command-line entry point: fetch CEIDG data and optionally import it.

    Modes (first match wins):
      * ``--nip NIP`` - fetch one company and print a short summary.
      * ``--all``     - fetch every company in the database that has a NIP
                        but no KRS number, pausing 0.5 s between requests.
      * neither       - print the help text and return.

    Fetched records are written as a JSON array to ``--output`` or, by
    default, to a timestamped file under ``JSON_OUTPUT_DIR``; the target
    file's parent directory is created when missing. With ``--import`` the
    records are then passed to ``import_to_database``.
    """
    parser = argparse.ArgumentParser(description="Fetch JDG owner data from CEIDG API v3")
    parser.add_argument("--nip", type=str, help="Single NIP to fetch")
    parser.add_argument("--all", action="store_true", help="Fetch all JDG from database")
    parser.add_argument("--import", dest="do_import", action="store_true",
                        help="Import fetched data to database")
    parser.add_argument("--output", type=str, help="Output JSON file")
    args = parser.parse_args()

    results = []

    if args.nip:
        data = fetch_ceidg_data(args.nip)
        if data:
            results.append(data)
            print(f"\n=== {data.nazwa} ===")
            if data.wlasciciel:
                print(f" Właściciel: {data.wlasciciel.imie} {data.wlasciciel.nazwisko}")
            print(f" Status: {data.status}")
            print(f" PKD: {data.pkd_glowny} - {data.pkd_opis}")
            print(f" Adres: {data.adres_ulica}, {data.adres_kod} {data.adres_miasto}")

    elif args.all:
        from database import SessionLocal, Company

        db = SessionLocal()
        try:
            # Get JDG companies (no KRS)
            jdg_companies = db.query(Company).filter(
                (Company.krs.is_(None)) | (Company.krs == ''),
                Company.nip.isnot(None),
                Company.nip != ''
            ).all()

            total = len(jdg_companies)
            print(f"Znaleziono {total} firm JDG\n")

            success = 0
            failed = 0

            for position, company in enumerate(jdg_companies, start=1):
                print(f"[{position}/{total}] {company.name}")
                data = fetch_ceidg_data(company.nip)
                if data:
                    results.append(data)
                    success += 1
                else:
                    failed += 1
                time.sleep(0.5)  # Rate limiting

            print("\n=== PODSUMOWANIE ===")
            print(f"Pobrano: {success}")
            print(f"Błędy/brak danych: {failed}")

        finally:
            db.close()

    else:
        parser.print_help()
        return

    # Save to JSON cache
    if results:
        if args.output:
            output_path = Path(args.output)
            output_file = args.output
        else:
            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output_path = JSON_OUTPUT_DIR / f"ceidg_data_{stamp}.json"
            output_file = str(output_path)

        # Create the directory the file actually goes into, so a custom
        # --output path in a not-yet-existing directory works as well.
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump([r.to_dict() for r in results], f, ensure_ascii=False, indent=2)
        print(f"\nDane zapisane do: {output_file}")

    # Import to database if requested
    if args.do_import and results:
        print("\n=== IMPORT DO BAZY ===")
        stats = import_to_database(results)
        print(f"\nZaimportowano: {stats['imported']}")
        print(f"Zaktualizowano: {stats['updated']}")
        print(f"Pominięto: {stats['skipped']}")
        print(f"Błędy: {stats['errors']}")
|
|
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|