feat: Add CEIDG API name search and --missing-nip option
- Add search_ceidg_by_name() for API v3 name-based queries
- Add search_missing_nip_companies() to find NIP for companies without NIP
- Add --missing-nip flag to search for all companies missing NIP
- Add --apply-nip flag to save found NIPs to database
- Fix API endpoint: /api/ceidg/v3/firmy (not /firma)
- Correctly extract NIP from wlasciciel object in response

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
99f7a5a88a
commit
79a1a60621
@ -1,13 +1,15 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
CEIDG API v3 Client - pobiera dane właścicieli JDG
|
||||
CEIDG API Client - pobiera dane właścicieli JDG
|
||||
|
||||
Używa oficjalnego API CEIDG v3 (dane.biznes.gov.pl) do pobierania
|
||||
Używa oficjalnego API CEIDG (dane.biznes.gov.pl) do pobierania
|
||||
danych o jednoosobowych działalnościach gospodarczych.
|
||||
|
||||
Usage:
|
||||
python scripts/fetch_ceidg_api.py --nip 5881571773
|
||||
python scripts/fetch_ceidg_api.py --name "Kancelaria Notarialna"
|
||||
python scripts/fetch_ceidg_api.py --all # wszystkie JDG z bazy
|
||||
python scripts/fetch_ceidg_api.py --missing-nip # firmy bez NIP
|
||||
python scripts/fetch_ceidg_api.py --all --import # pobierz i importuj do bazy
|
||||
"""
|
||||
|
||||
@ -30,7 +32,8 @@ from dotenv import load_dotenv
|
||||
load_dotenv(Path(__file__).parent.parent / '.env')
|
||||
|
||||
# API Configuration
|
||||
CEIDG_API_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firma"
|
||||
# API v3 - main endpoint for company queries (supports NIP, REGON, nazwa, etc.)
|
||||
CEIDG_API_V3_URL = "https://dane.biznes.gov.pl/api/ceidg/v3/firmy"
|
||||
CEIDG_API_KEY = os.getenv("CEIDG_API_KEY")
|
||||
|
||||
# Output directory for JSON cache
|
||||
@ -94,7 +97,7 @@ def fetch_ceidg_data(nip: str) -> Optional[CEIDGData]:
|
||||
|
||||
try:
|
||||
response = requests.get(
|
||||
CEIDG_API_URL,
|
||||
CEIDG_API_V3_URL,
|
||||
params={"nip": nip},
|
||||
headers=headers,
|
||||
timeout=30
|
||||
@ -175,6 +178,166 @@ def fetch_ceidg_data(nip: str) -> Optional[CEIDGData]:
|
||||
return None
|
||||
|
||||
|
||||
def search_ceidg_by_name(nazwa: str, miasto: Optional[str] = None) -> List[dict]:
    """
    Search CEIDG companies by name using the API v3 endpoint.

    Sends a GET request to ``CEIDG_API_V3_URL`` with the ``nazwa`` query
    parameter (plus ``miasto`` when given), authenticated with a Bearer
    token taken from ``CEIDG_API_KEY``. Progress and errors are printed
    to stdout; no exception from the request or JSON parsing escapes.

    Args:
        nazwa: Company name to search for.
        miasto: Optional city used to narrow the results.

    Returns:
        The list stored under the ``firmy`` key of the JSON response.
        An empty list is returned when the API key is missing, the
        request fails, the server answers with anything other than
        HTTP 200, the body is not valid JSON, or no companies match.
    """
    if not CEIDG_API_KEY:
        print(" [ERROR] Brak CEIDG_API_KEY w .env")
        return []

    print(f" [INFO] Wyszukiwanie w CEIDG: '{nazwa}'...")

    headers = {
        "Authorization": f"Bearer {CEIDG_API_KEY}",
        "Accept": "application/json",
    }

    params = {"nazwa": nazwa}
    if miasto:
        params["miasto"] = miasto

    # Only the network call is guarded here so that transport failures and
    # malformed response bodies are reported with their own messages.
    try:
        response = requests.get(
            CEIDG_API_V3_URL,
            params=params,
            headers=headers,
            timeout=30,
        )
    except requests.RequestException as e:
        print(f" [ERROR] Błąd połączenia: {e}")
        return []

    if response.status_code == 204:
        print(f" [WARN] Brak wyników dla '{nazwa}'")
        return []

    if response.status_code == 401:
        print(" [ERROR] Błąd autoryzacji - sprawdź CEIDG_API_KEY")
        return []

    if response.status_code != 200:
        print(f" [ERROR] HTTP {response.status_code}: {response.text[:200]}")
        return []

    # Every JSON decode error raised by response.json() (stdlib json,
    # simplejson, or requests' own JSONDecodeError) is a ValueError.
    # Catching it here, outside the RequestException handler, keeps a bad
    # body from being reported as a connection error.
    try:
        data = response.json()
    except ValueError as e:
        print(f" [ERROR] Błąd parsowania JSON: {e}")
        return []

    # Treat a payload that is not a JSON object the same as "no results".
    results = data.get("firmy") if isinstance(data, dict) else None
    if not results:
        print(f" [WARN] Brak wyników dla '{nazwa}'")
        return []

    print(f" [OK] Znaleziono {len(results)} wyników")
    return results
|
||||
|
||||
|
||||
def _best_ceidg_match(company_name, company_city, candidates):
    """Return ``(best_candidate, best_score)`` for a company among CEIDG results.

    The score is the ``difflib.SequenceMatcher`` ratio of the lowercased
    names, plus a 0.2 bonus when the company's city is contained in the
    candidate's ``adresDzialalnosci.miasto``. Missing or ``null`` fields in
    a candidate are treated as empty. ``best_candidate`` is ``None`` when no
    candidate scores above 0.
    """
    from difflib import SequenceMatcher

    # Loop invariants: lowercase the database values once.
    name_lower = (company_name or "").lower()
    city_lower = (company_city or "").lower()

    best_match = None
    best_score = 0.0

    for candidate in candidates:
        # `or` guards against explicit JSON nulls, which dict.get defaults
        # do not cover.
        candidate_name = candidate.get("nazwa") or ""
        candidate_city = (candidate.get("adresDzialalnosci") or {}).get("miasto") or ""

        score = SequenceMatcher(None, name_lower, candidate_name.lower()).ratio()

        # Bonus for a matching city
        if city_lower and candidate_city and city_lower in candidate_city.lower():
            score += 0.2

        if score > best_score:
            best_score = score
            best_match = candidate

    return best_match, best_score


def _find_nip_for_company(company) -> Optional[dict]:
    """Look one company up in CEIDG by name; return a match record or ``None``.

    A record is produced only when the best candidate scores at least 0.6
    and carries a NIP in its ``wlasciciel`` object. Confidence is "high"
    for scores of 0.8 and above, otherwise "medium".
    """
    search_results = search_ceidg_by_name(
        company.name,
        miasto=company.address_city,
    )
    if not search_results:
        return None

    best_match, best_score = _best_ceidg_match(
        company.name, company.address_city, search_results
    )

    if best_match is None or best_score < 0.6:
        print(f" ✗ Brak dopasowania (najlepszy score: {best_score:.2f})")
        return None

    # NIP is in the wlasciciel object
    wlasciciel = best_match.get("wlasciciel") or {}
    nip = wlasciciel.get("nip")
    if not nip:
        print(" ✗ Znaleziono firmę ale brak NIP w odpowiedzi")
        return None

    found_name = best_match.get("nazwa", "")
    confidence = "high" if best_score >= 0.8 else "medium"

    print(f" ✓ Znaleziono: NIP {nip} ({confidence}, score: {best_score:.2f})")
    print(f" → {found_name}")

    return {
        "company_id": company.id,
        "company_name": company.name,
        "found_nip": nip,
        "found_regon": wlasciciel.get("regon"),
        "found_name": found_name,
        "found_status": best_match.get("status", ""),
        "score": round(best_score, 2),
        "confidence": confidence,
    }


def search_missing_nip_companies() -> List[dict]:
    """
    Search CEIDG for the NIP of every company stored without one.

    Loads all companies whose ``nip`` is NULL or an empty string (ordered
    by name), queries CEIDG by company name and city for each, and keeps
    the best-scoring candidate when it is good enough. The database is
    only read here; nothing is written back.

    Returns:
        A list of dicts with the keys ``company_id``, ``company_name``,
        ``found_nip``, ``found_regon``, ``found_name``, ``found_status``,
        ``score`` and ``confidence``.
    """
    from database import SessionLocal, Company

    db = SessionLocal()
    results = []

    try:
        # Fetch companies without a NIP
        companies = db.query(Company).filter(
            (Company.nip.is_(None)) | (Company.nip == '')
        ).order_by(Company.name).all()

        print(f"\n=== Wyszukiwanie NIP dla {len(companies)} firm ===\n")

        for position, company in enumerate(companies, start=1):
            print(f"[{position}/{len(companies)}] {company.name}")

            found = _find_nip_for_company(company)
            if found is not None:
                results.append(found)

            # Rate limiting: executed after every lookup, including the
            # path where a match is found but has no NIP.
            time.sleep(0.5)

    finally:
        db.close()

    return results
|
||||
|
||||
|
||||
def import_to_database(results: List[CEIDGData]) -> dict:
|
||||
"""
|
||||
Importuje dane właścicieli JDG do bazy danych.
|
||||
@ -260,11 +423,16 @@ def import_to_database(results: List[CEIDGData]) -> dict:
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Fetch JDG owner data from CEIDG API v3")
|
||||
parser = argparse.ArgumentParser(description="Fetch JDG owner data from CEIDG API")
|
||||
parser.add_argument("--nip", type=str, help="Single NIP to fetch")
|
||||
parser.add_argument("--name", type=str, help="Search by company name")
|
||||
parser.add_argument("--city", type=str, help="City for name search (optional)")
|
||||
parser.add_argument("--all", action="store_true", help="Fetch all JDG from database")
|
||||
parser.add_argument("--missing-nip", action="store_true", help="Search NIP for companies without NIP")
|
||||
parser.add_argument("--import", dest="do_import", action="store_true",
|
||||
help="Import fetched data to database")
|
||||
parser.add_argument("--apply-nip", action="store_true",
|
||||
help="Apply found NIPs to database (with --missing-nip)")
|
||||
parser.add_argument("--output", type=str, help="Output JSON file")
|
||||
args = parser.parse_args()
|
||||
|
||||
@ -281,6 +449,69 @@ def main():
|
||||
print(f" PKD: {data.pkd_glowny} - {data.pkd_opis}")
|
||||
print(f" Adres: {data.adres_ulica}, {data.adres_kod} {data.adres_miasto}")
|
||||
|
||||
elif args.name:
|
||||
# Search by name using API v3
|
||||
search_results = search_ceidg_by_name(args.name, miasto=args.city)
|
||||
if search_results:
|
||||
print(f"\n=== Wyniki dla '{args.name}' ===\n")
|
||||
for i, r in enumerate(search_results[:10]):
|
||||
# NIP is in wlasciciel object
|
||||
wlasciciel = r.get("wlasciciel", {})
|
||||
nip = wlasciciel.get("nip", "?")
|
||||
regon = wlasciciel.get("regon", "?")
|
||||
nazwa = r.get("nazwa", "?")
|
||||
status = r.get("status", "?")
|
||||
adres = r.get("adresDzialalnosci", {})
|
||||
miasto = adres.get("miasto", "")
|
||||
print(f"{i+1}. NIP: {nip} | REGON: {regon}")
|
||||
print(f" Nazwa: {nazwa}")
|
||||
print(f" Status: {status}")
|
||||
print(f" Miasto: {miasto}")
|
||||
if wlasciciel.get("imie"):
|
||||
print(f" Właściciel: {wlasciciel.get('imie')} {wlasciciel.get('nazwisko')}")
|
||||
print()
|
||||
|
||||
elif args.missing_nip:
|
||||
# Search NIP for companies without NIP
|
||||
found_results = search_missing_nip_companies()
|
||||
|
||||
if found_results:
|
||||
# Summary
|
||||
high_conf = [r for r in found_results if r["confidence"] == "high"]
|
||||
medium_conf = [r for r in found_results if r["confidence"] == "medium"]
|
||||
|
||||
print(f"\n=== PODSUMOWANIE ===")
|
||||
print(f"Znaleziono: {len(found_results)} NIP")
|
||||
print(f" - Wysoka pewność: {len(high_conf)}")
|
||||
print(f" - Średnia pewność: {len(medium_conf)}")
|
||||
|
||||
# Save results
|
||||
JSON_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||
output_file = args.output or str(JSON_OUTPUT_DIR / f"missing_nip_search_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json")
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(found_results, f, ensure_ascii=False, indent=2)
|
||||
print(f"\nWyniki zapisane: {output_file}")
|
||||
|
||||
# Apply NIPs if requested
|
||||
if args.apply_nip and high_conf:
|
||||
from database import SessionLocal, Company
|
||||
db = SessionLocal()
|
||||
try:
|
||||
print(f"\n=== Zapisywanie {len(high_conf)} NIP (wysoka pewność) ===")
|
||||
for r in high_conf:
|
||||
company = db.query(Company).filter(Company.id == r["company_id"]).first()
|
||||
if company:
|
||||
company.nip = r["found_nip"]
|
||||
print(f" ✓ {company.name} → NIP {r['found_nip']}")
|
||||
db.commit()
|
||||
print(f"\nZapisano {len(high_conf)} NIP do bazy")
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
print(f" [ERROR] Błąd zapisu: {e}")
|
||||
finally:
|
||||
db.close()
|
||||
return
|
||||
|
||||
elif args.all:
|
||||
from database import SessionLocal, Company
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user