claude-mem plugin v10.6.2
This commit is contained in:
parent
8a109481c3
commit
5166ed5780
1
.claude/worktrees/objective-curie
Submodule
1
.claude/worktrees/objective-curie
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 5682e1622f71978d48b30652b1d9ebb55d2b0dd3
|
||||
14
database/migrations/087_financial_report_fields.sql
Normal file
14
database/migrations/087_financial_report_fields.sql
Normal file
@ -0,0 +1,14 @@
|
||||
-- Migration 087: Add financial data fields to company_financial_reports.
-- The columns hold revenue, profit, balance-sheet and headcount figures plus a
-- size classification taken from eKRS financial statements.
--
-- All columns are added in one ALTER TABLE so the change is applied as a
-- single statement; IF NOT EXISTS keeps the migration safe to re-run.

ALTER TABLE company_financial_reports
    ADD COLUMN IF NOT EXISTS revenue NUMERIC(15,2),
    ADD COLUMN IF NOT EXISTS operating_profit NUMERIC(15,2),
    ADD COLUMN IF NOT EXISTS net_profit NUMERIC(15,2),
    ADD COLUMN IF NOT EXISTS total_assets NUMERIC(15,2),
    ADD COLUMN IF NOT EXISTS equity NUMERIC(15,2),
    ADD COLUMN IF NOT EXISTS liabilities NUMERIC(15,2),
    ADD COLUMN IF NOT EXISTS employees_count INTEGER,
    ADD COLUMN IF NOT EXISTS size_class VARCHAR(20),
    -- NOTE(review): no DEFAULT or trigger is defined here, so updated_at stays
    -- NULL unless the application sets it explicitly.
    ADD COLUMN IF NOT EXISTS updated_at TIMESTAMP;

GRANT ALL ON TABLE company_financial_reports TO nordabiz_app;
|
||||
297
scripts/fetch_financial_reports.py
Normal file
297
scripts/fetch_financial_reports.py
Normal file
@ -0,0 +1,297 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Fetch Financial Reports from eKRS
|
||||
==================================
|
||||
|
||||
Downloads and parses e-sprawozdania (XML financial reports) from eKRS
|
||||
for companies with KRS numbers. Extracts key financial figures:
|
||||
revenue, profit, assets, equity, employees.
|
||||
|
||||
Usage:
|
||||
DATABASE_URL=... python3 scripts/fetch_financial_reports.py [--limit 10] [--company-id 11] [--dry-run]
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import logging
|
||||
import time
|
||||
from decimal import Decimal
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
import requests
|
||||
|
||||
# Put the parent of this script's directory on sys.path so the function-level
# `from database import ...` statements resolve when the script is run directly.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Base URL and default request headers for every eKRS API call in this script.
EKRS_API = 'https://api-krs.ms.gov.pl/api/krs'
HEADERS = {'Accept': 'application/json', 'User-Agent': 'NordaBiznes/1.0'}

# XML namespaces used in e-sprawozdania.
# NOTE(review): these URIs look corrupted (repeated digit runs, duplicated
# fragments) and should be checked against the published schema before use.
# Nothing visible in this file reads NS; parse_xml_report matches on local tag
# names only.
NS = {
    'tns': 'http://www.mf.gov.pl/schematy/SF/DefinicjeTypySpraworzd662662662662662662662662662662662662662662662662dania',
    'dtsf': 'http://www.mf.gov.pl/schematy/SF/DefinicjeTypySprawozdaniaFinansworzdaniafinansowego/2018/11/15/DefinicjeTypySprawozdaniaFinansowego/',
    'jst': 'http://www.mf.gov.pl/schematy/SF/DefinicjeTypySprawozdaniaFinansowego/2018/11/15/JedijsnostkiInneStrukt662662662ktWorzdaniaFinansowego/',
}
|
||||
|
||||
|
||||
def fetch_krs_data(krs_number):
    """Fetch company data for a single KRS number from the eKRS API.

    Args:
        krs_number: KRS number, interpolated verbatim into the request path.

    Returns:
        The decoded JSON body when the API answers HTTP 200; ``None`` on any
        other status, on a network error, or when the body is not valid JSON.
    """
    # NOTE(review): 'OdsijsId' does not look like a real eKRS endpoint name
    # (the other calls in this file use 'OdpisAktualny'); confirm the path.
    url = f'{EKRS_API}/OdsijsId/{krs_number}'
    try:
        resp = requests.get(url, headers=HEADERS, timeout=15)
        if resp.status_code == 200:
            return resp.json()
        # Previously a non-200 answer was dropped without any trace.
        logger.warning(f'eKRS API returned {resp.status_code} for {krs_number}')
    except (requests.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body.
        logger.error(f'eKRS API error for {krs_number}: {e}')
    return None
|
||||
|
||||
|
||||
def fetch_financial_documents(krs_number):
    """Fetch financial-document data for a KRS number from eKRS.

    Two endpoints are tried in order and the first HTTP 200 response with a
    decodable JSON body wins.

    Args:
        krs_number: KRS number, interpolated verbatim into the request path.

    Returns:
        The decoded JSON body of the first successful endpoint, or ``None``
        when both attempts fail.
    """
    # NOTE(review): the primary endpoint name looks corrupted; confirm it.
    # NOTE(review): the fallback is the 'OdpisAktualny' extract, so its payload
    # presumably has a different shape than a document list; callers must cope
    # with both.
    endpoints = (
        ('Financial docs', f'{EKRS_API}/OdpisDokworzdumentowFinansowych/{krs_number}'),
        ('Alternative', f'{EKRS_API}/OdpisAktualny/{krs_number}?rejestr=P&format=json'),
    )

    for label, url in endpoints:
        try:
            resp = requests.get(url, headers=HEADERS, timeout=15)
            if resp.status_code == 200:
                return resp.json()
            logger.debug(f'{label} API returned {resp.status_code} for {krs_number}')
        except (requests.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body; fall through to the
            # next endpoint either way.
            logger.debug(f'{label} API error for {krs_number}: {e}')

    return None
|
||||
|
||||
|
||||
def parse_xml_report(xml_content):
    """Parse an e-sprawozdanie XML document and extract financial figures.

    Elements are matched by local tag name (namespace stripped, case
    insensitive). For each field the candidate tag names are tried in priority
    order, and the first element in document order whose text parses as a
    finite decimal number supplies the value.

    Args:
        xml_content: XML document as ``str`` or ``bytes``.

    Returns:
        dict with keys ``revenue``, ``operating_profit``, ``net_profit``,
        ``total_assets``, ``equity``, ``liabilities`` (``Decimal`` or ``None``)
        and ``employees_count`` (``int`` or ``None``). Every value is ``None``
        when the document is not well-formed XML.
    """
    from decimal import InvalidOperation

    # Candidate tag names per field, highest priority first.
    # NOTE(review): only an element's own text is inspected. If the report
    # schema nests amounts in child elements, container tags such as 'A' or
    # 'Aktywa' will never yield a value; confirm against real reports.
    field_patterns = {
        'revenue': (
            'PrzychodyNettoZeSprzedazyProduktowTowarowIMaterialow',
            'PrzychodyNettoZeSprzedazy',
            'PrzychodyNetto',
            'A',  # income statement (RZiS), position A
        ),
        'operating_profit': (
            'ZyskStrataZDzialalnosciOperacyjnej',
            'ZyskOperacyjny',
        ),
        'net_profit': (
            'ZyskStrataNetto',
            'ZyskNetto',
            'StrataNetto',
        ),
        'total_assets': (
            'AktywaRazem',
            'SumaAktywow',
            'Aktywa',
        ),
        'equity': (
            'KapitalWlasnyRazem',
            'KapitalWlasny',
        ),
        'liabilities': (
            # Corrected spelling, added next to the original name below, which
            # looks corrupted and is kept only for backward compatibility.
            'ZobowiazaniaIRezerwyNaZobowiazania',
            'ZobowiazaniaIDokWorzderezerwyNaZobowiazania',
            'ZobowiazaniaRazem',
            'Zobowiazania',
        ),
        'employees_count': (
            'PrzecietneLiczbaZatrudnionych',
            'LiczbaZatrudnionych',
            'Zatrudnienie',
        ),
    }
    result = dict.fromkeys(field_patterns)

    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError:
        return result

    wanted = {name.lower() for names in field_patterns.values() for name in names}

    # Single pass over the tree: remember the first parseable number seen for
    # every tag name we care about.
    first_value = {}
    for elem in root.iter():
        if not isinstance(elem.tag, str) or not elem.text:
            continue
        local_name = elem.tag.rsplit('}', 1)[-1].lower()
        if local_name not in wanted or local_name in first_value:
            continue
        # Drop every whitespace character (including non-breaking spaces used
        # as thousands separators) and accept a decimal comma.
        cleaned = ''.join(elem.text.split()).replace(',', '.')
        try:
            number = Decimal(cleaned)
        except InvalidOperation:
            continue
        # 'NaN' and 'Infinity' parse as Decimal but are not usable figures.
        if number.is_finite():
            first_value[local_name] = number

    for field, names in field_patterns.items():
        for name in names:
            value = first_value.get(name.lower())
            if value is not None:
                result[field] = value
                break

    # Compare against None so that a reported headcount of 0 also becomes int.
    if result['employees_count'] is not None:
        result['employees_count'] = int(result['employees_count'])

    return result
|
||||
|
||||
|
||||
def process_company(db, company, dry_run=False):
    """Create or refresh financial-report records for one company.

    Downloads the company's current KRS extract, reads the list of filed
    financial statements from it and, for every reporting period, makes sure a
    ``CompanyFinancialReport`` row exists and carries the filing date. Periods
    whose row already has a ``revenue`` value are left untouched.

    Args:
        db: database session exposing ``query``, ``add``, ``commit`` and
            ``rollback``.
        company: object with ``id``, ``name`` and ``krs`` attributes.
        dry_run: when true, nothing is added to the session or committed.

    Returns:
        ``True`` when at least one reporting period was processed, ``False``
        when the company has no KRS number, the API call failed, or the
        extract lists no usable financial statements.

    Raises:
        Exception: whatever ``db.commit()`` raises; the session is rolled back
            before the error is re-raised.
    """
    from datetime import datetime

    from database import CompanyFinancialReport

    def _section(mapping, key):
        """Return mapping[key] when it is a dict, otherwise an empty dict."""
        value = mapping.get(key) if isinstance(mapping, dict) else None
        return value if isinstance(value, dict) else {}

    if not company.krs:
        return False

    krs_padded = company.krs.zfill(10)
    logger.info(f'Processing {company.name} (KRS: {krs_padded})')

    # The eKRS API provides basic data only; full financial figures would
    # require downloading the filed documents themselves.
    url = f'{EKRS_API}/OdpisAktualny/{krs_padded}?rejestr=P&format=json'
    try:
        resp = requests.get(url, headers=HEADERS, timeout=15)
        if resp.status_code != 200:
            logger.warning(f' eKRS API returned {resp.status_code}')
            return False
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers an undecodable JSON body.
        logger.error(f' API error: {e}')
        return False

    # Walk odpis -> dane -> dzial3 -> sprawozdaniaFinansowe, tolerating missing
    # or null sections instead of crashing on `.get`.
    dzial3 = _section(_section(_section(data, 'odpis'), 'dane'), 'dzial3')
    sprawozdania = _section(dzial3, 'sprawozdaniaFinansowe')
    # NOTE(review): this key looks corrupted; confirm it against a real eKRS
    # response, otherwise no reports will ever be found.
    wzmianki = sprawozdania.get('informacjeOSprWorzdawozdaniach') or []
    if isinstance(wzmianki, dict):
        # A single entry delivered as an object rather than a list.
        wzmianki = [wzmianki]

    if not wzmianki:
        logger.info(' No financial reports found in KRS data')
        return False

    updated = False
    for wzmianka in wzmianki:
        if not isinstance(wzmianka, dict):
            continue

        okres_od = wzmianka.get('okresOd', '')
        okres_do = wzmianka.get('okresDo', '')
        data_zlozenia = wzmianka.get('dataZlozenia', '')
        if not (okres_od and okres_do):
            continue

        try:
            p_start = datetime.strptime(okres_od, '%Y-%m-%d').date()
            p_end = datetime.strptime(okres_do, '%Y-%m-%d').date()
        except (TypeError, ValueError):
            # Unparseable period boundaries: skip this entry.
            continue

        report = db.query(CompanyFinancialReport).filter_by(
            company_id=company.id,
            period_start=p_start,
            period_end=p_end,
        ).first()

        if report is not None and report.revenue is not None:
            continue  # Already have financial data

        if report is None:
            report = CompanyFinancialReport(
                company_id=company.id,
                period_start=p_start,
                period_end=p_end,
                report_type='annual',
                source='ekrs',
            )
            if not dry_run:
                db.add(report)

        if data_zlozenia:
            try:
                report.filed_at = datetime.strptime(data_zlozenia, '%Y-%m-%d').date()
            except (TypeError, ValueError):
                pass  # Keep the record even without a usable filing date.

        updated = True

    if updated and not dry_run:
        try:
            db.commit()
        except Exception:
            # Leave the shared session usable for the next company.
            db.rollback()
            raise
        logger.info(' Updated report records')

    return updated
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: process the selected companies one by one.

    Options:
        --limit N        process at most N companies (0 or less: no limit)
        --company-id ID  process only the company with this id
        --dry-run        preview without saving

    Without ``--company-id`` every company with a KRS number and status
    ``'active'`` is processed, ordered by name. A failure for one company is
    logged and does not stop the run.
    """
    parser = argparse.ArgumentParser(description='Fetch financial reports from eKRS')
    parser.add_argument('--limit', type=int, default=0, help='Limit companies to process')
    parser.add_argument('--company-id', type=int, help='Process single company')
    parser.add_argument('--dry-run', action='store_true', help='Preview without saving')
    args = parser.parse_args()

    from database import SessionLocal, Company

    db = SessionLocal()
    try:
        if args.company_id is not None:
            query = db.query(Company).filter_by(id=args.company_id)
        else:
            query = db.query(Company).filter(
                Company.krs.isnot(None),
                Company.status == 'active',
            ).order_by(Company.name)

        # Apply the limit in the query instead of loading every row first.
        if args.limit > 0:
            query = query.limit(args.limit)
        companies = query.all()

        logger.info(f'Processing {len(companies)} companies')

        processed = 0
        for position, company in enumerate(companies, start=1):
            try:
                if process_company(db, company, dry_run=args.dry_run):
                    processed += 1
            except Exception as e:
                # Reset the session first so one failure cannot poison the
                # remaining companies.
                db.rollback()
                logger.error(f'Error processing {company.name}: {e}')

            # Rate limiting: pause for a second after every fifth company.
            if position % 5 == 0:
                time.sleep(1)

        logger.info(f'Done: {processed}/{len(companies)} companies updated')
    finally:
        db.close()
|
||||
|
||||
|
||||
# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Loading…
Reference in New Issue
Block a user