refactor: Move company/validation/enrichment API routes to blueprints

- Create blueprints/api/routes_company.py with 9 routes:
  - /api/companies, /api/connections
  - /api/check-email, /api/verify-nip, /api/verify-krs
  - /api/company/<id>/refresh-krs, /api/company/<id>/enrich-ai
  - /api/model-info, /api/admin/test-sanitization
- app.py: 5520 → 4756 lines (-764)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-31 18:08:36 +01:00
parent 94dd5327bc
commit ff59872529
3 changed files with 809 additions and 770 deletions

776
app.py
View File

@ -1139,786 +1139,22 @@ def health_full():
# ============================================================
# USER DASHBOARD - MOVED TO blueprints/public/routes.py
# ============================================================
# API ROUTES (for frontend)
# ============================================================
@app.route('/api/companies')
def api_companies():
    """API: List every active company as a JSON payload.

    Each entry carries the company's id, name, category name (None when the
    company has no category), short description, website, phone and email.
    """
    def _serialize(company):
        # Flatten one Company row into the public JSON shape.
        category = company.category
        return {
            'id': company.id,
            'name': company.name,
            'category': category.name if category else None,
            'description': company.description_short,
            'website': company.website,
            'phone': company.phone,
            'email': company.email
        }

    session = SessionLocal()
    try:
        active = session.query(Company).filter_by(status='active').all()
        payload = [_serialize(company) for company in active]
        return jsonify({'success': True, 'companies': payload})
    finally:
        # Always hand the session back, even when serialization fails.
        session.close()
@app.route('/api/connections')
def api_connections():
    """
    API: Build the company-person graph consumed by the D3.js visualization.

    The response contains `nodes` (active companies first, then every person
    that has at least one company relationship), `links` (one per person role
    in an active company) and `stats` with the matching counts.
    """
    def _in_active_company(role):
        # A role only counts when its company exists and is active.
        return bool(role.company and role.company.status == 'active')

    session = SessionLocal()
    try:
        active_companies = session.query(Company).filter_by(status='active').all()
        linked_people = session.query(Person).join(CompanyPerson).distinct().all()

        company_nodes = [
            {
                'id': f'company_{company.id}',
                'name': company.name,
                'type': 'company',
                'category': company.category.name if company.category else 'Other',
                'slug': company.slug,
                'has_krs': bool(company.krs),
                'city': company.address_city or ''
            }
            for company in active_companies
        ]

        person_nodes = []
        links = []
        for person in linked_people:
            active_roles = [role for role in person.company_roles if _in_active_company(role)]
            person_nodes.append({
                'id': f'person_{person.id}',
                'name': f'{person.imiona} {person.nazwisko}',
                'type': 'person',
                # Distinct companies, not roles: one person may hold several
                # roles in the same company.
                'company_count': len({role.company_id for role in active_roles})
            })
            links.extend(
                {
                    'source': f'person_{person.id}',
                    'target': f'company_{role.company_id}',
                    'role': role.role,
                    'category': role.role_category
                }
                for role in active_roles
            )

        return jsonify({
            'success': True,
            'nodes': company_nodes + person_nodes,
            'links': links,
            'stats': {
                'companies': len(company_nodes),
                'people': len(person_nodes),
                'connections': len(links)
            }
        })
    finally:
        session.close()
# ============================================================
# SEO AUDIT API ROUTES - MOVED TO blueprints/api/routes_seo_audit.py
# ============================================================
# Routes: /api/seo/audit, /api/seo/audit/<slug>, /api/seo/audit (POST)
# Helper functions: _build_seo_audit_response, _get_seo_audit_for_company
# ============================================================
# SEO & GBP AUDIT DASHBOARDS - MOVED TO: blueprints/admin/routes_audits.py
# API ROUTES - MOVED TO: blueprints/api/routes_company.py
# Routes: /api/companies, /api/connections, /api/check-email, /api/verify-nip,
# /api/verify-krs, /api/company/<id>/refresh-krs, /api/company/<id>/enrich-ai,
# /api/model-info, /api/admin/test-sanitization
# ============================================================
# ============================================================
# GBP AUDIT API - MOVED TO: blueprints/api/routes_gbp_audit.py
# SEO/GBP/SOCIAL AUDIT API - MOVED TO: blueprints/api/routes_*_audit.py
# ============================================================
# ============================================================
# AUDIT DASHBOARDS - MOVED TO: blueprints/audit/routes.py
# Routes: /audit/seo/<slug>, /audit/social/<slug>, /audit/gbp/<slug>, /audit/it/<slug>
# ============================================================
@app.route('/api/check-email', methods=['POST'])
def api_check_email():
    """API: Check whether an email address is still available.

    Expects a JSON body ``{"email": "..."}``. The address is trimmed and
    lower-cased before validation and lookup.

    Returns:
        400 with ``available: False`` when the address is missing, not a
        string, or fails ``validate_email``; otherwise 200 with ``available``
        set to True when no user is registered under that email.
    """
    # silent=True: a missing, malformed or non-object JSON body must end in the
    # clean 400 below instead of an AttributeError on `None.get`.
    data = request.get_json(silent=True)
    raw_email = data.get('email') if isinstance(data, dict) else None
    email = raw_email.strip().lower() if isinstance(raw_email, str) else ''

    # Validate email format
    if not email or not validate_email(email):
        return jsonify({
            'available': False,
            'error': 'Nieprawidłowy format email'
        }), 400

    db = SessionLocal()
    try:
        # Check if email exists
        existing_user = db.query(User).filter_by(email=email).first()
        return jsonify({
            'available': existing_user is None,
            'email': email
        })
    finally:
        db.close()
@app.route('/api/verify-nip', methods=['POST'])
def api_verify_nip():
    """API: Verify a NIP and report whether it belongs to an active NORDA member.

    Expects a JSON body ``{"nip": "<10 digits>"}``.

    Returns:
        400 when the NIP is missing, not a string, or not exactly 10 digits;
        otherwise 200 with ``is_member`` plus the matching company's name and
        id (both None when no active company has that NIP).
    """
    # silent=True: tolerate a missing, malformed or non-object JSON body and
    # answer with the format error below rather than crashing on `None.get`.
    data = request.get_json(silent=True)
    raw_nip = data.get('nip') if isinstance(data, dict) else None
    nip = raw_nip.strip() if isinstance(raw_nip, str) else ''

    # Validate NIP format
    if not nip or not re.match(r'^\d{10}$', nip):
        return jsonify({
            'success': False,
            'error': 'Nieprawidłowy format NIP'
        }), 400

    db = SessionLocal()
    try:
        # Check if NIP exists in companies database
        company = db.query(Company).filter_by(nip=nip, status='active').first()
        return jsonify({
            'success': True,
            'is_member': company is not None,
            'company_name': company.name if company else None,
            'company_id': company.id if company else None
        })
    finally:
        db.close()
@app.route('/api/verify-krs', methods=['GET', 'POST'])
def api_verify_krs():
    """
    API: Verify company data from KRS Open API (prs.ms.gov.pl).

    GET /api/verify-krs?krs=0000817317
    POST /api/verify-krs with JSON body: {"krs": "0000817317"}

    The KRS number must be 7-10 digits; it is left-padded with zeros to 10
    digits before the lookup.

    Returns:
        200 with the KRS data (``krs_data.to_dict()``), a formatted address
        and whether a company with that KRS exists in the local database;
        400 on an invalid KRS format; 404 when the registry has no such
        entity; 500 when the lookup raises.
    """
    # Get KRS from query params (GET) or JSON body (POST)
    if request.method == 'GET':
        raw_krs = request.args.get('krs', '')
    else:
        # A JSON body that is absent, malformed or not an object is treated as
        # "no KRS supplied" so it falls through to the 400 below.
        body = request.get_json(silent=True)
        raw_krs = body.get('krs', '') if isinstance(body, dict) else ''
    # Non-string values (e.g. a JSON number) are rejected as invalid format
    # instead of raising on `.strip()`.
    krs = raw_krs.strip() if isinstance(raw_krs, str) else ''

    # Validate KRS format (7-10 digits)
    if not krs or not re.match(r'^\d{7,10}$', krs):
        return jsonify({
            'success': False,
            'error': 'Nieprawidłowy format KRS (wymagane 7-10 cyfr)'
        }), 400

    # Normalize to 10 digits
    krs_normalized = krs.zfill(10)

    try:
        # Fetch data from KRS Open API
        krs_data = krs_api_service.get_company_from_krs(krs_normalized)
        if krs_data is None:
            return jsonify({
                'success': False,
                'error': f'Nie znaleziono podmiotu o KRS {krs_normalized} w rejestrze',
                'krs': krs_normalized
            }), 404

        # Check if company exists in our database
        db = SessionLocal()
        try:
            our_company = db.query(Company).filter_by(krs=krs_normalized).first()
            is_member = our_company is not None
            company_id = our_company.id if our_company else None
        finally:
            db.close()

        return jsonify({
            'success': True,
            'krs': krs_normalized,
            'is_norda_member': is_member,
            'company_id': company_id,
            'data': krs_data.to_dict(),
            'formatted_address': krs_api_service.format_address(krs_data),
            'source': 'KRS Open API (prs.ms.gov.pl)',
            'note': 'Dane osobowe (imiona, nazwiska) są zanonimizowane w Open API'
        })
    except Exception as e:
        # Record the failure server-side; previously it was only echoed to the client.
        logger.error(f"KRS verification error for {krs_normalized}: {e}")
        return jsonify({
            'success': False,
            'error': f'Błąd podczas pobierania danych z KRS: {str(e)}'
        }), 500
@app.route('/api/company/<int:company_id>/refresh-krs', methods=['POST'])
@login_required
def api_refresh_company_krs(company_id):
    """
    API: Refresh a company's record from the KRS Open API.

    Looks the company up by id, fetches its official KRS entry and copies
    NIP, REGON (first 9 characters), formatted address and city onto the
    record when they differ, then stamps `krs_verified_at` and commits.

    Requires login.

    Returns:
        200 with `updates` (the changed fields), the raw `krs_data` and a
        summary message; 404 when the company or its KRS entry is missing;
        400 when the company has no KRS number; 500 on any other error
        (the transaction is rolled back and the error is logged).
    """
    # NOTE(review): any logged-in user can refresh any company here, whereas
    # the AI enrichment route restricts itself to admins / the company owner —
    # confirm this is intended.
    db = SessionLocal()
    try:
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({
                'success': False,
                'error': 'Firma nie znaleziona'
            }), 404

        if not company.krs:
            return jsonify({
                'success': False,
                'error': 'Firma nie ma numeru KRS'
            }), 400

        # Fetch data from KRS
        krs_data = krs_api_service.get_company_from_krs(company.krs)
        if krs_data is None:
            return jsonify({
                'success': False,
                'error': f'Nie znaleziono podmiotu o KRS {company.krs} w rejestrze'
            }), 404

        # Update company data (only non-personal data)
        updates = {}

        if krs_data.nip and krs_data.nip != company.nip:
            updates['nip'] = krs_data.nip
            company.nip = krs_data.nip

        if krs_data.regon:
            regon_9 = krs_data.regon[:9]
            if regon_9 != company.regon:
                updates['regon'] = regon_9
                company.regon = regon_9

        # Update address if it differs from the stored one.
        # NOTE(review): other routes in this file read `company.address_city`;
        # verify that `address` / `city` are real Company columns.
        new_address = krs_api_service.format_address(krs_data)
        if new_address and new_address != company.address:
            updates['address'] = new_address
            company.address = new_address

        if krs_data.miejscowosc and krs_data.miejscowosc != company.city:
            updates['city'] = krs_data.miejscowosc
            company.city = krs_data.miejscowosc

        if krs_data.kapital_zakladowy:
            # Reported to the client only; nothing is written to the company
            # record for this field, yet it still counts towards the message.
            # Note: Might need to add this field to Company model
            updates['kapital_zakladowy'] = krs_data.kapital_zakladowy

        # Update verification timestamp
        company.krs_verified_at = datetime.utcnow()
        db.commit()

        return jsonify({
            'success': True,
            'company_id': company_id,
            'updates': updates,
            'krs_data': krs_data.to_dict(),
            'message': f'Zaktualizowano {len(updates)} pól' if updates else 'Dane są aktualne'
        })
    except Exception as e:
        db.rollback()
        # Log the failure like the AI enrichment route does; previously the
        # error was only returned to the caller and left no server-side trace.
        logger.error(f"KRS refresh error for company {company_id}: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Błąd podczas aktualizacji: {str(e)}'
        }), 500
    finally:
        db.close()
def _search_brave_for_company(company_name: str, city: str = None) -> dict:
    """
    Search Brave API for company information.

    Runs a news search (past year) and a web search for the quoted company
    name, optionally followed by the city, both scoped to Poland / Polish.

    Args:
        company_name: Name searched as an exact (quoted) phrase.
        city: Optional city appended to the query to narrow results.

    Returns:
        ``{'news': [...], 'web': [...]}`` with at most 5 items each. News items
        have title/description/url/source, web items title/description/url.
        Both lists are empty when BRAVE_API_KEY is not configured. The two
        searches fail independently: an error in one is logged and leaves the
        other's results intact.
    """
    import requests

    results = {'news': [], 'web': []}

    brave_api_key = os.getenv('BRAVE_API_KEY')
    if not brave_api_key:
        logger.warning("BRAVE_API_KEY not configured, skipping web search")
        return results

    # Build search query
    query = f'"{company_name}"'
    if city:
        query += f' {city}'

    headers = {
        'Accept': 'application/json',
        'X-Subscription-Token': brave_api_key
    }
    common_params = {
        'q': query,
        'count': 5,
        'country': 'pl',
        'search_lang': 'pl'
    }

    # Search news. Kept in its own try so a failure here no longer prevents
    # the web search below from running.
    try:
        news_response = requests.get(
            'https://api.search.brave.com/res/v1/news/search',
            headers=headers,
            params={**common_params, 'freshness': 'py'},  # past year
            timeout=10
        )
        if news_response.status_code == 200:
            news_data = news_response.json()
            for item in (news_data.get('results') or [])[:5]:
                results['news'].append({
                    # `or ''` also covers explicit JSON nulls, which callers
                    # would otherwise fail to slice.
                    'title': item.get('title') or '',
                    'description': item.get('description') or '',
                    'url': item.get('url') or '',
                    'source': (item.get('meta_url') or {}).get('hostname', '')
                })
            logger.info(f"Brave News: found {len(results['news'])} items for '{company_name}'")
    except Exception as e:
        logger.error(f"Brave search error for '{company_name}': {e}")

    # Search web
    try:
        web_response = requests.get(
            'https://api.search.brave.com/res/v1/web/search',
            headers=headers,
            params=common_params,
            timeout=10
        )
        if web_response.status_code == 200:
            web_data = web_response.json()
            for item in ((web_data.get('web') or {}).get('results') or [])[:5]:
                results['web'].append({
                    'title': item.get('title') or '',
                    'description': item.get('description') or '',
                    'url': item.get('url') or ''
                })
            logger.info(f"Brave Web: found {len(results['web'])} items for '{company_name}'")
    except Exception as e:
        logger.error(f"Brave search error for '{company_name}': {e}")

    return results
def _fetch_website_content(url: str) -> str:
    """
    Fetch and extract text content from company website.

    Script, style, nav, footer and header elements are removed, whitespace
    is collapsed, and the text is truncated to its first 3000 characters.

    Returns:
        The extracted text, or '' when `url` is empty, the response status
        is not 200, or fetching/parsing raises (logged as a warning).
    """
    import requests
    from bs4 import BeautifulSoup

    if not url:
        return ''

    try:
        # Ensure URL has protocol. Checking for the full "http://" / "https://"
        # prefix matters: a bare startswith('http') would treat a host such as
        # "http-serwis.pl" as already having a scheme and the request would fail.
        if not url.lower().startswith(('http://', 'https://')):
            url = 'https://' + url

        # NOTE(review): the URL comes from stored company data and is fetched
        # server-side — consider rejecting internal/private hosts (SSRF).
        response = requests.get(url, timeout=10, headers={
            'User-Agent': 'Mozilla/5.0 (compatible; NordaBizBot/1.0)'
        })

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')

            # Remove scripts, styles and page chrome
            for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
                tag.decompose()

            # Get text content
            text = soup.get_text(separator=' ', strip=True)

            # Clean up whitespace
            text = ' '.join(text.split())

            logger.info(f"Fetched {len(text)} chars from {url}")
            return text[:3000]  # Limit to 3000 chars
    except Exception as e:
        logger.warning(f"Failed to fetch website content from {url}: {e}")

    return ''
@app.route('/api/company/<int:company_id>/enrich-ai', methods=['POST'])
@login_required
@limiter.limit("5 per hour")
def api_enrich_company_ai(company_id):
    """
    API: Enrich company data using AI (Gemini) with web search.

    Process:
    1. Search Brave API for company news and web results
    2. Fetch content from company website
    3. Combine with existing database data
    4. Send to Gemini for AI-powered enrichment
    5. Parse the JSON answer and create/update the company's
       CompanyAIInsights row

    Generates AI insights including:
    - Business summary
    - Services list
    - Target market
    - Unique selling points
    - Company values
    - Certifications
    - Industry tags

    Requires: Admin or company owner permissions.
    Rate limited to 5 requests per hour (see the limiter decorator).

    Returns:
        200 with the parsed insights, processing time and the sources that
        actually contributed data; 404 when the company does not exist;
        403 when the caller is neither admin nor tied to the company;
        503 when the Gemini service is unavailable; 500 when the AI answer
        is not valid JSON or any other error occurs (transaction rolled back).
    """
    import json

    db = SessionLocal()
    try:
        # Get company
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({
                'success': False,
                'error': 'Firma nie znaleziona'
            }), 404

        # Check permissions: admin or company owner
        # NOTE(review): this logs the caller's email at INFO level on every request.
        logger.info(f"Permission check: user={current_user.email}, is_admin={current_user.is_admin}, user_company_id={current_user.company_id}, target_company_id={company.id}")
        if not current_user.is_admin and current_user.company_id != company.id:
            return jsonify({
                'success': False,
                'error': 'Brak uprawnien. Tylko administrator lub wlasciciel firmy moze wzbogacac dane.'
            }), 403

        # Get Gemini service
        service = gemini_service.get_gemini_service()
        if not service:
            return jsonify({
                'success': False,
                'error': 'Usluga AI jest niedostepna. Skontaktuj sie z administratorem.'
            }), 503

        logger.info(f"AI enrichment triggered by {current_user.email} for company: {company.name} (ID: {company.id})")

        # ============================================
        # STEP 1: Search the web for company info
        # ============================================
        brave_results = _search_brave_for_company(company.name, company.address_city)

        # Format news for prompt: top 3 items, descriptions cut to 200 chars
        news_text = ""
        if brave_results['news']:
            news_text = "\n".join([
                f"- {item['title']}: {item['description'][:200]}"
                for item in brave_results['news'][:3]
            ])

        # Format web results for prompt: top 3 items, descriptions cut to 200 chars
        web_text = ""
        if brave_results['web']:
            web_text = "\n".join([
                f"- {item['title']}: {item['description'][:200]}"
                for item in brave_results['web'][:3]
            ])

        # ============================================
        # STEP 2: Fetch company website content
        # ============================================
        website_content = ""
        if company.website:
            website_content = _fetch_website_content(company.website)

        # ============================================
        # STEP 3: Collect existing company data
        # ============================================
        # Prefer the structured service relations; fall back to the free-text
        # `services_offered` field only when there are none.
        services_list = []
        if company.services:
            services_list = [cs.service.name for cs in company.services if cs.service]
        elif company.services_offered:
            services_list = [company.services_offered]

        competencies_list = []
        if company.competencies:
            competencies_list = [cc.competency.name for cc in company.competencies if cc.competency]

        # Keys are Polish because they are interpolated into the Polish prompt below.
        existing_data = {
            'nazwa': company.name,
            'opis_krotki': company.description_short or '',
            'opis_pelny': company.description_full or '',
            'kategoria': company.category.name if company.category else '',
            'uslugi': ', '.join(services_list) if services_list else '',
            'kompetencje': ', '.join(competencies_list) if competencies_list else '',
            'wartosci': company.core_values or '',
            'strona_www': company.website or '',
            'miasto': company.address_city or '',
            'branza': company.pkd_description or ''
        }

        # ============================================
        # STEP 4: Build comprehensive prompt for AI
        # ============================================
        # The website text is cut to 2000 chars here even though
        # _fetch_website_content returns up to 3000. Doubled braces ({{ }}) are
        # literal braces of the JSON template inside the f-string.
        prompt = f"""Przeanalizuj wszystkie dostepne dane o polskiej firmie i wygeneruj wzbogacone informacje.
=== DANE Z BAZY DANYCH ===
Nazwa: {existing_data['nazwa']}
Kategoria: {existing_data['kategoria']}
Opis krotki: {existing_data['opis_krotki']}
Opis pelny: {existing_data['opis_pelny']}
Uslugi: {existing_data['uslugi']}
Kompetencje: {existing_data['kompetencje']}
Wartosci firmy: {existing_data['wartosci']}
Strona WWW: {existing_data['strona_www']}
Miasto: {existing_data['miasto']}
Branza (PKD): {existing_data['branza']}
=== INFORMACJE Z INTERNETU (Brave Search) ===
Newsy o firmie:
{news_text if news_text else '(brak znalezionych newsow)'}
Wyniki wyszukiwania:
{web_text if web_text else '(brak wynikow)'}
=== TRESC ZE STRONY WWW FIRMY ===
{website_content[:2000] if website_content else '(nie udalo sie pobrac tresci strony)'}
=== ZADANIE ===
Na podstawie WSZYSTKICH powyzszych danych (baza danych, wyszukiwarka, strona WWW) wygeneruj wzbogacone informacje o firmie.
Wykorzystaj informacje z internetu do uzupelnienia brakujacych danych.
Jesli znalazles nowe uslugi, certyfikaty lub informacje - dodaj je do odpowiedzi.
Odpowiedz WYLACZNIE w formacie JSON (bez dodatkowego tekstu):
{{
"business_summary": "Zwiezly opis dzialalnosci firmy (2-3 zdania) na podstawie wszystkich zrodel",
"services_list": ["usluga1", "usluga2", "usluga3", "usluga4", "usluga5"],
"target_market": "Opis grupy docelowej klientow",
"unique_selling_points": ["wyroznik1", "wyroznik2", "wyroznik3"],
"company_values": ["wartosc1", "wartosc2", "wartosc3"],
"certifications": ["certyfikat1", "certyfikat2"],
"industry_tags": ["tag1", "tag2", "tag3", "tag4", "tag5"],
"recent_news": "Krotkie podsumowanie ostatnich newsow o firmie (jesli sa)",
"suggested_category": "Sugerowana kategoria glowna",
"category_confidence": 0.85,
"data_sources_used": ["database", "brave_search", "website"]
}}
WAZNE:
- Odpowiedz TYLKO JSON, bez markdown, bez ```json
- Wszystkie teksty po polsku
- Listy powinny zawierac 3-5 elementow
- category_confidence to liczba od 0 do 1
- Wykorzystaj maksymalnie informacje z internetu
"""

        # Call Gemini API and measure the wall-clock time of the call in ms
        start_time = time.time()
        response_text = service.generate_text(
            prompt=prompt,
            temperature=0.7,
            feature='ai_enrichment',
            user_id=current_user.id,
            company_id=company.id,
            related_entity_type='company',
            related_entity_id=company.id
        )
        processing_time = int((time.time() - start_time) * 1000)

        # Parse JSON response
        # NOTE(review): a None response or a non-object JSON value is not
        # handled here; it would surface through the generic 500 handler below.
        try:
            # Clean response - remove markdown code blocks if present:
            # take the text between the first pair of ``` fences and drop a
            # leading "json" language tag.
            clean_response = response_text.strip()
            if clean_response.startswith('```'):
                clean_response = clean_response.split('```')[1]
                if clean_response.startswith('json'):
                    clean_response = clean_response[4:]
            clean_response = clean_response.strip()

            ai_data = json.loads(clean_response)
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse AI response: {e}\nResponse: {response_text[:500]}")
            return jsonify({
                'success': False,
                'error': 'Blad parsowania odpowiedzi AI. Sprobuj ponownie.'
            }), 500

        # Save or update AI insights (one row per company)
        existing_insights = db.query(CompanyAIInsights).filter_by(company_id=company.id).first()

        if existing_insights:
            # Update existing
            existing_insights.business_summary = ai_data.get('business_summary')
            existing_insights.services_list = ai_data.get('services_list', [])
            existing_insights.target_market = ai_data.get('target_market')
            existing_insights.unique_selling_points = ai_data.get('unique_selling_points', [])
            existing_insights.company_values = ai_data.get('company_values', [])
            existing_insights.certifications = ai_data.get('certifications', [])
            existing_insights.industry_tags = ai_data.get('industry_tags', [])
            existing_insights.suggested_category = ai_data.get('suggested_category')
            existing_insights.category_confidence = ai_data.get('category_confidence')
            existing_insights.ai_confidence_score = 0.85  # Default confidence (hard-coded, not from the model)
            existing_insights.processing_time_ms = processing_time
            existing_insights.analyzed_at = datetime.utcnow()
        else:
            # Create new
            new_insights = CompanyAIInsights(
                company_id=company.id,
                business_summary=ai_data.get('business_summary'),
                services_list=ai_data.get('services_list', []),
                target_market=ai_data.get('target_market'),
                unique_selling_points=ai_data.get('unique_selling_points', []),
                company_values=ai_data.get('company_values', []),
                certifications=ai_data.get('certifications', []),
                industry_tags=ai_data.get('industry_tags', []),
                suggested_category=ai_data.get('suggested_category'),
                category_confidence=ai_data.get('category_confidence'),
                ai_confidence_score=0.85,
                processing_time_ms=processing_time,
                analyzed_at=datetime.utcnow()
            )
            db.add(new_insights)

        db.commit()

        # Count sources used: derived from what was actually retrieved, not
        # from the model's self-reported `data_sources_used`.
        sources_used = ['database']
        if brave_results['news'] or brave_results['web']:
            sources_used.append('brave_search')
        if website_content:
            sources_used.append('website')

        logger.info(f"AI enrichment completed for {company.name}. Processing time: {processing_time}ms. Sources: {sources_used}")

        return jsonify({
            'success': True,
            'message': f'Dane firmy "{company.name}" zostaly wzbogacone przez AI',
            'processing_time_ms': processing_time,
            'sources_used': sources_used,
            'brave_results_count': len(brave_results['news']) + len(brave_results['web']),
            'website_content_length': len(website_content),
            'insights': ai_data
        })
    except Exception as e:
        # Any unexpected failure: undo pending DB changes and report a 500.
        db.rollback()
        logger.error(f"AI enrichment error for company {company_id}: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Blad podczas wzbogacania danych: {str(e)}'
        }), 500
    finally:
        db.close()
@app.route('/api/model-info', methods=['GET'])
def api_model_info():
    """API: Report which AI model the application is currently using.

    Responds with the Gemini service's model name, or with a 500 error
    payload when the service has not been initialized.
    """
    service = gemini_service.get_gemini_service()

    # Guard clause: nothing to report without an initialized service.
    if not service:
        failure = {'success': False, 'error': 'AI service not initialized'}
        return jsonify(failure), 500

    info = {
        'success': True,
        'model': service.model_name,
        'provider': 'Google Gemini'
    }
    return jsonify(info)
@app.route('/api/admin/test-sanitization', methods=['POST'])
@login_required
def test_sanitization():
    """
    Admin API: Test sensitive data detection without saving.

    Allows admins to verify what data would be sanitized. Expects a JSON
    body ``{"text": "..."}``.

    Returns:
        200 with the original text, the sanitized text and the list of
        matches (type, original, masked, confidence); 403 for non-admins;
        400 when `text` is missing, empty or not a string; 500 when the
        sensitive data service cannot be imported or sanitization fails.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Admin access required'}), 403

    try:
        from sensitive_data_service import sanitize_message

        # silent=True: a missing, malformed or non-object JSON body is a client
        # error (400 below), not an AttributeError reported as a 500.
        data = request.get_json(silent=True)
        text = data.get('text', '') if isinstance(data, dict) else ''

        if not text or not isinstance(text, str):
            return jsonify({'success': False, 'error': 'Text is required'}), 400

        sanitized, matches = sanitize_message(text)

        return jsonify({
            'success': True,
            'original': text,
            'sanitized': sanitized,
            'matches': [
                {
                    'type': m.data_type.value,
                    'original': m.original,
                    'masked': m.masked,
                    'confidence': m.confidence
                }
                for m in matches
            ],
            'has_sensitive_data': len(matches) > 0
        })
    except ImportError:
        return jsonify({
            'success': False,
            'error': 'Sensitive data service not available'
        }), 500
    except Exception as e:
        logger.error(f"Error testing sanitization: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
# Validation and Company API routes moved to blueprints/api/routes_company.py
# ============================================================
# DEVELOPMENT INSIGHTS (Roadmap from user feedback)

View File

@ -15,3 +15,4 @@ from . import routes_contacts # noqa: E402, F401
from . import routes_seo_audit # noqa: E402, F401
from . import routes_gbp_audit # noqa: E402, F401
from . import routes_social_audit # noqa: E402, F401
from . import routes_company # noqa: E402, F401

View File

@ -0,0 +1,802 @@
"""
Company API Routes - API blueprint
Migrated from app.py as part of the blueprint refactoring.
Contains API routes for company data, validation, and AI enrichment.
"""
import json
import logging
import os
import re
import time
from datetime import datetime
import requests
from bs4 import BeautifulSoup
from flask import jsonify, request, current_app
from flask_login import current_user, login_required
from database import (
SessionLocal, Company, User, Person, CompanyPerson, CompanyAIInsights
)
import gemini_service
import krs_api_service
from . import bp
logger = logging.getLogger(__name__)
# ============================================================
# COMPANY DATA API ROUTES
# ============================================================
@bp.route('/companies')
def api_companies():
    """API: List every active company as a JSON payload.

    Each entry carries the company's id, name, category name (None when the
    company has no category), short description, website, phone and email.
    """
    def _serialize(company):
        # Flatten one Company row into the public JSON shape.
        category = company.category
        return {
            'id': company.id,
            'name': company.name,
            'category': category.name if category else None,
            'description': company.description_short,
            'website': company.website,
            'phone': company.phone,
            'email': company.email
        }

    session = SessionLocal()
    try:
        active = session.query(Company).filter_by(status='active').all()
        payload = [_serialize(company) for company in active]
        return jsonify({'success': True, 'companies': payload})
    finally:
        # Always hand the session back, even when serialization fails.
        session.close()
@bp.route('/connections')
def api_connections():
    """
    API: Build the company-person graph consumed by the D3.js visualization.

    The response contains `nodes` (active companies first, then every person
    that has at least one company relationship), `links` (one per person role
    in an active company) and `stats` with the matching counts.
    """
    def _in_active_company(role):
        # A role only counts when its company exists and is active.
        return bool(role.company and role.company.status == 'active')

    session = SessionLocal()
    try:
        active_companies = session.query(Company).filter_by(status='active').all()
        linked_people = session.query(Person).join(CompanyPerson).distinct().all()

        company_nodes = [
            {
                'id': f'company_{company.id}',
                'name': company.name,
                'type': 'company',
                'category': company.category.name if company.category else 'Other',
                'slug': company.slug,
                'has_krs': bool(company.krs),
                'city': company.address_city or ''
            }
            for company in active_companies
        ]

        person_nodes = []
        links = []
        for person in linked_people:
            active_roles = [role for role in person.company_roles if _in_active_company(role)]
            person_nodes.append({
                'id': f'person_{person.id}',
                'name': f'{person.imiona} {person.nazwisko}',
                'type': 'person',
                # Distinct companies, not roles: one person may hold several
                # roles in the same company.
                'company_count': len({role.company_id for role in active_roles})
            })
            links.extend(
                {
                    'source': f'person_{person.id}',
                    'target': f'company_{role.company_id}',
                    'role': role.role,
                    'category': role.role_category
                }
                for role in active_roles
            )

        return jsonify({
            'success': True,
            'nodes': company_nodes + person_nodes,
            'links': links,
            'stats': {
                'companies': len(company_nodes),
                'people': len(person_nodes),
                'connections': len(links)
            }
        })
    finally:
        session.close()
# ============================================================
# VALIDATION API ROUTES
# ============================================================
def validate_email(email):
    """Return True when *email* looks like a syntactically valid address.

    This is a simple shape check (local part, "@", domain, dot, TLD of at
    least two letters), not a deliverability check. The whole string must
    match: surrounding whitespace or a trailing newline makes it invalid.
    Non-string input is reported as invalid instead of raising.
    """
    import re

    if not isinstance(email, str):
        return False
    pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch rather than match + '$': '$' also matches just before a
    # trailing newline, which let "user@example.com\n" through.
    return re.fullmatch(pattern, email) is not None
@bp.route('/check-email', methods=['POST'])
def api_check_email():
    """API: Check whether an email address is still available.

    Expects a JSON body ``{"email": "..."}``. The address is trimmed and
    lower-cased before validation and lookup.

    Returns:
        400 with ``available: False`` when the address is missing, not a
        string, or fails ``validate_email``; otherwise 200 with ``available``
        set to True when no user is registered under that email.
    """
    # silent=True: a missing, malformed or non-object JSON body must end in the
    # clean 400 below instead of an AttributeError on `None.get`.
    data = request.get_json(silent=True)
    raw_email = data.get('email') if isinstance(data, dict) else None
    email = raw_email.strip().lower() if isinstance(raw_email, str) else ''

    # Validate email format
    if not email or not validate_email(email):
        return jsonify({
            'available': False,
            'error': 'Nieprawidłowy format email'
        }), 400

    db = SessionLocal()
    try:
        # Check if email exists
        existing_user = db.query(User).filter_by(email=email).first()
        return jsonify({
            'available': existing_user is None,
            'email': email
        })
    finally:
        db.close()
@bp.route('/verify-nip', methods=['POST'])
def api_verify_nip():
    """API: Verify a NIP and report whether it belongs to an active NORDA member.

    Expects a JSON body ``{"nip": "<10 digits>"}``.

    Returns:
        400 when the NIP is missing, not a string, or not exactly 10 digits;
        otherwise 200 with ``is_member`` plus the matching company's name and
        id (both None when no active company has that NIP).
    """
    # silent=True: tolerate a missing, malformed or non-object JSON body and
    # answer with the format error below rather than crashing on `None.get`.
    data = request.get_json(silent=True)
    raw_nip = data.get('nip') if isinstance(data, dict) else None
    nip = raw_nip.strip() if isinstance(raw_nip, str) else ''

    # Validate NIP format
    if not nip or not re.match(r'^\d{10}$', nip):
        return jsonify({
            'success': False,
            'error': 'Nieprawidłowy format NIP'
        }), 400

    db = SessionLocal()
    try:
        # Check if NIP exists in companies database
        company = db.query(Company).filter_by(nip=nip, status='active').first()
        return jsonify({
            'success': True,
            'is_member': company is not None,
            'company_name': company.name if company else None,
            'company_id': company.id if company else None
        })
    finally:
        db.close()
@bp.route('/verify-krs', methods=['GET', 'POST'])
def api_verify_krs():
    """
    API: Verify company data from KRS Open API (prs.ms.gov.pl).

    GET /api/verify-krs?krs=0000817317
    POST /api/verify-krs with JSON body: {"krs": "0000817317"}

    The KRS number must be 7-10 digits; it is left-padded with zeros to 10
    digits before the lookup.

    Returns:
        200 with the KRS data (``krs_data.to_dict()``), a formatted address
        and whether a company with that KRS exists in the local database;
        400 on an invalid KRS format; 404 when the registry has no such
        entity; 500 when the lookup raises.
    """
    # Get KRS from query params (GET) or JSON body (POST)
    if request.method == 'GET':
        raw_krs = request.args.get('krs', '')
    else:
        # A JSON body that is absent, malformed or not an object is treated as
        # "no KRS supplied" so it falls through to the 400 below.
        body = request.get_json(silent=True)
        raw_krs = body.get('krs', '') if isinstance(body, dict) else ''
    # Non-string values (e.g. a JSON number) are rejected as invalid format
    # instead of raising on `.strip()`.
    krs = raw_krs.strip() if isinstance(raw_krs, str) else ''

    # Validate KRS format (7-10 digits)
    if not krs or not re.match(r'^\d{7,10}$', krs):
        return jsonify({
            'success': False,
            'error': 'Nieprawidłowy format KRS (wymagane 7-10 cyfr)'
        }), 400

    # Normalize to 10 digits
    krs_normalized = krs.zfill(10)

    try:
        # Fetch data from KRS Open API
        krs_data = krs_api_service.get_company_from_krs(krs_normalized)
        if krs_data is None:
            return jsonify({
                'success': False,
                'error': f'Nie znaleziono podmiotu o KRS {krs_normalized} w rejestrze',
                'krs': krs_normalized
            }), 404

        # Check if company exists in our database
        db = SessionLocal()
        try:
            our_company = db.query(Company).filter_by(krs=krs_normalized).first()
            is_member = our_company is not None
            company_id = our_company.id if our_company else None
        finally:
            db.close()

        return jsonify({
            'success': True,
            'krs': krs_normalized,
            'is_norda_member': is_member,
            'company_id': company_id,
            'data': krs_data.to_dict(),
            'formatted_address': krs_api_service.format_address(krs_data),
            'source': 'KRS Open API (prs.ms.gov.pl)',
            'note': 'Dane osobowe (imiona, nazwiska) są zanonimizowane w Open API'
        })
    except Exception as e:
        # Record the failure server-side; previously it was only echoed to the client.
        logger.error(f"KRS verification error for {krs_normalized}: {e}")
        return jsonify({
            'success': False,
            'error': f'Błąd podczas pobierania danych z KRS: {str(e)}'
        }), 500
@bp.route('/company/<int:company_id>/refresh-krs', methods=['POST'])
@login_required
def api_refresh_company_krs(company_id):
    """
    API: Refresh a company's record from the KRS Open API.

    Looks the company up by id, fetches its official KRS entry and copies
    NIP, REGON (first 9 characters), formatted address and city onto the
    record when they differ, then stamps `krs_verified_at` and commits.

    Requires login.

    Returns:
        200 with `updates` (the changed fields), the raw `krs_data` and a
        summary message; 404 when the company or its KRS entry is missing;
        400 when the company has no KRS number; 500 on any other error
        (the transaction is rolled back and the error is logged).
    """
    # NOTE(review): any logged-in user can refresh any company here, whereas
    # the AI enrichment route documents an admin / company-owner check —
    # confirm this is intended.
    db = SessionLocal()
    try:
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({
                'success': False,
                'error': 'Firma nie znaleziona'
            }), 404

        if not company.krs:
            return jsonify({
                'success': False,
                'error': 'Firma nie ma numeru KRS'
            }), 400

        # Fetch data from KRS
        krs_data = krs_api_service.get_company_from_krs(company.krs)
        if krs_data is None:
            return jsonify({
                'success': False,
                'error': f'Nie znaleziono podmiotu o KRS {company.krs} w rejestrze'
            }), 404

        # Update company data (only non-personal data)
        updates = {}

        if krs_data.nip and krs_data.nip != company.nip:
            updates['nip'] = krs_data.nip
            company.nip = krs_data.nip

        if krs_data.regon:
            regon_9 = krs_data.regon[:9]
            if regon_9 != company.regon:
                updates['regon'] = regon_9
                company.regon = regon_9

        # Update address if it differs from the stored one.
        # NOTE(review): other routes in this file read `company.address_city`;
        # verify that `address` / `city` are real Company columns.
        new_address = krs_api_service.format_address(krs_data)
        if new_address and new_address != company.address:
            updates['address'] = new_address
            company.address = new_address

        if krs_data.miejscowosc and krs_data.miejscowosc != company.city:
            updates['city'] = krs_data.miejscowosc
            company.city = krs_data.miejscowosc

        if krs_data.kapital_zakladowy:
            # Reported to the client only; nothing is written to the company
            # record for this field, yet it still counts towards the message.
            updates['kapital_zakladowy'] = krs_data.kapital_zakladowy

        # Update verification timestamp
        company.krs_verified_at = datetime.utcnow()
        db.commit()

        return jsonify({
            'success': True,
            'company_id': company_id,
            'updates': updates,
            'krs_data': krs_data.to_dict(),
            'message': f'Zaktualizowano {len(updates)} pól' if updates else 'Dane są aktualne'
        })
    except Exception as e:
        db.rollback()
        # Leave a server-side trace of the failure; previously the error was
        # only returned to the caller.
        logger.error(f"KRS refresh error for company {company_id}: {str(e)}")
        return jsonify({
            'success': False,
            'error': f'Błąd podczas aktualizacji: {str(e)}'
        }), 500
    finally:
        db.close()
# ============================================================
# AI ENRICHMENT HELPER FUNCTIONS
# ============================================================
def _search_brave_for_company(company_name: str, city: str = None) -> dict:
    """
    Search Brave API for company information.

    Runs a news search (past year) and a web search for the quoted company
    name, optionally followed by the city, both scoped to Poland / Polish.

    Args:
        company_name: Name searched as an exact (quoted) phrase.
        city: Optional city appended to the query to narrow results.

    Returns:
        ``{'news': [...], 'web': [...]}`` with at most 5 items each. News items
        have title/description/url/source, web items title/description/url.
        Both lists are empty when BRAVE_API_KEY is not configured. The two
        searches fail independently: an error in one is logged and leaves the
        other's results intact.
    """
    results = {'news': [], 'web': []}

    brave_api_key = os.getenv('BRAVE_API_KEY')
    if not brave_api_key:
        logger.warning("BRAVE_API_KEY not configured, skipping web search")
        return results

    # Build search query
    query = f'"{company_name}"'
    if city:
        query += f' {city}'

    headers = {
        'Accept': 'application/json',
        'X-Subscription-Token': brave_api_key
    }
    common_params = {
        'q': query,
        'count': 5,
        'country': 'pl',
        'search_lang': 'pl'
    }

    # Search news. Kept in its own try so a failure here no longer prevents
    # the web search below from running.
    try:
        news_response = requests.get(
            'https://api.search.brave.com/res/v1/news/search',
            headers=headers,
            params={**common_params, 'freshness': 'py'},  # past year
            timeout=10
        )
        if news_response.status_code == 200:
            news_data = news_response.json()
            for item in (news_data.get('results') or [])[:5]:
                results['news'].append({
                    # `or ''` also covers explicit JSON nulls, which callers
                    # would otherwise fail to slice.
                    'title': item.get('title') or '',
                    'description': item.get('description') or '',
                    'url': item.get('url') or '',
                    'source': (item.get('meta_url') or {}).get('hostname', '')
                })
            logger.info(f"Brave News: found {len(results['news'])} items for '{company_name}'")
    except Exception as e:
        logger.error(f"Brave search error for '{company_name}': {e}")

    # Search web
    try:
        web_response = requests.get(
            'https://api.search.brave.com/res/v1/web/search',
            headers=headers,
            params=common_params,
            timeout=10
        )
        if web_response.status_code == 200:
            web_data = web_response.json()
            for item in ((web_data.get('web') or {}).get('results') or [])[:5]:
                results['web'].append({
                    'title': item.get('title') or '',
                    'description': item.get('description') or '',
                    'url': item.get('url') or ''
                })
            logger.info(f"Brave Web: found {len(results['web'])} items for '{company_name}'")
    except Exception as e:
        logger.error(f"Brave search error for '{company_name}': {e}")

    return results
def _fetch_website_content(url: str) -> str:
"""
Fetch and extract text content from company website.
Returns first 3000 chars of text content.
"""
if not url:
return ''
try:
# Ensure URL has protocol
if not url.startswith('http'):
url = 'https://' + url
response = requests.get(url, timeout=10, headers={
'User-Agent': 'Mozilla/5.0 (compatible; NordaBizBot/1.0)'
})
if response.status_code == 200:
soup = BeautifulSoup(response.text, 'html.parser')
# Remove scripts and styles
for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
tag.decompose()
# Get text content
text = soup.get_text(separator=' ', strip=True)
# Clean up whitespace
text = ' '.join(text.split())
logger.info(f"Fetched {len(text)} chars from {url}")
return text[:3000] # Limit to 3000 chars
except Exception as e:
logger.warning(f"Failed to fetch website content from {url}: {e}")
return ''
# ============================================================
# AI ENRICHMENT API ROUTE
# ============================================================
def _format_brave_items(items: list, limit: int = 3) -> str:
    """Render up to `limit` search results as '- title: description' prompt lines."""
    return "\n".join(
        # Null/missing fields are rendered as empty text instead of crashing on slicing
        f"- {item.get('title') or ''}: {(item.get('description') or '')[:200]}"
        for item in items[:limit]
    )


def _parse_ai_json_response(response_text: str) -> dict:
    """
    Parse the model reply into a dict, tolerating a surrounding markdown fence.

    Raises:
        ValueError: when the reply is empty, is not valid JSON
            (json.JSONDecodeError is a ValueError subclass), or decodes to
            something other than a JSON object.
    """
    cleaned = (response_text or '').strip()
    if cleaned.startswith('```'):
        # Keep only the content of the first fenced block
        cleaned = cleaned.split('```')[1]
        if cleaned.startswith('json'):
            cleaned = cleaned[4:]
    parsed = json.loads(cleaned.strip())
    if not isinstance(parsed, dict):
        raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")
    return parsed


@bp.route('/company/<int:company_id>/enrich-ai', methods=['POST'])
@login_required
def api_enrich_company_ai(company_id):
    """
    API: Enrich company data using AI (Gemini) with web search.

    Process:
    1. Search Brave API for company news and web results
    2. Fetch content from company website
    3. Combine with existing database data
    4. Send to Gemini for AI-powered enrichment
    5. Create or update the company's CompanyAIInsights row

    Generates AI insights including:
    - Business summary
    - Services list
    - Target market
    - Unique selling points
    - Company values
    - Certifications
    - Industry tags

    Requires: Admin or company owner permissions.

    NOTE(review): earlier documentation states this endpoint is rate limited
    to 5 requests per hour per user, but no rate-limit decorator is applied
    on this route here - confirm where (or whether) the limit is enforced.

    Args:
        company_id: Primary key of the company to enrich (from the URL).

    Returns:
        JSON response:
        - 200 with the generated insights and source statistics on success
        - 404 when the company does not exist
        - 403 when the user is neither admin nor assigned to the company
        - 503 when the Gemini service is unavailable
        - 500 when the AI reply cannot be parsed as a JSON object or any
          other error occurs (the DB transaction is rolled back)
    """
    db = SessionLocal()
    try:
        # Get company
        company = db.query(Company).filter_by(id=company_id).first()
        if not company:
            return jsonify({
                'success': False,
                'error': 'Firma nie znaleziona'
            }), 404

        # Check permissions: admin or company owner
        logger.info(f"Permission check: user={current_user.email}, is_admin={current_user.is_admin}, user_company_id={current_user.company_id}, target_company_id={company.id}")
        if not current_user.is_admin and current_user.company_id != company.id:
            return jsonify({
                'success': False,
                'error': 'Brak uprawnien. Tylko administrator lub wlasciciel firmy moze wzbogacac dane.'
            }), 403

        # Get Gemini service
        service = gemini_service.get_gemini_service()
        if not service:
            return jsonify({
                'success': False,
                'error': 'Usluga AI jest niedostepna. Skontaktuj sie z administratorem.'
            }), 503

        logger.info(f"AI enrichment triggered by {current_user.email} for company: {company.name} (ID: {company.id})")

        # ============================================
        # STEP 1: Search the web for company info
        # ============================================
        brave_results = _search_brave_for_company(company.name, company.address_city)

        # Format news and web results for prompt (empty string when nothing was found)
        news_text = _format_brave_items(brave_results['news'])
        web_text = _format_brave_items(brave_results['web'])

        # ============================================
        # STEP 2: Fetch company website content
        # ============================================
        website_content = ""
        if company.website:
            website_content = _fetch_website_content(company.website)

        # ============================================
        # STEP 3: Collect existing company data
        # ============================================
        services_list = []
        if company.services:
            services_list = [cs.service.name for cs in company.services if cs.service]
        elif company.services_offered:
            # Fall back to the free-text services field when no linked services exist
            services_list = [company.services_offered]

        competencies_list = []
        if company.competencies:
            competencies_list = [cc.competency.name for cc in company.competencies if cc.competency]

        existing_data = {
            'nazwa': company.name,
            'opis_krotki': company.description_short or '',
            'opis_pelny': company.description_full or '',
            'kategoria': company.category.name if company.category else '',
            'uslugi': ', '.join(services_list),
            'kompetencje': ', '.join(competencies_list),
            'wartosci': company.core_values or '',
            'strona_www': company.website or '',
            'miasto': company.address_city or '',
            'branza': company.pkd_description or ''
        }

        # ============================================
        # STEP 4: Build comprehensive prompt for AI
        # ============================================
        prompt = f"""Przeanalizuj wszystkie dostepne dane o polskiej firmie i wygeneruj wzbogacone informacje.
=== DANE Z BAZY DANYCH ===
Nazwa: {existing_data['nazwa']}
Kategoria: {existing_data['kategoria']}
Opis krotki: {existing_data['opis_krotki']}
Opis pelny: {existing_data['opis_pelny']}
Uslugi: {existing_data['uslugi']}
Kompetencje: {existing_data['kompetencje']}
Wartosci firmy: {existing_data['wartosci']}
Strona WWW: {existing_data['strona_www']}
Miasto: {existing_data['miasto']}
Branza (PKD): {existing_data['branza']}
=== INFORMACJE Z INTERNETU (Brave Search) ===
Newsy o firmie:
{news_text if news_text else '(brak znalezionych newsow)'}
Wyniki wyszukiwania:
{web_text if web_text else '(brak wynikow)'}
=== TRESC ZE STRONY WWW FIRMY ===
{website_content[:2000] if website_content else '(nie udalo sie pobrac tresci strony)'}
=== ZADANIE ===
Na podstawie WSZYSTKICH powyzszych danych (baza danych, wyszukiwarka, strona WWW) wygeneruj wzbogacone informacje o firmie.
Wykorzystaj informacje z internetu do uzupelnienia brakujacych danych.
Jesli znalazles nowe uslugi, certyfikaty lub informacje - dodaj je do odpowiedzi.
Odpowiedz WYLACZNIE w formacie JSON (bez dodatkowego tekstu):
{{
"business_summary": "Zwiezly opis dzialalnosci firmy (2-3 zdania) na podstawie wszystkich zrodel",
"services_list": ["usluga1", "usluga2", "usluga3", "usluga4", "usluga5"],
"target_market": "Opis grupy docelowej klientow",
"unique_selling_points": ["wyroznik1", "wyroznik2", "wyroznik3"],
"company_values": ["wartosc1", "wartosc2", "wartosc3"],
"certifications": ["certyfikat1", "certyfikat2"],
"industry_tags": ["tag1", "tag2", "tag3", "tag4", "tag5"],
"recent_news": "Krotkie podsumowanie ostatnich newsow o firmie (jesli sa)",
"suggested_category": "Sugerowana kategoria glowna",
"category_confidence": 0.85,
"data_sources_used": ["database", "brave_search", "website"]
}}
WAZNE:
- Odpowiedz TYLKO JSON, bez markdown, bez ```json
- Wszystkie teksty po polsku
- Listy powinny zawierac 3-5 elementow
- category_confidence to liczba od 0 do 1
- Wykorzystaj maksymalnie informacje z internetu
"""

        # Call Gemini API
        start_time = time.time()
        response_text = service.generate_text(
            prompt=prompt,
            temperature=0.7,
            feature='ai_enrichment',
            user_id=current_user.id,
            company_id=company.id,
            related_entity_type='company',
            related_entity_id=company.id
        )
        processing_time = int((time.time() - start_time) * 1000)

        # Parse JSON response; empty, malformed or non-object replies are all
        # reported as a parse error rather than surfacing as a generic failure
        try:
            ai_data = _parse_ai_json_response(response_text)
        except ValueError as e:
            logger.error(f"Failed to parse AI response: {e}\nResponse: {(response_text or '')[:500]}")
            return jsonify({
                'success': False,
                'error': 'Blad parsowania odpowiedzi AI. Sprobuj ponownie.'
            }), 500

        # Single source of truth for the persisted fields, shared by the
        # update and create paths so they cannot drift apart
        insight_fields = {
            'business_summary': ai_data.get('business_summary'),
            'services_list': ai_data.get('services_list', []),
            'target_market': ai_data.get('target_market'),
            'unique_selling_points': ai_data.get('unique_selling_points', []),
            'company_values': ai_data.get('company_values', []),
            'certifications': ai_data.get('certifications', []),
            'industry_tags': ai_data.get('industry_tags', []),
            'suggested_category': ai_data.get('suggested_category'),
            'category_confidence': ai_data.get('category_confidence'),
            'ai_confidence_score': 0.85,  # Default confidence
            'processing_time_ms': processing_time,
            'analyzed_at': datetime.utcnow()
        }

        # Save or update AI insights
        existing_insights = db.query(CompanyAIInsights).filter_by(company_id=company.id).first()
        if existing_insights:
            # Update existing
            for field_name, field_value in insight_fields.items():
                setattr(existing_insights, field_name, field_value)
        else:
            # Create new
            db.add(CompanyAIInsights(company_id=company.id, **insight_fields))

        db.commit()

        # Count sources used
        sources_used = ['database']
        if brave_results['news'] or brave_results['web']:
            sources_used.append('brave_search')
        if website_content:
            sources_used.append('website')

        logger.info(f"AI enrichment completed for {company.name}. Processing time: {processing_time}ms. Sources: {sources_used}")

        return jsonify({
            'success': True,
            'message': f'Dane firmy "{company.name}" zostaly wzbogacone przez AI',
            'processing_time_ms': processing_time,
            'sources_used': sources_used,
            'brave_results_count': len(brave_results['news']) + len(brave_results['web']),
            'website_content_length': len(website_content),
            'insights': ai_data
        })

    except Exception as e:
        db.rollback()
        logger.error(f"AI enrichment error for company {company_id}: {str(e)}")
        # NOTE(review): the raw exception text is returned to the client;
        # consider a generic message if internal details must not leak
        return jsonify({
            'success': False,
            'error': f'Blad podczas wzbogacania danych: {str(e)}'
        }), 500
    finally:
        db.close()
# ============================================================
# UTILITY API ROUTES
# ============================================================
@bp.route('/model-info', methods=['GET'])
def api_model_info():
    """
    API: Report which AI model is currently configured.

    Returns:
        JSON with ``model`` and ``provider`` on success, or a 500 response
        with an error message when the Gemini service is not available.
    """
    ai_service = gemini_service.get_gemini_service()

    # Guard clause: no service instance means the AI backend was never set up
    if not ai_service:
        failure = {
            'success': False,
            'error': 'AI service not initialized'
        }
        return jsonify(failure), 500

    payload = {
        'success': True,
        'model': ai_service.model_name,
        'provider': 'Google Gemini'
    }
    return jsonify(payload)
@bp.route('/admin/test-sanitization', methods=['POST'])
@login_required
def test_sanitization():
    """
    Admin API: Test sensitive data detection without saving.

    Allows admins to verify what data would be sanitized. Expects a JSON
    object body with a non-empty string under the ``text`` key.

    Returns:
        JSON response:
        - 200 with the original text, the sanitized text, the list of
          matches (type, original, masked, confidence) and a
          ``has_sensitive_data`` flag
        - 403 when the current user is not an admin
        - 400 when the body is missing, is not a JSON object, or ``text`` is
          empty or not a string
        - 500 when the sanitization module cannot be imported or
          sanitization itself fails
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Admin access required'}), 403

    try:
        from sensitive_data_service import sanitize_message

        # silent=True makes malformed or non-JSON bodies yield None instead of
        # raising, so they are reported as a 400 below rather than being
        # swallowed by the generic handler and returned as a 500
        data = request.get_json(silent=True)
        text = data.get('text', '') if isinstance(data, dict) else ''

        if not isinstance(text, str) or not text:
            return jsonify({'success': False, 'error': 'Text is required'}), 400

        sanitized, matches = sanitize_message(text)

        return jsonify({
            'success': True,
            'original': text,
            'sanitized': sanitized,
            'matches': [
                {
                    'type': m.data_type.value,
                    'original': m.original,
                    'masked': m.masked,
                    'confidence': m.confidence
                }
                for m in matches
            ],
            'has_sensitive_data': len(matches) > 0
        })
    except ImportError:
        return jsonify({
            'success': False,
            'error': 'Sensitive data service not available'
        }), 500
    except Exception as e:
        logger.error(f"Error testing sanitization: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500