feat: add data quality dashboard, auto-scoring, bulk enrichment and GBP data flow
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Extract 12-field completeness scoring to utils/data_quality.py service
- Auto-update data_quality_score and data_quality label on company data changes
- Add /admin/data-quality dashboard with field coverage stats, quality distribution, and sortable company table
- Add bulk enrichment with background processing, step selection, and progress tracking
- Flow GBP phone/website to Company record when company fields are empty
- Display Google opening hours on public company profile
- Add BulkEnrichmentJob model and migration 075
- Refactor arm_company.py to support selective steps and progress callbacks

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-21 07:02:45 +01:00
parent a311dcf0f1
commit 93e90b2c72
15 changed files with 1403 additions and 28 deletions

View File

@ -29,3 +29,5 @@ from . import routes_membership # noqa: E402, F401
from . import routes_benefits # noqa: E402, F401 from . import routes_benefits # noqa: E402, F401
from . import routes_competitors # noqa: E402, F401 from . import routes_competitors # noqa: E402, F401
from . import routes_social_publisher # noqa: E402, F401 from . import routes_social_publisher # noqa: E402, F401
from . import routes_data_quality # noqa: E402, F401
from . import routes_bulk_enrichment # noqa: E402, F401

View File

@ -0,0 +1,193 @@
"""
Admin Bulk Enrichment Routes
=============================
Batch enrichment operations for multiple companies at once.
"""
import logging
import threading
import time
from datetime import datetime
from flask import request, jsonify
from flask_login import login_required, current_user
from . import bp
from database import SessionLocal, Company, BulkEnrichmentJob, SystemRole
from utils.decorators import role_required
logger = logging.getLogger(__name__)
def _run_bulk_enrichment(job_id, company_ids, steps):
    """Background worker for bulk enrichment. Runs in a separate thread.

    Enriches the given companies one after another with ``arm_company`` and
    stores progress on the ``BulkEnrichmentJob`` row after every company so
    that the status endpoint can report it. The job ends in status
    'completed', or 'failed' if anything outside the per-company error
    handling raises.

    Args:
        job_id: Primary key of the ``BulkEnrichmentJob`` row to update.
        company_ids: Company IDs to enrich, in processing order.
        steps: Enrichment step names forwarded to ``arm_company``.
    """
    import sys
    import os

    # Make the project root and scripts/ importable from this thread.
    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    if base_dir not in sys.path:
        sys.path.insert(0, base_dir)
    scripts_dir = os.path.join(base_dir, 'scripts')
    if scripts_dir not in sys.path:
        sys.path.insert(0, scripts_dir)

    db = SessionLocal()
    try:
        # Imported inside the try block so that an import failure marks the
        # job as failed instead of leaving it in 'running' forever.
        from scripts.arm_company import arm_company

        job = db.query(BulkEnrichmentJob).filter_by(id=job_id).first()
        if not job:
            logger.error(f"Bulk enrichment job {job_id} not found")
            return

        all_results = {}
        last_index = len(company_ids) - 1

        for i, cid in enumerate(company_ids):
            company = db.query(Company).filter_by(id=cid).first()
            company_name = company.name if company else f"ID {cid}"

            try:
                result = arm_company(cid, force=False, steps=steps)
                if isinstance(result, dict):
                    all_results[str(cid)] = {
                        'name': company_name,
                        'results': result,
                    }
                else:
                    # A non-dict return value signals that no step results
                    # are available for this company.
                    all_results[str(cid)] = {
                        'name': company_name,
                        'results': {'error': 'Firma nie znaleziona' if not result else 'Nieznany błąd'},
                    }
            except Exception as e:
                # One failing company must not abort the whole batch.
                logger.error(f"Bulk enrichment error for company {cid}: {e}")
                all_results[str(cid)] = {
                    'name': company_name,
                    'results': {'error': str(e)[:200]},
                }

            # Update progress. A copy is assigned so the ORM always receives a
            # new object rather than the same dict mutated in place, which a
            # plain JSON column may not register as a change.
            job.processed_companies = i + 1
            job.results = dict(all_results)
            db.commit()

            # Delay between companies to respect API limits
            if i < last_index:
                time.sleep(2)

        job.status = 'completed'
        job.completed_at = datetime.now()
        db.commit()
        logger.info(f"Bulk enrichment job {job_id} completed: {len(company_ids)} companies")

    except Exception as e:
        logger.error(f"Bulk enrichment job {job_id} failed: {e}")
        try:
            # After a failed flush/commit the session refuses further work
            # until it is rolled back; without this the job could never be
            # marked as failed.
            db.rollback()
            job = db.query(BulkEnrichmentJob).filter_by(id=job_id).first()
            if job:
                job.status = 'failed'
                job.completed_at = datetime.now()
                db.commit()
        except Exception as mark_err:
            # Best effort only, but leave a trace instead of failing silently.
            logger.error(f"Could not mark bulk enrichment job {job_id} as failed: {mark_err}")
    finally:
        db.close()
@bp.route('/data-quality/bulk-enrich', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def bulk_enrich():
    """Start a bulk enrichment job.

    Expects a JSON object with ``company_ids`` (list of integer company IDs,
    at most 50 distinct values) and an optional ``steps`` list (defaults to
    all steps). Creates a ``BulkEnrichmentJob`` row and starts a daemon thread
    that processes it.

    Returns:
        JSON ``{'job_id': ..., 'total': ...}`` on success, or
        ``{'error': ...}`` with HTTP 400 when the request is invalid.
    """
    # silent=True turns a missing/malformed JSON body into None so the caller
    # gets this endpoint's JSON error instead of a framework error page.
    data = request.get_json(silent=True)
    if not data or not isinstance(data, dict):
        return jsonify({'error': 'Brak danych'}), 400

    raw_ids = data.get('company_ids', [])
    steps = data.get('steps', ['registry', 'seo', 'social', 'gbp', 'logo'])

    if not raw_ids:
        return jsonify({'error': 'Nie wybrano firm'}), 400

    # Reject anything that is not a list of plain integers before it reaches
    # len() or the database query (bool is excluded although it subclasses int).
    if not isinstance(raw_ids, list) or not all(
        isinstance(cid, int) and not isinstance(cid, bool) for cid in raw_ids
    ):
        return jsonify({'error': 'Nieprawidłowa lista firm'}), 400

    # Drop duplicates while keeping the requested order; duplicates would
    # otherwise fail the existence check below with an empty "missing" set
    # and make the worker process the same company twice.
    company_ids = list(dict.fromkeys(raw_ids))

    if len(company_ids) > 50:
        return jsonify({'error': 'Maksymalnie 50 firm na raz'}), 400

    valid_steps = {'registry', 'seo', 'social', 'gbp', 'logo'}
    if not isinstance(steps, list):
        steps = []
    steps = [s for s in steps if isinstance(s, str) and s in valid_steps]
    if not steps:
        return jsonify({'error': 'Nie wybrano kroków'}), 400

    db = SessionLocal()
    try:
        # Validate company IDs
        existing = db.query(Company.id).filter(Company.id.in_(company_ids)).all()
        existing_ids = [r[0] for r in existing]
        if len(existing_ids) != len(company_ids):
            missing = set(company_ids) - set(existing_ids)
            return jsonify({'error': f'Nie znaleziono firm: {missing}'}), 400

        # Create job
        job = BulkEnrichmentJob(
            started_by=current_user.id,
            total_companies=len(company_ids),
            steps=steps,
            results={},
        )
        db.add(job)
        db.commit()
        job_id = job.id

        # Start background thread
        thread = threading.Thread(
            target=_run_bulk_enrichment,
            args=(job_id, company_ids, steps),
            daemon=True,
        )
        thread.start()

        logger.info(f"Bulk enrichment job {job_id} started by {current_user.email}: {len(company_ids)} companies, steps={steps}")

        return jsonify({'job_id': job_id, 'total': len(company_ids)})
    finally:
        db.close()
@bp.route('/data-quality/bulk-enrich/status')
@login_required
@role_required(SystemRole.ADMIN)
def bulk_enrich_status():
    """Report the progress of a bulk enrichment job.

    Reads ``job_id`` from the query string and returns the job's status,
    processed/total counters, a one-line summary of one processed company and,
    once the job is no longer running, the full results.
    """
    job_id = request.args.get('job_id', type=int)
    if not job_id:
        return jsonify({'error': 'Brak job_id'}), 400

    db = SessionLocal()
    try:
        job = db.query(BulkEnrichmentJob).filter_by(id=job_id).first()
        if not job:
            return jsonify({'error': 'Job nie znaleziony'}), 404

        # Summary line for the progress log: whichever entry comes last when
        # iterating the stored results wins.
        # NOTE(review): this is only "the last processed company" if the
        # results column keeps insertion order - confirm for JSONB.
        latest_result = None
        if job.results and job.processed_companies > 0:
            for cid, entry in job.results.items():
                company_name = entry.get('name', cid)
                step_results = entry.get('results', {})
                succeeded = len([
                    outcome for outcome in step_results.values()
                    if isinstance(outcome, str)
                    and outcome.startswith(('OK', 'SKIP (done)'))
                ])
                latest_result = f"{company_name}: {succeeded}/{len(step_results)} kroków OK"

        # Full results are withheld while the job is still running.
        payload = {
            'job_id': job.id,
            'status': job.status,
            'processed': job.processed_companies,
            'total': job.total_companies,
            'latest_result': latest_result,
            'results': None if job.status == 'running' else job.results,
        }
        return jsonify(payload)
    finally:
        db.close()

View File

@ -21,6 +21,7 @@ from database import (
CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit
) )
from utils.decorators import role_required from utils.decorators import role_required
from utils.data_quality import compute_data_quality_score, update_company_data_quality
# Logger # Logger
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -157,6 +158,9 @@ def admin_company_add():
db.commit() db.commit()
db.refresh(new_company) db.refresh(new_company)
update_company_data_quality(new_company, db)
db.commit()
logger.info(f"Admin {current_user.email} created new company: {name} (ID: {new_company.id})") logger.info(f"Admin {current_user.email} created new company: {name} (ID: {new_company.id})")
return jsonify({ return jsonify({
@ -263,6 +267,9 @@ def admin_company_update(company_id):
company.last_updated = datetime.utcnow() company.last_updated = datetime.utcnow()
db.commit() db.commit()
update_company_data_quality(company, db)
db.commit()
logger.info(f"Admin {current_user.email} updated company {company.name} (ID: {company_id})") logger.info(f"Admin {current_user.email} updated company {company.name} (ID: {company_id})")
return jsonify({ return jsonify({
@ -726,27 +733,7 @@ def admin_company_detail(company_id):
} }
# --- Completeness score (12 fields) --- # --- Completeness score (12 fields) ---
fields = { completeness = compute_data_quality_score(company, db)
'NIP': bool(company.nip),
'Adres': bool(company.address_city),
'Telefon': bool(company.phone),
'Email': bool(company.email),
'Strona WWW': bool(company.website),
'Opis': bool(company.description_short),
'Kategoria': bool(company.category_id),
'Logo': enrichment['logo']['done'],
'Dane urzędowe': enrichment['registry']['done'],
'Audyt SEO': enrichment['seo']['done'],
'Audyt Social': enrichment['social']['done'],
'Audyt GBP': enrichment['gbp']['done'],
}
completeness = {
'score': int(sum(fields.values()) / len(fields) * 100),
'fields': fields,
'total': len(fields),
'filled': sum(fields.values()),
}
logger.info(f"Admin {current_user.email} viewed company detail: {company.name} (ID: {company_id})") logger.info(f"Admin {current_user.email} viewed company detail: {company.name} (ID: {company_id})")

View File

@ -0,0 +1,184 @@
"""
Admin Data Quality Dashboard
=============================
Aggregate view of company data quality and completeness across all companies.
"""
import os
import logging
from datetime import datetime
from flask import render_template
from flask_login import login_required
from sqlalchemy import func
from . import bp
from database import (
SessionLocal, Company, CompanyWebsiteAnalysis,
CompanySocialMedia, GBPAudit, SystemRole
)
from utils.decorators import role_required
logger = logging.getLogger(__name__)
LOGO_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'static', 'img', 'companies')
def _check_logo_exists(slug):
    """Return True when a '<slug>.webp' or '<slug>.svg' file exists in LOGO_DIR.

    A falsy slug (None or empty string) yields False without touching the disk.
    """
    if not slug:
        return False
    return any(
        os.path.isfile(os.path.join(LOGO_DIR, f'{slug}.{ext}'))
        for ext in ('webp', 'svg')
    )
@bp.route('/data-quality')
@login_required
@role_required(SystemRole.ADMIN)
def admin_data_quality():
    """Render the admin dashboard summarising company data completeness.

    For every company with status 'active' or 'pending' a 12-field
    completeness check is computed and aggregated into per-field coverage,
    quality-label and score-bucket distributions, the average score and a
    per-company table ordered from least to most complete.
    """
    db = SessionLocal()
    try:
        now = datetime.now()

        # Full Company rows for everything that is active or pending.
        companies = (
            db.query(Company)
            .filter(Company.status.in_(['active', 'pending']))
            .order_by(Company.name)
            .all()
        )
        total = len(companies)

        if total == 0:
            return render_template(
                'admin/data_quality_dashboard.html',
                total=0, field_stats={}, quality_dist={},
                score_dist={}, avg_score=0, companies_table=[],
                now=now,
            )

        # One query per related table rather than one per company.
        seo_company_ids = {
            row[0] for row in db.query(CompanyWebsiteAnalysis.company_id).all()
        }
        social_counts = dict(
            db.query(
                CompanySocialMedia.company_id,
                func.count(CompanySocialMedia.id)
            ).group_by(CompanySocialMedia.company_id).all()
        )
        gbp_company_ids = {
            row[0] for row in db.query(GBPAudit.company_id).distinct().all()
        }

        # Per-field coverage counters, in display order.
        field_counters = dict.fromkeys(
            (
                'NIP', 'Adres', 'Telefon', 'Email', 'Strona WWW', 'Opis',
                'Kategoria', 'Logo', 'Dane urzędowe', 'Audyt SEO',
                'Audyt Social', 'Audyt GBP',
            ),
            0,
        )
        quality_dist = {'basic': 0, 'enhanced': 0, 'complete': 0}
        score_dist = {'0-25': 0, '26-50': 0, '51-75': 0, '76-100': 0}
        score_sum = 0
        companies_table = []

        for company in companies:
            # The 12-field completeness check for this company.
            checks = {
                'NIP': bool(company.nip),
                'Adres': bool(company.address_city),
                'Telefon': bool(company.phone),
                'Email': bool(company.email),
                'Strona WWW': bool(company.website),
                'Opis': bool(company.description_short),
                'Kategoria': bool(company.category_id),
                'Logo': _check_logo_exists(company.slug),
                'Dane urzędowe': bool(company.ceidg_fetched_at or company.krs_fetched_at),
                'Audyt SEO': company.id in seo_company_ids,
                'Audyt Social': social_counts.get(company.id, 0) > 0,
                'Audyt GBP': company.id in gbp_company_ids,
            }
            filled = sum(checks.values())
            field_total = len(checks)
            score = int(filled / field_total * 100)

            for field_name in checks:
                if checks[field_name]:
                    field_counters[field_name] += 1

            # Quality label thresholds: <34 basic, <67 enhanced, else complete.
            if score >= 67:
                label = 'complete'
            elif score >= 34:
                label = 'enhanced'
            else:
                label = 'basic'
            quality_dist[label] += 1

            # Score histogram bucket.
            if score > 75:
                bucket = '76-100'
            elif score > 50:
                bucket = '51-75'
            elif score > 25:
                bucket = '26-50'
            else:
                bucket = '0-25'
            score_dist[bucket] += 1

            score_sum += score

            companies_table.append({
                'id': company.id,
                'name': company.name,
                'slug': company.slug,
                'score': score,
                'filled': filled,
                'total': field_total,
                'label': label,
                'data_quality': company.data_quality or 'basic',
                'fields': checks,
                'status': company.status,
            })

        # Most incomplete companies first; ties keep the name ordering.
        companies_table.sort(key=lambda row: row['score'])

        # Field coverage as absolute count plus rounded percentage.
        field_stats = {}
        for name, count in field_counters.items():
            field_stats[name] = {'count': count, 'pct': round(count / total * 100)}

        avg_score = round(score_sum / total)

        return render_template(
            'admin/data_quality_dashboard.html',
            total=total,
            field_stats=field_stats,
            quality_dist=quality_dist,
            score_dist=score_dist,
            avg_score=avg_score,
            companies_table=companies_table,
            now=now,
        )
    finally:
        db.close()

View File

@ -23,6 +23,7 @@ from database import (
from datetime import timedelta from datetime import timedelta
import gemini_service import gemini_service
import krs_api_service import krs_api_service
from utils.data_quality import update_company_data_quality
from ceidg_api_service import fetch_ceidg_by_nip from ceidg_api_service import fetch_ceidg_by_nip
from . import bp from . import bp
@ -589,6 +590,9 @@ def api_enrich_company_registry(company_id):
db.commit() db.commit()
update_company_data_quality(company, db)
db.commit()
logger.info(f"Registry enrichment for company {company.id} ({company.name}) from {source} by {current_user.email}") logger.info(f"Registry enrichment for company {company.id} ({company.name}) from {source} by {current_user.email}")
return jsonify({ return jsonify({

View File

@ -11,6 +11,7 @@ from blueprints.public import bp
from sqlalchemy import or_ from sqlalchemy import or_
from database import SessionLocal, Company, CompanyContact, CompanySocialMedia, CompanyWebsite, Category from database import SessionLocal, Company, CompanyContact, CompanySocialMedia, CompanyWebsite, Category
from utils.helpers import sanitize_input, sanitize_html, validate_email, ensure_url from utils.helpers import sanitize_input, sanitize_html, validate_email, ensure_url
from utils.data_quality import update_company_data_quality
from datetime import datetime from datetime import datetime
import logging import logging
@ -180,6 +181,10 @@ def company_edit_save(company_id=None):
_save_social_media(db, company) _save_social_media(db, company)
db.commit() db.commit()
update_company_data_quality(company, db)
db.commit()
flash('Dane firmy zostały zaktualizowane.', 'success') flash('Dane firmy zostały zaktualizowane.', 'success')
return redirect(url_for('public.company_detail', company_id=company.id)) return redirect(url_for('public.company_detail', company_id=company.id))

View File

@ -1320,6 +1320,24 @@ class CompanyQualityTracking(Base):
company = relationship('Company', back_populates='quality_tracking') company = relationship('Company', back_populates='quality_tracking')
class BulkEnrichmentJob(Base):
    """Tracks bulk enrichment jobs started from admin dashboard."""
    __tablename__ = 'bulk_enrichment_jobs'

    id = Column(Integer, primary_key=True)
    started_at = Column(DateTime, default=datetime.now)
    started_by = Column(Integer, ForeignKey('users.id'))
    status = Column(String(20), default='running')  # running, completed, failed
    total_companies = Column(Integer, default=0)
    processed_companies = Column(Integer, default=0)
    steps = Column(PG_JSONB)  # ['registry', 'seo', 'social', 'gbp', 'logo']
    # Callable default: every row gets its own empty dict instead of all rows
    # sharing one module-level dict object.
    # Shape: {"<company_id>": {"name": ..., "results": {step: result, ...}}, ...}
    results = Column(PG_JSONB, default=dict)
    completed_at = Column(DateTime)

    # Relationship
    user = relationship('User')
class CompanyWebsiteContent(Base): class CompanyWebsiteContent(Base):
"""Scraped website content for companies""" """Scraped website content for companies"""
__tablename__ = 'company_website_content' __tablename__ = 'company_website_content'

View File

@ -0,0 +1,17 @@
-- 075: Create bulk_enrichment_jobs table for tracking batch enrichment operations
-- Run: python3 scripts/run_migration.py database/migrations/075_bulk_enrichment_jobs.sql
-- One row per bulk enrichment job; IF NOT EXISTS makes the migration re-runnable.
CREATE TABLE IF NOT EXISTS bulk_enrichment_jobs (
id SERIAL PRIMARY KEY,
started_at TIMESTAMP DEFAULT NOW(),
-- User who started the job.
started_by INTEGER REFERENCES users(id),
-- Job state; new rows start as 'running'.
status VARCHAR(20) DEFAULT 'running',
total_companies INTEGER DEFAULT 0,
processed_companies INTEGER DEFAULT 0,
-- Selected enrichment steps.
steps JSONB,
-- Per-company results, starting as an empty JSON object.
results JSONB DEFAULT '{}'::jsonb,
completed_at TIMESTAMP
);
-- Let the application role use the table and its id sequence.
GRANT ALL ON TABLE bulk_enrichment_jobs TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE bulk_enrichment_jobs_id_seq TO nordabiz_app;

View File

@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Any
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from database import Company, GBPAudit, GBPReview, CompanyWebsiteAnalysis, SessionLocal, OAuthToken from database import Company, GBPAudit, GBPReview, CompanyWebsiteAnalysis, SessionLocal, OAuthToken
from utils.data_quality import update_company_data_quality
import gemini_service import gemini_service
try: try:
@ -2029,6 +2030,18 @@ def fetch_google_business_data(
db.commit() db.commit()
# Flow GBP phone/website to Company if Company fields are empty
try:
if analysis.google_phone and not company.phone:
company.phone = analysis.google_phone
if analysis.google_website and not company.website:
company.website = analysis.google_website
update_company_data_quality(company, db)
db.commit()
except Exception as flow_err:
logger.warning(f"Failed to flow GBP data to Company {company_id}: {flow_err}")
db.rollback()
result['steps'][-1]['status'] = 'complete' result['steps'][-1]['status'] = 'complete'
result['steps'][-1]['message'] = 'Dane zapisane pomyślnie' result['steps'][-1]['message'] = 'Dane zapisane pomyślnie'
result['success'] = True result['success'] = True

View File

@ -24,12 +24,27 @@ if scripts_dir not in sys.path:
from database import SessionLocal, Company, CompanyWebsiteAnalysis, CompanySocialMedia, CompanyPKD, CompanyPerson from database import SessionLocal, Company, CompanyWebsiteAnalysis, CompanySocialMedia, CompanyPKD, CompanyPerson
from database import GBPAudit from database import GBPAudit
from utils.data_quality import update_company_data_quality
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger('arm_company') logger = logging.getLogger('arm_company')
def arm_company(company_id, force=False): ALL_STEPS = ['registry', 'seo', 'social', 'gbp', 'logo']
def arm_company(company_id, force=False, steps=None, progress_callback=None):
"""Arm a company with enrichment data.
Args:
company_id: Company ID to enrich
force: Force re-execution of all steps
steps: List of steps to run (default: all). Options: registry, seo, social, gbp, logo
progress_callback: Optional callback(company_id, results), called once after all steps have run, for bulk tracking
"""
if steps is None:
steps = ALL_STEPS
db = SessionLocal() db = SessionLocal()
try: try:
company = db.query(Company).filter_by(id=company_id).first() company = db.query(Company).filter_by(id=company_id).first()
@ -41,13 +56,17 @@ def arm_company(company_id, force=False):
print("Uzbrajam: %s (ID: %d)" % (company.name, company.id)) print("Uzbrajam: %s (ID: %d)" % (company.name, company.id))
print("NIP: %s | WWW: %s" % (company.nip or '-', company.website or '-')) print("NIP: %s | WWW: %s" % (company.nip or '-', company.website or '-'))
print("Tryb: %s" % ("FORCE (wszystkie kroki)" if force else "SMART (tylko brakujące)")) print("Tryb: %s" % ("FORCE (wszystkie kroki)" if force else "SMART (tylko brakujące)"))
if steps != ALL_STEPS:
print("Kroki: %s" % ', '.join(steps))
print("=" * 60) print("=" * 60)
results = {} results = {}
# --- Krok 1: Dane urzędowe --- # --- Krok 1: Dane urzędowe ---
registry_done = bool(company.krs_fetched_at or company.ceidg_fetched_at) registry_done = bool(company.krs_fetched_at or company.ceidg_fetched_at)
if force or not registry_done: if 'registry' not in steps:
results['registry'] = 'SKIP (nie wybrano)'
elif force or not registry_done:
if company.nip: if company.nip:
print("\n[1/5] Pobieranie danych urzędowych...") print("\n[1/5] Pobieranie danych urzędowych...")
try: try:
@ -194,7 +213,9 @@ def arm_company(company_id, force=False):
# --- Krok 2: Audyt SEO --- # --- Krok 2: Audyt SEO ---
seo_done = db.query(CompanyWebsiteAnalysis).filter_by(company_id=company.id).first() is not None seo_done = db.query(CompanyWebsiteAnalysis).filter_by(company_id=company.id).first() is not None
if force or not seo_done: if 'seo' not in steps:
results['seo'] = 'SKIP (nie wybrano)'
elif force or not seo_done:
if company.website: if company.website:
print("\n[2/5] Audyt SEO...") print("\n[2/5] Audyt SEO...")
try: try:
@ -224,7 +245,9 @@ def arm_company(company_id, force=False):
# --- Krok 3: Social Media --- # --- Krok 3: Social Media ---
social_done = db.query(CompanySocialMedia).filter_by(company_id=company.id).count() > 0 social_done = db.query(CompanySocialMedia).filter_by(company_id=company.id).count() > 0
if force or not social_done: if 'social' not in steps:
results['social'] = 'SKIP (nie wybrano)'
elif force or not social_done:
print("\n[3/5] Audyt Social Media...") print("\n[3/5] Audyt Social Media...")
try: try:
from social_media_audit import SocialMediaAuditor from social_media_audit import SocialMediaAuditor
@ -254,7 +277,9 @@ def arm_company(company_id, force=False):
# --- Krok 4: GBP --- # --- Krok 4: GBP ---
gbp_done = db.query(GBPAudit).filter_by(company_id=company.id).first() is not None gbp_done = db.query(GBPAudit).filter_by(company_id=company.id).first() is not None
if force or not gbp_done: if 'gbp' not in steps:
results['gbp'] = 'SKIP (nie wybrano)'
elif force or not gbp_done:
print("\n[4/5] Audyt GBP...") print("\n[4/5] Audyt GBP...")
try: try:
from gbp_audit_service import GBPAuditService from gbp_audit_service import GBPAuditService
@ -284,7 +309,9 @@ def arm_company(company_id, force=False):
logo_done = True logo_done = True
break break
if force or not logo_done: if 'logo' not in steps:
results['logo'] = 'SKIP (nie wybrano)'
elif force or not logo_done:
if company.website: if company.website:
print("\n[5/5] Pobieranie logo...") print("\n[5/5] Pobieranie logo...")
try: try:
@ -322,7 +349,15 @@ def arm_company(company_id, force=False):
print(" Wynik: %d/5 kroków zaliczonych" % ok_count) print(" Wynik: %d/5 kroków zaliczonych" % ok_count)
print("=" * 60) print("=" * 60)
return True # Update data quality score
dq = update_company_data_quality(company, db)
db.commit()
print("\n Data quality: %s (%d%%)" % (company.data_quality, dq['score']))
if progress_callback:
progress_callback(company_id, results)
return results
except Exception as e: except Exception as e:
logger.error("Błąd uzbrajania firmy %d: %s" % (company_id, str(e))) logger.error("Błąd uzbrajania firmy %d: %s" % (company_id, str(e)))

View File

@ -0,0 +1,48 @@
#!/usr/bin/env python3
"""
Backfill data quality scores for all companies.
Usage:
python3 scripts/backfill_data_quality_scores.py
"""
import sys
import os
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, BASE_DIR)
from database import SessionLocal, Company
from utils.data_quality import update_company_data_quality
BATCH_SIZE = 50
def main():
    """Recompute the data quality of every company and print a label summary.

    Commits every BATCH_SIZE companies plus once at the end, then prints how
    many companies carry each of the 'basic', 'enhanced' and 'complete' labels.
    """
    db = SessionLocal()
    try:
        companies = db.query(Company).all()
        total = len(companies)
        print(f"Backfilling data quality for {total} companies...")

        for position, company in enumerate(companies, start=1):
            update_company_data_quality(company, db)

            # Commit in batches to keep each transaction small.
            if position % BATCH_SIZE == 0:
                db.commit()
                print(f" [{position}/{total}] committed batch")

        # Final commit covers the last, possibly partial, batch.
        db.commit()
        print(f"Done. {total} companies updated.")

        # Summary: tally the three known labels in a single pass.
        tally = {'basic': 0, 'enhanced': 0, 'complete': 0}
        for company in companies:
            label = company.data_quality
            if label in tally:
                tally[label] += 1
        print(f"\nSummary: basic={tally['basic']}, enhanced={tally['enhanced']}, complete={tally['complete']}")

    finally:
        db.close()
if __name__ == '__main__':
main()

View File

@ -0,0 +1,717 @@
{% extends "base.html" %}
{% block title %}Jakość danych - Admin{% endblock %}
{% block extra_css %}
<style>
.dq-header {
display: flex;
justify-content: space-between;
align-items: flex-start;
margin-bottom: var(--spacing-xl);
flex-wrap: wrap;
gap: var(--spacing-md);
}
.dq-header h1 {
font-size: var(--font-size-2xl);
color: var(--text-primary);
margin-bottom: var(--spacing-xs);
}
.dq-header p {
color: var(--text-secondary);
}
.dq-timestamp {
text-align: right;
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-lg);
padding: var(--spacing-md) var(--spacing-lg);
font-size: var(--font-size-sm);
color: var(--text-secondary);
}
/* --- Stat Cards --- */
.dq-stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: var(--spacing-lg);
margin-bottom: var(--spacing-xl);
}
.dq-stat-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-xl);
padding: var(--spacing-lg);
text-align: center;
position: relative;
overflow: hidden;
}
.dq-stat-card::before {
content: '';
position: absolute;
top: 0; left: 0; right: 0;
height: 4px;
}
.dq-stat-card.total::before { background: linear-gradient(90deg, #3b82f6, #8b5cf6); }
.dq-stat-card.avg::before { background: linear-gradient(90deg, #10b981, #14b8a6); }
.dq-stat-card.complete::before { background: linear-gradient(90deg, #22c55e, #16a34a); }
.dq-stat-card.incomplete::before { background: linear-gradient(90deg, #f59e0b, #f97316); }
.dq-stat-value {
font-size: var(--font-size-3xl);
font-weight: 700;
color: var(--text-primary);
line-height: 1;
margin-bottom: var(--spacing-xs);
}
.dq-stat-label {
font-size: var(--font-size-sm);
color: var(--text-secondary);
}
/* --- Coverage Bars --- */
.dq-section {
background: var(--surface);
border: 1px solid var(--border);
border-radius: var(--radius-xl);
padding: var(--spacing-xl);
margin-bottom: var(--spacing-xl);
}
.dq-section-title {
font-size: var(--font-size-lg);
font-weight: 600;
color: var(--text-primary);
margin-bottom: var(--spacing-lg);
}
.dq-bar-row {
display: flex;
align-items: center;
gap: var(--spacing-md);
margin-bottom: var(--spacing-md);
}
.dq-bar-label {
width: 140px;
font-size: var(--font-size-sm);
color: var(--text-secondary);
text-align: right;
flex-shrink: 0;
}
.dq-bar-track {
flex: 1;
height: 24px;
background: var(--background);
border-radius: var(--radius);
overflow: hidden;
position: relative;
}
.dq-bar-fill {
height: 100%;
border-radius: var(--radius);
transition: width 0.5s ease;
display: flex;
align-items: center;
justify-content: flex-end;
padding-right: var(--spacing-sm);
font-size: var(--font-size-xs);
font-weight: 600;
color: white;
min-width: 40px;
}
.dq-bar-fill.high { background: linear-gradient(90deg, #22c55e, #16a34a); }
.dq-bar-fill.medium { background: linear-gradient(90deg, #f59e0b, #d97706); }
.dq-bar-fill.low { background: linear-gradient(90deg, #ef4444, #dc2626); }
.dq-bar-count {
width: 80px;
font-size: var(--font-size-sm);
color: var(--text-secondary);
flex-shrink: 0;
}
/* --- Distribution --- */
.dq-dist-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: var(--spacing-md);
}
.dq-dist-card {
padding: var(--spacing-lg);
border-radius: var(--radius-lg);
text-align: center;
}
.dq-dist-card.basic { background: #fef2f2; border: 1px solid #fecaca; }
.dq-dist-card.enhanced { background: #fffbeb; border: 1px solid #fde68a; }
.dq-dist-card.complete { background: #f0fdf4; border: 1px solid #bbf7d0; }
.dq-dist-value {
font-size: var(--font-size-2xl);
font-weight: 700;
}
.dq-dist-card.basic .dq-dist-value { color: #dc2626; }
.dq-dist-card.enhanced .dq-dist-value { color: #d97706; }
.dq-dist-card.complete .dq-dist-value { color: #16a34a; }
.dq-dist-label {
font-size: var(--font-size-sm);
color: var(--text-secondary);
margin-top: var(--spacing-xs);
}
/* --- Score Distribution --- */
.dq-score-dist {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: var(--spacing-md);
margin-top: var(--spacing-lg);
}
.dq-score-bucket {
text-align: center;
padding: var(--spacing-md);
background: var(--background);
border-radius: var(--radius-lg);
}
.dq-score-bucket-value {
font-size: var(--font-size-xl);
font-weight: 700;
color: var(--text-primary);
}
.dq-score-bucket-label {
font-size: var(--font-size-xs);
color: var(--text-secondary);
}
/* --- Companies Table --- */
.dq-table-controls {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: var(--spacing-md);
flex-wrap: wrap;
gap: var(--spacing-sm);
}
.dq-filter-select {
padding: var(--spacing-xs) var(--spacing-md);
border: 1px solid var(--border);
border-radius: var(--radius);
background: var(--surface);
font-size: var(--font-size-sm);
color: var(--text-primary);
}
.dq-table {
width: 100%;
border-collapse: collapse;
}
.dq-table th {
text-align: left;
padding: var(--spacing-sm) var(--spacing-md);
font-size: var(--font-size-xs);
color: var(--text-secondary);
text-transform: uppercase;
letter-spacing: 0.5px;
border-bottom: 2px solid var(--border);
cursor: pointer;
user-select: none;
}
.dq-table th:hover {
color: var(--text-primary);
}
.dq-table td {
padding: var(--spacing-sm) var(--spacing-md);
font-size: var(--font-size-sm);
border-bottom: 1px solid var(--border);
vertical-align: middle;
}
.dq-table tr:hover {
background: var(--background);
}
.dq-score-badge {
display: inline-flex;
align-items: center;
gap: var(--spacing-xs);
padding: 2px 10px;
border-radius: 999px;
font-size: var(--font-size-xs);
font-weight: 600;
}
.dq-score-badge.high { background: #dcfce7; color: #166534; }
.dq-score-badge.medium { background: #fef9c3; color: #854d0e; }
.dq-score-badge.low { background: #fee2e2; color: #991b1b; }
.dq-quality-badge {
display: inline-block;
padding: 2px 8px;
border-radius: var(--radius);
font-size: var(--font-size-xs);
font-weight: 500;
}
.dq-quality-badge.basic { background: #fee2e2; color: #991b1b; }
.dq-quality-badge.enhanced { background: #fef9c3; color: #854d0e; }
.dq-quality-badge.complete { background: #dcfce7; color: #166534; }
.dq-field-dots {
display: flex;
gap: 3px;
flex-wrap: wrap;
}
.dq-field-dot {
width: 8px;
height: 8px;
border-radius: 50%;
flex-shrink: 0;
}
.dq-field-dot.filled { background: #22c55e; }
.dq-field-dot.empty { background: #e5e7eb; }
.dq-company-link {
color: var(--primary);
text-decoration: none;
font-weight: 500;
}
.dq-company-link:hover {
text-decoration: underline;
}
.dq-bulk-bar {
display: none;
align-items: center;
gap: var(--spacing-md);
padding: var(--spacing-md) var(--spacing-lg);
background: var(--primary);
color: white;
border-radius: var(--radius-lg);
margin-bottom: var(--spacing-md);
}
.dq-bulk-bar.active {
display: flex;
}
.dq-bulk-btn {
padding: var(--spacing-xs) var(--spacing-md);
background: white;
color: var(--primary);
border: none;
border-radius: var(--radius);
font-size: var(--font-size-sm);
font-weight: 600;
cursor: pointer;
}
.dq-bulk-btn:hover {
background: #f0f0f0;
}
/* Pagination */
.dq-pagination {
display: flex;
justify-content: center;
gap: var(--spacing-xs);
margin-top: var(--spacing-lg);
}
.dq-page-btn {
padding: var(--spacing-xs) var(--spacing-md);
border: 1px solid var(--border);
border-radius: var(--radius);
background: var(--surface);
color: var(--text-primary);
font-size: var(--font-size-sm);
cursor: pointer;
}
.dq-page-btn.active {
background: var(--primary);
color: white;
border-color: var(--primary);
}
.dq-page-btn:hover:not(.active) {
background: var(--background);
}
/* Responsive */
@media (max-width: 768px) {
.dq-bar-label { width: 100px; font-size: var(--font-size-xs); }
.dq-stats-grid { grid-template-columns: repeat(2, 1fr); }
.dq-score-dist { grid-template-columns: repeat(2, 1fr); }
.dq-table { font-size: var(--font-size-xs); }
.dq-table td, .dq-table th { padding: var(--spacing-xs); }
}
</style>
{% endblock %}
{% block content %}
<div class="dq-header">
<div>
<h1>Jakość danych firm</h1>
<p>Przegląd kompletności i jakości danych {{ total }} firm w katalogu</p>
</div>
<div class="dq-timestamp">
Stan na {{ now.strftime('%d.%m.%Y, %H:%M') }}
</div>
</div>
<!-- Stat Cards -->
<div class="dq-stats-grid">
<div class="dq-stat-card total">
<div class="dq-stat-value">{{ total }}</div>
<div class="dq-stat-label">Firm w katalogu</div>
</div>
<div class="dq-stat-card avg">
<div class="dq-stat-value">{{ avg_score }}%</div>
<div class="dq-stat-label">Średnia kompletność</div>
</div>
<div class="dq-stat-card complete">
<div class="dq-stat-value">{{ quality_dist.get('complete', 0) }}</div>
<div class="dq-stat-label">Kompletnych (67%+)</div>
</div>
<div class="dq-stat-card incomplete">
<div class="dq-stat-value">{{ quality_dist.get('basic', 0) }}</div>
<div class="dq-stat-label">Podstawowych (&lt;34%)</div>
</div>
</div>
<!-- Field Coverage -->
<div class="dq-section">
<div class="dq-section-title">Pokrycie danych per pole</div>
{% for field_name, stats in field_stats.items() %}
<div class="dq-bar-row">
<div class="dq-bar-label">{{ field_name }}</div>
<div class="dq-bar-track">
<div class="dq-bar-fill {% if stats.pct >= 70 %}high{% elif stats.pct >= 40 %}medium{% else %}low{% endif %}"
style="width: {{ stats.pct }}%">
{{ stats.pct }}%
</div>
</div>
<div class="dq-bar-count">{{ stats.count }}/{{ total }}</div>
</div>
{% endfor %}
</div>
<!-- Quality Distribution -->
<div class="dq-section">
<div class="dq-section-title">Rozkład jakości danych</div>
<div class="dq-dist-grid">
<div class="dq-dist-card basic">
<div class="dq-dist-value">{{ quality_dist.get('basic', 0) }}</div>
<div class="dq-dist-label">Podstawowe (&lt;34%)</div>
</div>
<div class="dq-dist-card enhanced">
<div class="dq-dist-value">{{ quality_dist.get('enhanced', 0) }}</div>
<div class="dq-dist-label">Rozszerzone (34-66%)</div>
</div>
<div class="dq-dist-card complete">
<div class="dq-dist-value">{{ quality_dist.get('complete', 0) }}</div>
<div class="dq-dist-label">Kompletne (67%+)</div>
</div>
</div>
<div class="dq-score-dist">
<div class="dq-score-bucket">
<div class="dq-score-bucket-value">{{ score_dist.get('0-25', 0) }}</div>
<div class="dq-score-bucket-label">0-25%</div>
</div>
<div class="dq-score-bucket">
<div class="dq-score-bucket-value">{{ score_dist.get('26-50', 0) }}</div>
<div class="dq-score-bucket-label">26-50%</div>
</div>
<div class="dq-score-bucket">
<div class="dq-score-bucket-value">{{ score_dist.get('51-75', 0) }}</div>
<div class="dq-score-bucket-label">51-75%</div>
</div>
<div class="dq-score-bucket">
<div class="dq-score-bucket-value">{{ score_dist.get('76-100', 0) }}</div>
<div class="dq-score-bucket-label">76-100%</div>
</div>
</div>
</div>
<!-- Companies Table -->
<div class="dq-section">
<div class="dq-section-title">Firmy wg kompletności danych</div>
<!-- Bulk action bar -->
<div class="dq-bulk-bar" id="bulkBar">
<span id="selectedCount">0</span> zaznaczonych
<button class="dq-bulk-btn" onclick="openBulkEnrich()">Uzbrój zaznaczone</button>
<button class="dq-bulk-btn" onclick="clearSelection()" style="background: transparent; color: white; border: 1px solid rgba(255,255,255,0.5);">Odznacz</button>
</div>
<div class="dq-table-controls">
<div>
<select class="dq-filter-select" id="qualityFilter" onchange="filterTable()">
<option value="all">Wszystkie poziomy</option>
<option value="basic">Podstawowe</option>
<option value="enhanced">Rozszerzone</option>
<option value="complete">Kompletne</option>
</select>
</div>
<div style="font-size: var(--font-size-sm); color: var(--text-secondary);">
Pokazano <span id="shownCount">{{ companies_table|length }}</span> z {{ total }} firm
</div>
</div>
<table class="dq-table" id="companiesTable">
<thead>
<tr>
<th style="width: 30px"><input type="checkbox" id="selectAll" onchange="toggleSelectAll()"></th>
<th onclick="sortTable(1)">Firma</th>
<th onclick="sortTable(2)" style="width: 100px">Score</th>
<th onclick="sortTable(3)" style="width: 80px">Pola</th>
<th style="width: 130px">Kompletność</th>
<th onclick="sortTable(5)" style="width: 100px">Jakość</th>
</tr>
</thead>
<tbody>
{% for c in companies_table %}
<tr data-quality="{{ c.label }}">
<td><input type="checkbox" class="company-cb" value="{{ c.id }}"></td>
<td>
<a href="{{ url_for('admin.admin_company_detail', company_id=c.id) }}" class="dq-company-link">
{{ c.name }}
</a>
</td>
<td>
<span class="dq-score-badge {% if c.score >= 67 %}high{% elif c.score >= 34 %}medium{% else %}low{% endif %}">
{{ c.score }}%
</span>
</td>
<td>{{ c.filled }}/{{ c.total }}</td>
<td>
<div class="dq-field-dots" title="{% for fname, fval in c.fields.items() %}{{ fname }}: {{ 'tak' if fval else 'nie' }}&#10;{% endfor %}">
{% for fname, fval in c.fields.items() %}
<span class="dq-field-dot {{ 'filled' if fval else 'empty' }}" title="{{ fname }}"></span>
{% endfor %}
</div>
</td>
<td>
<span class="dq-quality-badge {{ c.label }}">
{% if c.label == 'basic' %}Podstawowe{% elif c.label == 'enhanced' %}Rozszerzone{% else %}Kompletne{% endif %}
</span>
</td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
<!-- Bulk Enrich Modal -->
<div id="bulkModal" style="display: none; position: fixed; top: 0; left: 0; right: 0; bottom: 0; background: rgba(0,0,0,0.5); z-index: 9999; align-items: center; justify-content: center;">
<div style="background: var(--surface); border-radius: var(--radius-xl); padding: var(--spacing-xl); max-width: 500px; width: 90%; max-height: 80vh; overflow-y: auto;">
<h3 style="margin-bottom: var(--spacing-lg);">Uzbrój zaznaczone firmy</h3>
<p style="color: var(--text-secondary); margin-bottom: var(--spacing-lg);">
Wybierz kroki enrichmentu do wykonania dla <strong id="modalCount">0</strong> firm:
</p>
<div style="display: flex; flex-direction: column; gap: var(--spacing-sm); margin-bottom: var(--spacing-xl);">
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-registry" checked> Dane z rejestrów (CEIDG/KRS)
</label>
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-seo" checked> Audyt SEO
</label>
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-social" checked> Audyt Social Media
</label>
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-gbp" checked> Audyt GBP
</label>
<label style="display: flex; align-items: center; gap: var(--spacing-sm); cursor: pointer;">
<input type="checkbox" id="step-logo"> Pobierz logo
</label>
</div>
<div style="display: flex; gap: var(--spacing-md); justify-content: flex-end;">
<button onclick="closeBulkModal()" style="padding: var(--spacing-sm) var(--spacing-lg); border: 1px solid var(--border); border-radius: var(--radius); background: var(--surface); cursor: pointer;">Anuluj</button>
<button onclick="startBulkEnrich()" style="padding: var(--spacing-sm) var(--spacing-lg); border: none; border-radius: var(--radius); background: var(--primary); color: white; font-weight: 600; cursor: pointer;">Rozpocznij</button>
</div>
<!-- Progress section -->
<div id="bulkProgress" style="display: none; margin-top: var(--spacing-xl); padding-top: var(--spacing-lg); border-top: 1px solid var(--border);">
<div style="display: flex; justify-content: space-between; margin-bottom: var(--spacing-sm);">
<span style="font-weight: 600;">Postęp</span>
<span id="progressText">0/0</span>
</div>
<div style="height: 8px; background: var(--background); border-radius: 4px; overflow: hidden;">
<div id="progressBar" style="height: 100%; background: var(--primary); border-radius: 4px; transition: width 0.3s; width: 0%;"></div>
</div>
<div id="progressLog" style="margin-top: var(--spacing-md); max-height: 200px; overflow-y: auto; font-size: var(--font-size-xs); font-family: monospace; color: var(--text-secondary);"></div>
</div>
</div>
</div>
{% endblock %}
{% block extra_js %}
// Data Quality Dashboard JS
/**
 * Show only the table rows whose data-quality attribute matches the level
 * chosen in the #qualityFilter select ('all' shows every row), then refresh
 * the "shown" counter.
 */
function filterTable() {
    var selectedLevel = document.getElementById('qualityFilter').value;
    var tableRows = document.querySelectorAll('#companiesTable tbody tr');
    var visibleCount = 0;
    for (var i = 0; i < tableRows.length; i++) {
        var tr = tableRows[i];
        var matches = selectedLevel === 'all' || tr.dataset.quality === selectedLevel;
        tr.style.display = matches ? '' : 'none';
        if (matches) {
            visibleCount++;
        }
    }
    document.getElementById('shownCount').textContent = visibleCount;
}
/**
 * Sort the companies table by the given column index.
 *
 * The first click on a column sorts descending; clicking the same column
 * again toggles the direction. The current column and direction are kept in
 * data-sort-col / data-sort-dir on the table element.
 *
 * A column is sorted numerically only when EVERY cell looks like a number
 * ("42", "42%", "5/12"); otherwise the whole column is sorted as text with
 * Polish collation. Deciding per column (not per pair of rows) keeps the
 * comparator consistent, so names that start with a digit are no longer
 * compared by their numeric prefix against some rows and by text against
 * others.
 *
 * @param {number} colIdx zero-based index of the column to sort by
 */
function sortTable(colIdx) {
    var table = document.getElementById('companiesTable');
    var tbody = table.querySelector('tbody');
    var asc = table.dataset.sortCol == colIdx && table.dataset.sortDir !== 'asc';
    table.dataset.sortCol = colIdx;
    table.dataset.sortDir = asc ? 'asc' : 'desc';

    // Whole-cell numeric forms: "42", "42.5", "5/12" (the '%' is stripped first).
    var numericPattern = /^-?\d+(\.\d+)?(\/\d+)?$/;
    var entries = Array.from(tbody.querySelectorAll('tr')).map(function(row) {
        var text = row.cells[colIdx].textContent.trim().replace('%', '');
        return {
            row: row,
            text: text,
            // parseFloat reads the leading number, so "5/12" sorts by 5.
            num: numericPattern.test(text) ? parseFloat(text) : NaN
        };
    });
    var numericColumn = entries.every(function(entry) { return !isNaN(entry.num); });

    entries.sort(function(a, b) {
        if (numericColumn) {
            return asc ? a.num - b.num : b.num - a.num;
        }
        return asc ? a.text.localeCompare(b.text, 'pl') : b.text.localeCompare(a.text, 'pl');
    });
    // Re-appending an existing row moves it, which reorders the table in place.
    entries.forEach(function(entry) { tbody.appendChild(entry.row); });
}
// Checkbox selection
/**
 * Copy the state of the header "select all" checkbox onto every company
 * checkbox whose row is currently visible; rows hidden by the quality
 * filter keep their existing state. Refreshes the bulk action bar afterwards.
 */
function toggleSelectAll() {
    var masterChecked = document.getElementById('selectAll').checked;
    var boxes = document.querySelectorAll('.company-cb');
    for (var i = 0; i < boxes.length; i++) {
        var rowHidden = boxes[i].closest('tr').style.display === 'none';
        if (!rowHidden) {
            boxes[i].checked = masterChecked;
        }
    }
    updateBulkBar();
}
// Delegated handler: any change on a per-company checkbox refreshes the bulk bar.
document.addEventListener('change', function(event) {
    var isCompanyCheckbox = event.target.classList.contains('company-cb');
    if (!isCompanyCheckbox) {
        return;
    }
    updateBulkBar();
});
/**
 * Write the number of checked company checkboxes into #selectedCount and
 * show the bulk action bar (class "active") only when at least one is checked.
 */
function updateBulkBar() {
    var checkedCount = document.querySelectorAll('.company-cb:checked').length;
    document.getElementById('selectedCount').textContent = checkedCount;
    document.getElementById('bulkBar').classList.toggle('active', checkedCount > 0);
}
/**
 * Uncheck every company checkbox (visible or not) and the header
 * "select all" checkbox, then refresh the bulk action bar.
 */
function clearSelection() {
    var boxes = document.querySelectorAll('.company-cb');
    for (var i = 0; i < boxes.length; i++) {
        boxes[i].checked = false;
    }
    document.getElementById('selectAll').checked = false;
    updateBulkBar();
}
// Bulk enrich modal
/**
 * Open the bulk enrichment modal: show how many companies are selected,
 * display the modal and hide the progress section.
 */
function openBulkEnrich() {
    var byId = function(id) { return document.getElementById(id); };
    var selectedTotal = document.querySelectorAll('.company-cb:checked').length;
    byId('modalCount').textContent = selectedTotal;
    byId('bulkModal').style.display = 'flex';
    byId('bulkProgress').style.display = 'none';
}
/** Hide the bulk enrichment modal. */
function closeBulkModal() {
    var modal = document.getElementById('bulkModal');
    modal.style.display = 'none';
}
/**
 * Start a bulk enrichment job for the checked companies.
 *
 * Collects the checked company ids and the enrichment steps ticked in the
 * modal, POSTs them as JSON to /admin/data-quality/bulk-enrich and, when the
 * response carries a job_id, starts polling its progress. Does nothing when
 * no company or no step is selected.
 *
 * Failures (network error, non-2xx response, response without job_id) are
 * written to the progress log as plain text instead of being dropped.
 */
function startBulkEnrich() {
    var companyIds = [];
    document.querySelectorAll('.company-cb:checked').forEach(function(cb) {
        var id = parseInt(cb.value, 10);
        if (!isNaN(id)) {
            companyIds.push(id);
        }
    });
    // Step names double as the suffix of the checkbox ids (step-registry, ...).
    var steps = ['registry', 'seo', 'social', 'gbp', 'logo'].filter(function(step) {
        return document.getElementById('step-' + step).checked;
    });
    if (companyIds.length === 0 || steps.length === 0) return;

    var log = document.getElementById('progressLog');
    function logError(message) {
        // textContent, not innerHTML: the message may come from the server.
        var line = document.createElement('div');
        line.style.color = '#ef4444';
        line.textContent = 'Błąd: ' + message;
        log.appendChild(line);
    }

    // Reset the progress UI so a previous run does not leak into this one.
    document.getElementById('bulkProgress').style.display = 'block';
    document.getElementById('progressBar').style.width = '0%';
    document.getElementById('progressText').textContent = '0/' + companyIds.length;
    log.innerHTML = '';

    fetch('/admin/data-quality/bulk-enrich', {
        method: 'POST',
        headers: {'Content-Type': 'application/json', 'X-CSRFToken': document.querySelector('meta[name=csrf-token]')?.content || ''},
        body: JSON.stringify({company_ids: companyIds, steps: steps})
    })
    .then(function(r) {
        // An error page may not be JSON; fall back to an empty payload so the
        // HTTP status can still be reported.
        return r.json()
            .catch(function() { return {}; })
            .then(function(data) { return {ok: r.ok, status: r.status, data: data || {}}; });
    })
    .then(function(res) {
        if (res.ok && res.data.job_id) {
            pollProgress(res.data.job_id, companyIds.length);
            return;
        }
        // NOTE(review): 'error' / 'message' are assumed names for the server's
        // error field — confirm against the bulk-enrich route.
        logError(res.data.error || res.data.message || ('HTTP ' + res.status));
    })
    .catch(function(err) {
        logError(err.message);
    });
}
/**
 * Poll the bulk enrichment status endpoint and update the progress UI.
 *
 * Re-schedules itself every 2 seconds while the job reports status
 * 'running'; any other status is treated as finished. A failed request
 * (network error, non-2xx response, invalid JSON) is written to the log and
 * stops the polling instead of failing silently.
 *
 * @param {number|string} jobId id returned by the bulk-enrich endpoint
 * @param {number} total number of companies submitted in the job
 */
function pollProgress(jobId, total) {
    var log = document.getElementById('progressLog');
    function appendLine(text, color, bold) {
        // textContent keeps server-provided text (e.g. company names) from
        // being interpreted as HTML.
        var line = document.createElement('div');
        if (color) line.style.color = color;
        if (bold) line.style.fontWeight = '600';
        line.textContent = text;
        log.appendChild(line);
        log.scrollTop = log.scrollHeight;
    }

    fetch('/admin/data-quality/bulk-enrich/status?job_id=' + encodeURIComponent(jobId))
    .then(function(r) {
        if (!r.ok) {
            throw new Error('HTTP ' + r.status);
        }
        return r.json();
    })
    .then(function(data) {
        var processed = data.processed || 0;
        // Guard against division by zero and keep the bar within 0-100%.
        var pct = total > 0 ? Math.min(100, Math.round(processed / total * 100)) : 0;
        document.getElementById('progressBar').style.width = pct + '%';
        document.getElementById('progressText').textContent = processed + '/' + total;
        if (data.latest_result) {
            appendLine(data.latest_result);
        }
        if (data.status === 'running') {
            setTimeout(function() { pollProgress(jobId, total); }, 2000);
        } else {
            appendLine('Zakończono!', '#22c55e', true);
        }
    })
    .catch(function(err) {
        appendLine('Błąd: ' + err.message, '#ef4444', false);
    });
}
{% endblock %}

View File

@ -1478,6 +1478,12 @@
</svg> </svg>
Firmy Firmy
</a> </a>
<a href="{{ url_for('admin.admin_data_quality') }}">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/>
</svg>
Jakość danych
</a>
{% if current_user.has_role(SystemRole.ADMIN) %} {% if current_user.has_role(SystemRole.ADMIN) %}
<a href="{{ url_for('admin.admin_users') }}"> <a href="{{ url_for('admin.admin_users') }}">
<svg fill="none" stroke="currentColor" viewBox="0 0 24 24"> <svg fill="none" stroke="currentColor" viewBox="0 0 24 24">

View File

@ -1164,6 +1164,64 @@
</div> </div>
{% endif %} {% endif %}
<!-- Godziny otwarcia z Google Business Profile -->
{% if website_analysis and website_analysis.google_opening_hours and website_analysis.google_opening_hours.weekday_text %}
{% set day_translations = {
'Monday': 'Poniedziałek',
'Tuesday': 'Wtorek',
'Wednesday': 'Środa',
'Thursday': 'Czwartek',
'Friday': 'Piątek',
'Saturday': 'Sobota',
'Sunday': 'Niedziela'
} %}
<div class="company-section">
<h2 class="section-title">Godziny otwarcia</h2>
<div style="background: var(--background); border-radius: var(--radius-lg); padding: var(--spacing-lg); border: 1px solid var(--border);">
<div style="display: flex; align-items: flex-start; gap: var(--spacing-md);">
<div style="width: 48px; height: 48px; border-radius: 12px; background: linear-gradient(135deg, #10b981, #059669); display: flex; align-items: center; justify-content: center; flex-shrink: 0;">
<svg width="24" height="24" fill="none" stroke="white" stroke-width="2" viewBox="0 0 24 24">
<circle cx="12" cy="12" r="10"/>
<polyline points="12 6 12 12 16 14"/>
</svg>
</div>
<div style="flex: 1;">
<div style="display: grid; gap: 4px;">
{% for line in website_analysis.google_opening_hours.weekday_text %}
{% set parts = line.split(': ', 1) %}
{% set day_en = parts[0] if parts|length > 1 else '' %}
{% set hours_text = parts[1] if parts|length > 1 else line %}
{% set day_pl = day_translations.get(day_en, day_en) %}
{% set is_closed = 'Closed' in hours_text %}
<div style="display: flex; justify-content: space-between; align-items: center; padding: 6px 0; {% if not loop.last %}border-bottom: 1px solid var(--border);{% endif %}">
<span style="font-weight: 600; color: var(--text-primary); min-width: 120px;">{{ day_pl }}</span>
<span style="color: {% if is_closed %}#ef4444{% else %}var(--text-secondary){% endif %};">
{% if is_closed %}Zamknięte{% else %}{{ hours_text }}{% endif %}
</span>
</div>
{% endfor %}
</div>
{% if website_analysis.google_opening_hours.open_now is not none %}
<div style="margin-top: var(--spacing-sm); padding-top: var(--spacing-sm); border-top: 1px solid var(--border);">
{% if website_analysis.google_opening_hours.open_now %}
<span style="display: inline-flex; align-items: center; gap: 6px; color: #10b981; font-weight: 600; font-size: var(--font-size-sm);">
<span style="width: 8px; height: 8px; border-radius: 50%; background: #10b981; display: inline-block;"></span>
Teraz otwarte
</span>
{% else %}
<span style="display: inline-flex; align-items: center; gap: 6px; color: #ef4444; font-weight: 600; font-size: var(--font-size-sm);">
<span style="width: 8px; height: 8px; border-radius: 50%; background: #ef4444; display: inline-block;"></span>
Teraz zamknięte
</span>
{% endif %}
</div>
{% endif %}
</div>
</div>
</div>
</div>
{% endif %}
<!-- O firmie - Single Description (prioritized sources) --> <!-- O firmie - Single Description (prioritized sources) -->
{% set about_description = company.description_full or (ai_insights.business_summary if ai_insights else none) or (website_analysis.content_summary if website_analysis else none) %} {% set about_description = company.description_full or (ai_insights.business_summary if ai_insights else none) or (website_analysis.content_summary if website_analysis else none) %}
{% set _about_hidden = company.is_section_hidden('about') %} {% set _about_hidden = company.is_section_hidden('about') %}

88
utils/data_quality.py Normal file
View File

@ -0,0 +1,88 @@
"""
Data Quality Service
====================
Computes and updates company data quality scores.
Extracted from inline completeness logic in admin routes.
"""
import os
from database import CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit
def compute_data_quality_score(company, db):
    """Compute the data completeness score for a company.

    Twelve checks are evaluated: seven company attributes, the presence of
    a logo file, fetched registry data, and the existence of SEO, social
    media and GBP audit rows for the company.

    Args:
        company: Company record; the attributes read are ``slug``, ``id``,
            ``nip``, ``address_city``, ``phone``, ``email``, ``website``,
            ``description_short``, ``category_id``, ``ceidg_fetched_at``
            and ``krs_fetched_at``.
        db: Session-like object exposing ``query(Model)``.

    Returns:
        dict with ``score`` (int, 0-100, truncated percentage of filled
        fields), ``fields`` (display name -> bool), ``total`` (number of
        fields) and ``filled`` (number of fields that are True).
    """
    # Logo check (webp or svg).
    # NOTE(review): the path is relative, so it is resolved against the
    # process working directory — confirm every caller runs from the app root.
    logo_dir = os.path.join('static', 'img', 'companies')
    logo_exists = any(
        os.path.isfile(os.path.join(logo_dir, f'{company.slug}.{ext}'))
        for ext in ('webp', 'svg')
    )

    # Registry data: either registry having been fetched is enough.
    registry_done = bool(company.ceidg_fetched_at or company.krs_fetched_at)

    def _has_row(model):
        # Existence check shared by all three audits: fetching the first row
        # avoids counting every matching row just to compare against zero.
        return db.query(model).filter(
            model.company_id == company.id
        ).first() is not None

    seo_done = _has_row(CompanyWebsiteAnalysis)
    social_done = _has_row(CompanySocialMedia)
    gbp_done = _has_row(GBPAudit)

    fields = {
        'NIP': bool(company.nip),
        'Adres': bool(company.address_city),
        'Telefon': bool(company.phone),
        'Email': bool(company.email),
        'Strona WWW': bool(company.website),
        'Opis': bool(company.description_short),
        'Kategoria': bool(company.category_id),
        'Logo': logo_exists,
        'Dane urzędowe': registry_done,
        'Audyt SEO': seo_done,
        'Audyt Social': social_done,
        'Audyt GBP': gbp_done,
    }
    filled = sum(fields.values())
    total = len(fields)
    # int() truncates, so e.g. 4 of 12 fields gives 33, not 34.
    score = int(filled / total * 100)
    return {
        'score': score,
        'fields': fields,
        'total': total,
        'filled': filled,
    }
def compute_data_quality_label(score):
    """Translate a numeric score into its quality tier.

    Scores below 34 are 'basic', scores below 67 are 'enhanced', and
    everything else is 'complete'.
    """
    # (exclusive upper bound, label), checked in ascending order.
    tiers = ((34, 'basic'), (67, 'enhanced'))
    for upper_bound, label in tiers:
        if score < upper_bound:
            return label
    return 'complete'
def update_company_data_quality(company, db):
    """Recompute a company's data quality and store it on the record.

    Sets ``company.data_quality_score`` to the computed score and
    ``company.data_quality`` to the matching label. Nothing is committed
    or flushed here.

    Returns:
        The result dict produced by compute_data_quality_score.
    """
    quality = compute_data_quality_score(company, db)
    score = quality['score']
    company.data_quality_score = score
    company.data_quality = compute_data_quality_label(score)
    return quality