diff --git a/blueprints/admin/__init__.py b/blueprints/admin/__init__.py index 3bdc3a6..b67e862 100644 --- a/blueprints/admin/__init__.py +++ b/blueprints/admin/__init__.py @@ -29,3 +29,5 @@ from . import routes_membership # noqa: E402, F401 from . import routes_benefits # noqa: E402, F401 from . import routes_competitors # noqa: E402, F401 from . import routes_social_publisher # noqa: E402, F401 +from . import routes_data_quality # noqa: E402, F401 +from . import routes_bulk_enrichment # noqa: E402, F401
diff --git a/blueprints/admin/routes_bulk_enrichment.py b/blueprints/admin/routes_bulk_enrichment.py
new file mode 100644
index 0000000..da3f498
--- /dev/null
+++ b/blueprints/admin/routes_bulk_enrichment.py
@@ -0,0 +1,200 @@
+"""
+Admin Bulk Enrichment Routes
+=============================
+
+Batch enrichment operations for multiple companies at once.
+"""
+
+import logging
+import threading
+import time
+from datetime import datetime
+
+from flask import request, jsonify
+from flask_login import login_required, current_user
+
+from . import bp
+from database import SessionLocal, Company, BulkEnrichmentJob, SystemRole
+from utils.decorators import role_required
+
+logger = logging.getLogger(__name__)
+
+
+def _run_bulk_enrichment(job_id, company_ids, steps):
+    """Background worker for bulk enrichment. Runs in a separate thread.
+
+    Calls arm_company() for each ID in company_ids and, after every company,
+    stores the accumulated results and progress on the BulkEnrichmentJob row.
+    The job ends as 'completed', or 'failed' on an unexpected error.
+    """
+    import sys
+    import os
+    base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+    if base_dir not in sys.path:
+        sys.path.insert(0, base_dir)
+    scripts_dir = os.path.join(base_dir, 'scripts')
+    if scripts_dir not in sys.path:
+        sys.path.insert(0, scripts_dir)
+
+    from scripts.arm_company import arm_company
+
+    db = SessionLocal()
+    try:
+        job = db.query(BulkEnrichmentJob).filter_by(id=job_id).first()
+        if not job:
+            logger.error(f"Bulk enrichment job {job_id} not found")
+            return
+
+        all_results = {}
+
+        for i, cid in enumerate(company_ids):
+            company = db.query(Company).filter_by(id=cid).first()
+            company_name = company.name if company else f"ID {cid}"
+
+            try:
+                result = arm_company(cid, force=False, steps=steps)
+                if isinstance(result, dict):
+                    all_results[str(cid)] = {
+                        'name': company_name,
+                        'results': result,
+                    }
+                else:
+                    all_results[str(cid)] = {
+                        'name': company_name,
+                        'results': {'error': 'Firma nie znaleziona' if not result else 'Nieznany błąd'},
+                    }
+            except Exception as e:
+                logger.error(f"Bulk enrichment error for company {cid}: {e}")
+                all_results[str(cid)] = {
+                    'name': company_name,
+                    'results': {'error': str(e)[:200]},
+                }
+
+            # Update progress; assign a fresh dict so the JSONB column is always seen as changed
+            job.processed_companies = i + 1
+            job.results = dict(all_results)
+            db.commit()
+
+            # Delay between companies to respect API limits
+            if i < len(company_ids) - 1:
+                time.sleep(2)
+
+        job.status = 'completed'
+        job.completed_at = datetime.now()
+        db.commit()
+        logger.info(f"Bulk enrichment job {job_id} completed: {len(company_ids)} companies")
+
+    except Exception as e:
+        logger.error(f"Bulk enrichment job {job_id} failed: {e}")
+        try:
+            # Discard the failed transaction first; the session is unusable until rolled back
+            db.rollback()
+            job = db.query(BulkEnrichmentJob).filter_by(id=job_id).first()
+            if job:
+                job.status = 'failed'
+                job.completed_at = datetime.now()
+                db.commit()
+        except Exception as mark_err:
+            logger.error(f"Could not mark bulk enrichment job {job_id} as failed: {mark_err}")
+    finally:
+        db.close()
+
+
+@bp.route('/data-quality/bulk-enrich', methods=['POST'])
+@login_required
+@role_required(SystemRole.ADMIN)
+def bulk_enrich():
+    """Start a bulk enrichment job."""
+    data = request.get_json()
+    if not data:
+        return jsonify({'error': 'Brak danych'}), 400
+
+    company_ids = data.get('company_ids', [])
+    steps = data.get('steps', ['registry', 'seo', 'social', 'gbp', 'logo'])
+
+    if not company_ids:
+        return jsonify({'error': 'Nie wybrano firm'}), 400
+
+    if len(company_ids) > 50:
+        return jsonify({'error': 'Maksymalnie 50 firm na raz'}), 400
+
+    valid_steps = {'registry', 'seo', 'social', 'gbp', 'logo'}
+    steps = [s for s in steps if s in valid_steps]
+    if not steps:
+        return jsonify({'error': 'Nie wybrano kroków'}), 400
+
+    db = SessionLocal()
+    try:
+        # Validate company IDs by set difference, so duplicate IDs are not reported as missing
+        existing = db.query(Company.id).filter(Company.id.in_(company_ids)).all()
+        existing_ids = {r[0] for r in existing}
+        missing = set(company_ids) - existing_ids
+        if missing:
+            return jsonify({'error': f'Nie znaleziono firm: {missing}'}), 400
+
+        # Create job
+        job = BulkEnrichmentJob(
+            started_by=current_user.id,
+            total_companies=len(company_ids),
+            steps=steps,
+            results={},
+        )
+        db.add(job)
+        db.commit()
+
+        job_id = job.id
+
+        # Start background thread
+        thread = threading.Thread(
+            target=_run_bulk_enrichment,
+            args=(job_id, company_ids, steps),
+            daemon=True,
+        )
+        thread.start()
+
+        logger.info(f"Bulk enrichment job {job_id} started by {current_user.email}: {len(company_ids)} companies, steps={steps}")
+
+        return jsonify({'job_id': job_id, 'total': len(company_ids)})
+
+    finally:
+        db.close()
+
+
+@bp.route('/data-quality/bulk-enrich/status')
+@login_required
+@role_required(SystemRole.ADMIN)
+def bulk_enrich_status():
+    """Check status of a bulk enrichment job."""
+    job_id = request.args.get('job_id', type=int)
+    if not job_id:
+        return jsonify({'error': 'Brak job_id'}), 400
+
+    db = SessionLocal()
+    try:
+        job = db.query(BulkEnrichmentJob).filter_by(id=job_id).first()
+        if not job:
+            return jsonify({'error': 'Job nie znaleziony'}), 404
+
+        # Build a one-line summary for the progress log
+        latest_result = None
+        if job.results and job.processed_companies > 0:
+            results_dict = job.results
+            # Last entry in stored order; JSONB does not keep insertion order, so it may not be the newest
+            cid, entry = list(results_dict.items())[-1]
+            name = entry.get('name', cid)
+            res = entry.get('results', {})
+            ok = sum(1 for v in res.values() if isinstance(v, str) and (v.startswith('OK') or v.startswith('SKIP (done)')))
+            total_steps = len(res)
+            latest_result = f"{name}: {ok}/{total_steps} kroków OK"
+
+        return jsonify({
+            'job_id': job.id,
+            'status': job.status,
+            'processed': job.processed_companies,
+            'total': job.total_companies,
+            'latest_result': latest_result,
+            'results': job.results if job.status != 'running' else None,
+        })
+
+    finally:
+        db.close()
diff --git a/blueprints/admin/routes_companies.py b/blueprints/admin/routes_companies.py index 4c603ef..2666e8e 100644 --- a/blueprints/admin/routes_companies.py +++ b/blueprints/admin/routes_companies.py @@ -21,6 +21,7 @@ from database import ( CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit ) from utils.decorators import role_required +from utils.data_quality import compute_data_quality_score, update_company_data_quality # Logger logger = logging.getLogger(__name__) @@ -157,6 +158,9 @@ def admin_company_add(): db.commit() db.refresh(new_company) + update_company_data_quality(new_company, db) + db.commit() + logger.info(f"Admin {current_user.email} created new company: {name} (ID: {new_company.id})") return jsonify({ @@ -263,6 +267,9 @@ def admin_company_update(company_id): company.last_updated = datetime.utcnow() db.commit() + update_company_data_quality(company, db) + db.commit() + logger.info(f"Admin {current_user.email} updated company {company.name} (ID: {company_id})") return jsonify({ @@ -726,27 +733,7 @@ def admin_company_detail(company_id): } # --- Completeness score (12 fields) --- - fields = { - 'NIP': bool(company.nip), - 'Adres': bool(company.address_city), - 'Telefon': bool(company.phone), - 'Email': bool(company.email), - 'Strona
WWW': bool(company.website), - 'Opis': bool(company.description_short), - 'Kategoria': bool(company.category_id), - 'Logo': enrichment['logo']['done'], - 'Dane urzędowe': enrichment['registry']['done'], - 'Audyt SEO': enrichment['seo']['done'], - 'Audyt Social': enrichment['social']['done'], - 'Audyt GBP': enrichment['gbp']['done'], - } - - completeness = { - 'score': int(sum(fields.values()) / len(fields) * 100), - 'fields': fields, - 'total': len(fields), - 'filled': sum(fields.values()), - } + completeness = compute_data_quality_score(company, db) logger.info(f"Admin {current_user.email} viewed company detail: {company.name} (ID: {company_id})") diff --git a/blueprints/admin/routes_data_quality.py b/blueprints/admin/routes_data_quality.py new file mode 100644 index 0000000..8906b64 --- /dev/null +++ b/blueprints/admin/routes_data_quality.py @@ -0,0 +1,184 @@ +""" +Admin Data Quality Dashboard +============================= + +Aggregate view of company data quality and completeness across all companies. +""" + +import os +import logging +from datetime import datetime + +from flask import render_template +from flask_login import login_required +from sqlalchemy import func + +from . 
import bp +from database import ( + SessionLocal, Company, CompanyWebsiteAnalysis, + CompanySocialMedia, GBPAudit, SystemRole +) +from utils.decorators import role_required + +logger = logging.getLogger(__name__) + +LOGO_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'static', 'img', 'companies') + + +def _check_logo_exists(slug): + """Check if company logo file exists on disk.""" + if not slug: + return False + for ext in ('webp', 'svg'): + if os.path.isfile(os.path.join(LOGO_DIR, f'{slug}.{ext}')): + return True + return False + + +@bp.route('/data-quality') +@login_required +@role_required(SystemRole.ADMIN) +def admin_data_quality(): + """Data quality dashboard with aggregate stats.""" + db = SessionLocal() + try: + now = datetime.now() + + # Load all active/pending companies with minimal fields + companies = db.query(Company).filter( + Company.status.in_(['active', 'pending']) + ).order_by(Company.name).all() + + total = len(companies) + if total == 0: + return render_template( + 'admin/data_quality_dashboard.html', + total=0, field_stats={}, quality_dist={}, + score_dist={}, avg_score=0, companies_table=[], + now=now, + ) + + # Batch query: companies with SEO analysis + seo_company_ids = set( + row[0] for row in db.query(CompanyWebsiteAnalysis.company_id).all() + ) + + # Batch query: companies with social media profiles + social_counts = dict( + db.query( + CompanySocialMedia.company_id, + func.count(CompanySocialMedia.id) + ).group_by(CompanySocialMedia.company_id).all() + ) + + # Batch query: companies with GBP audit + gbp_company_ids = set( + row[0] for row in db.query(GBPAudit.company_id).distinct().all() + ) + + # Per-field coverage counters + field_counters = { + 'NIP': 0, + 'Adres': 0, + 'Telefon': 0, + 'Email': 0, + 'Strona WWW': 0, + 'Opis': 0, + 'Kategoria': 0, + 'Logo': 0, + 'Dane urzędowe': 0, + 'Audyt SEO': 0, + 'Audyt Social': 0, + 'Audyt GBP': 0, + } + + # Quality distribution + quality_dist = {'basic': 0, 
'enhanced': 0, 'complete': 0} + score_dist = {'0-25': 0, '26-50': 0, '51-75': 0, '76-100': 0} + score_sum = 0 + + # Per-company table data + companies_table = [] + + for c in companies: + # Compute 12-field check + fields = { + 'NIP': bool(c.nip), + 'Adres': bool(c.address_city), + 'Telefon': bool(c.phone), + 'Email': bool(c.email), + 'Strona WWW': bool(c.website), + 'Opis': bool(c.description_short), + 'Kategoria': bool(c.category_id), + 'Logo': _check_logo_exists(c.slug), + 'Dane urzędowe': bool(c.ceidg_fetched_at or c.krs_fetched_at), + 'Audyt SEO': c.id in seo_company_ids, + 'Audyt Social': social_counts.get(c.id, 0) > 0, + 'Audyt GBP': c.id in gbp_company_ids, + } + + filled = sum(fields.values()) + score = int(filled / len(fields) * 100) + + # Update counters + for field_name, has_value in fields.items(): + if has_value: + field_counters[field_name] += 1 + + # Quality label + if score < 34: + label = 'basic' + elif score < 67: + label = 'enhanced' + else: + label = 'complete' + quality_dist[label] = quality_dist.get(label, 0) + 1 + + # Score distribution + if score <= 25: + score_dist['0-25'] += 1 + elif score <= 50: + score_dist['26-50'] += 1 + elif score <= 75: + score_dist['51-75'] += 1 + else: + score_dist['76-100'] += 1 + + score_sum += score + + companies_table.append({ + 'id': c.id, + 'name': c.name, + 'slug': c.slug, + 'score': score, + 'filled': filled, + 'total': len(fields), + 'label': label, + 'data_quality': c.data_quality or 'basic', + 'fields': fields, + 'status': c.status, + }) + + # Sort by score ascending (most incomplete first) + companies_table.sort(key=lambda x: x['score']) + + # Field stats as percentages + field_stats = { + name: {'count': count, 'pct': round(count / total * 100)} + for name, count in field_counters.items() + } + + avg_score = round(score_sum / total) if total > 0 else 0 + + return render_template( + 'admin/data_quality_dashboard.html', + total=total, + field_stats=field_stats, + quality_dist=quality_dist, + 
score_dist=score_dist, + avg_score=avg_score, + companies_table=companies_table, + now=now, + ) + finally: + db.close() diff --git a/blueprints/api/routes_company.py b/blueprints/api/routes_company.py index dab021a..f6ba210 100644 --- a/blueprints/api/routes_company.py +++ b/blueprints/api/routes_company.py @@ -23,6 +23,7 @@ from database import ( from datetime import timedelta import gemini_service import krs_api_service +from utils.data_quality import update_company_data_quality from ceidg_api_service import fetch_ceidg_by_nip from . import bp @@ -589,6 +590,9 @@ def api_enrich_company_registry(company_id): db.commit() + update_company_data_quality(company, db) + db.commit() + logger.info(f"Registry enrichment for company {company.id} ({company.name}) from {source} by {current_user.email}") return jsonify({ diff --git a/blueprints/public/routes_company_edit.py b/blueprints/public/routes_company_edit.py index 0ae9537..e8248a8 100644 --- a/blueprints/public/routes_company_edit.py +++ b/blueprints/public/routes_company_edit.py @@ -11,6 +11,7 @@ from blueprints.public import bp from sqlalchemy import or_ from database import SessionLocal, Company, CompanyContact, CompanySocialMedia, CompanyWebsite, Category from utils.helpers import sanitize_input, sanitize_html, validate_email, ensure_url +from utils.data_quality import update_company_data_quality from datetime import datetime import logging @@ -180,6 +181,10 @@ def company_edit_save(company_id=None): _save_social_media(db, company) db.commit() + + update_company_data_quality(company, db) + db.commit() + flash('Dane firmy zostały zaktualizowane.', 'success') return redirect(url_for('public.company_detail', company_id=company.id)) diff --git a/database.py b/database.py index cd9b862..4008c8f 100644 --- a/database.py +++ b/database.py @@ -1320,6 +1320,24 @@ class CompanyQualityTracking(Base): company = relationship('Company', back_populates='quality_tracking') +class BulkEnrichmentJob(Base): + """Tracks bulk 
enrichment jobs started from admin dashboard."""
+    __tablename__ = 'bulk_enrichment_jobs'
+
+    id = Column(Integer, primary_key=True)
+    started_at = Column(DateTime, default=datetime.now)
+    started_by = Column(Integer, ForeignKey('users.id'))
+    status = Column(String(20), default='running') # running, completed, failed
+    total_companies = Column(Integer, default=0)
+    processed_companies = Column(Integer, default=0)
+    steps = Column(PG_JSONB) # ['registry', 'seo', 'social', 'gbp', 'logo']
+    results = Column(PG_JSONB, default=dict) # callable default (no shared dict): {company_id: {'name': ..., 'results': {step: result, ...}}, ...}
+    completed_at = Column(DateTime)
+
+    # Relationship
+    user = relationship('User')
+
+
 class CompanyWebsiteContent(Base):
     """Scraped website content for companies"""
     __tablename__ = 'company_website_content'
diff --git a/database/migrations/075_bulk_enrichment_jobs.sql b/database/migrations/075_bulk_enrichment_jobs.sql new file mode 100644 index 0000000..9b2717c --- /dev/null +++ b/database/migrations/075_bulk_enrichment_jobs.sql @@ -0,0 +1,17 @@ +-- 075: Create bulk_enrichment_jobs table for tracking batch enrichment operations +-- Run: python3 scripts/run_migration.py database/migrations/075_bulk_enrichment_jobs.sql + +CREATE TABLE IF NOT EXISTS bulk_enrichment_jobs ( + id SERIAL PRIMARY KEY, + started_at TIMESTAMP DEFAULT NOW(), + started_by INTEGER REFERENCES users(id), + status VARCHAR(20) DEFAULT 'running', + total_companies INTEGER DEFAULT 0, + processed_companies INTEGER DEFAULT 0, + steps JSONB, + results JSONB DEFAULT '{}'::jsonb, + completed_at TIMESTAMP +); + +GRANT ALL ON TABLE bulk_enrichment_jobs TO nordabiz_app; +GRANT USAGE, SELECT ON SEQUENCE bulk_enrichment_jobs_id_seq TO nordabiz_app; diff --git a/gbp_audit_service.py b/gbp_audit_service.py index de78d57..2436743 100644 --- a/gbp_audit_service.py +++ b/gbp_audit_service.py @@ -24,6 +24,7 @@ from typing import Dict, List, Optional, Any from sqlalchemy.orm import Session from database import Company, GBPAudit, GBPReview,
CompanyWebsiteAnalysis, SessionLocal, OAuthToken +from utils.data_quality import update_company_data_quality import gemini_service try: @@ -2029,6 +2030,18 @@ def fetch_google_business_data( db.commit() + # Flow GBP phone/website to Company if Company fields are empty + try: + if analysis.google_phone and not company.phone: + company.phone = analysis.google_phone + if analysis.google_website and not company.website: + company.website = analysis.google_website + update_company_data_quality(company, db) + db.commit() + except Exception as flow_err: + logger.warning(f"Failed to flow GBP data to Company {company_id}: {flow_err}") + db.rollback() + result['steps'][-1]['status'] = 'complete' result['steps'][-1]['message'] = 'Dane zapisane pomyślnie' result['success'] = True diff --git a/scripts/arm_company.py b/scripts/arm_company.py index 11ea620..8f47a6c 100644 --- a/scripts/arm_company.py +++ b/scripts/arm_company.py @@ -24,12 +24,27 @@ if scripts_dir not in sys.path: from database import SessionLocal, Company, CompanyWebsiteAnalysis, CompanySocialMedia, CompanyPKD, CompanyPerson from database import GBPAudit +from utils.data_quality import update_company_data_quality logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger('arm_company') -def arm_company(company_id, force=False): +ALL_STEPS = ['registry', 'seo', 'social', 'gbp', 'logo'] + + +def arm_company(company_id, force=False, steps=None, progress_callback=None): + """Arm a company with enrichment data. + + Args: + company_id: Company ID to enrich + force: Force re-execution of all steps + steps: List of steps to run (default: all). 
Options: registry, seo, social, gbp, logo + progress_callback: Optional callback(company_id, results) for bulk tracking + """ + if steps is None: + steps = ALL_STEPS + db = SessionLocal() try: company = db.query(Company).filter_by(id=company_id).first() @@ -41,13 +56,17 @@ def arm_company(company_id, force=False): print("Uzbrajam: %s (ID: %d)" % (company.name, company.id)) print("NIP: %s | WWW: %s" % (company.nip or '-', company.website or '-')) print("Tryb: %s" % ("FORCE (wszystkie kroki)" if force else "SMART (tylko brakujące)")) + if steps != ALL_STEPS: + print("Kroki: %s" % ', '.join(steps)) print("=" * 60) results = {} # --- Krok 1: Dane urzędowe --- registry_done = bool(company.krs_fetched_at or company.ceidg_fetched_at) - if force or not registry_done: + if 'registry' not in steps: + results['registry'] = 'SKIP (nie wybrano)' + elif force or not registry_done: if company.nip: print("\n[1/5] Pobieranie danych urzędowych...") try: @@ -194,7 +213,9 @@ def arm_company(company_id, force=False): # --- Krok 2: Audyt SEO --- seo_done = db.query(CompanyWebsiteAnalysis).filter_by(company_id=company.id).first() is not None - if force or not seo_done: + if 'seo' not in steps: + results['seo'] = 'SKIP (nie wybrano)' + elif force or not seo_done: if company.website: print("\n[2/5] Audyt SEO...") try: @@ -224,7 +245,9 @@ def arm_company(company_id, force=False): # --- Krok 3: Social Media --- social_done = db.query(CompanySocialMedia).filter_by(company_id=company.id).count() > 0 - if force or not social_done: + if 'social' not in steps: + results['social'] = 'SKIP (nie wybrano)' + elif force or not social_done: print("\n[3/5] Audyt Social Media...") try: from social_media_audit import SocialMediaAuditor @@ -254,7 +277,9 @@ def arm_company(company_id, force=False): # --- Krok 4: GBP --- gbp_done = db.query(GBPAudit).filter_by(company_id=company.id).first() is not None - if force or not gbp_done: + if 'gbp' not in steps: + results['gbp'] = 'SKIP (nie wybrano)' +
elif force or not gbp_done: print("\n[4/5] Audyt GBP...") try: from gbp_audit_service import GBPAuditService @@ -284,7 +309,9 @@ def arm_company(company_id, force=False): logo_done = True break - if force or not logo_done: + if 'logo' not in steps: + results['logo'] = 'SKIP (nie wybrano)' + elif force or not logo_done: if company.website: print("\n[5/5] Pobieranie logo...") try: @@ -322,7 +349,15 @@ def arm_company(company_id, force=False): print(" Wynik: %d/5 kroków zaliczonych" % ok_count) print("=" * 60) - return True + # Update data quality score + dq = update_company_data_quality(company, db) + db.commit() + print("\n Data quality: %s (%d%%)" % (company.data_quality, dq['score'])) + + if progress_callback: + progress_callback(company_id, results) + + return results except Exception as e: logger.error("Błąd uzbrajania firmy %d: %s" % (company_id, str(e))) diff --git a/scripts/backfill_data_quality_scores.py b/scripts/backfill_data_quality_scores.py new file mode 100644 index 0000000..08d6097 --- /dev/null +++ b/scripts/backfill_data_quality_scores.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +""" +Backfill data quality scores for all companies. + +Usage: + python3 scripts/backfill_data_quality_scores.py +""" +import sys +import os + +BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, BASE_DIR) + +from database import SessionLocal, Company +from utils.data_quality import update_company_data_quality + +BATCH_SIZE = 50 + + +def main(): + db = SessionLocal() + try: + companies = db.query(Company).all() + total = len(companies) + print(f"Backfilling data quality for {total} companies...") + + for i, company in enumerate(companies, 1): + result = update_company_data_quality(company, db) + + if i % BATCH_SIZE == 0: + db.commit() + print(f" [{i}/{total}] committed batch") + + db.commit() + print(f"Done. 
{total} companies updated.") + + # Summary + basic = sum(1 for c in companies if c.data_quality == 'basic') + enhanced = sum(1 for c in companies if c.data_quality == 'enhanced') + complete = sum(1 for c in companies if c.data_quality == 'complete') + print(f"\nSummary: basic={basic}, enhanced={enhanced}, complete={complete}") + + finally: + db.close() + + +if __name__ == '__main__': + main() diff --git a/templates/admin/data_quality_dashboard.html b/templates/admin/data_quality_dashboard.html new file mode 100644 index 0000000..4e20e83 --- /dev/null +++ b/templates/admin/data_quality_dashboard.html @@ -0,0 +1,717 @@ +{% extends "base.html" %} + +{% block title %}Jakość danych - Admin{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+
+

Jakość danych firm

+

Przegląd kompletności i jakości danych {{ total }} firm w katalogu

+
+
+ Stan na {{ now.strftime('%d.%m.%Y, %H:%M') }} +
+
+ + +
+
+
{{ total }}
+
Firm w katalogu
+
+
+
{{ avg_score }}%
+
Średnia kompletność
+
+
+
{{ quality_dist.get('complete', 0) }}
+
Kompletnych (67%+)
+
+
+
{{ quality_dist.get('basic', 0) }}
+
Podstawowych (<34%)
+
+
+ + +
+
Pokrycie danych per pole
+ {% for field_name, stats in field_stats.items() %} +
+
{{ field_name }}
+
+
+ {{ stats.pct }}% +
+
+
{{ stats.count }}/{{ total }}
+
+ {% endfor %} +
+ + +
+
Rozkład jakości danych
+
+
+
{{ quality_dist.get('basic', 0) }}
+
Podstawowe (<34%)
+
+
+
{{ quality_dist.get('enhanced', 0) }}
+
Rozszerzone (34-66%)
+
+
+
{{ quality_dist.get('complete', 0) }}
+
Kompletne (67%+)
+
+
+ +
+
+
{{ score_dist.get('0-25', 0) }}
+
0-25%
+
+
+
{{ score_dist.get('26-50', 0) }}
+
26-50%
+
+
+
{{ score_dist.get('51-75', 0) }}
+
51-75%
+
+
+
{{ score_dist.get('76-100', 0) }}
+
76-100%
+
+
+
+ + +
+
Firmy wg kompletności danych
+ + +
+ 0 zaznaczonych + + +
+ +
+
+ +
+
+ Pokazano {{ companies_table|length }} z {{ total }} firm +
+
+ + + + + + + + + + + + + + {% for c in companies_table %} + + + + + + + + + {% endfor %} + +
FirmaScorePolaKompletnośćJakość
+ + {{ c.name }} + + + + {{ c.score }}% + + {{ c.filled }}/{{ c.total }} +
+ {% for fname, fval in c.fields.items() %} + + {% endfor %} +
+
+ + {% if c.label == 'basic' %}Podstawowe{% elif c.label == 'enhanced' %}Rozszerzone{% else %}Kompletne{% endif %} + +
+
+ + + +{% endblock %} + +{% block extra_js %} + // Data Quality Dashboard JS + + function filterTable() { + var filter = document.getElementById('qualityFilter').value; + var rows = document.querySelectorAll('#companiesTable tbody tr'); + var shown = 0; + rows.forEach(function(row) { + if (filter === 'all' || row.dataset.quality === filter) { + row.style.display = ''; + shown++; + } else { + row.style.display = 'none'; + } + }); + document.getElementById('shownCount').textContent = shown; + } + + function sortTable(colIdx) { + var table = document.getElementById('companiesTable'); + var tbody = table.querySelector('tbody'); + var rows = Array.from(tbody.querySelectorAll('tr')); + var asc = table.dataset.sortCol == colIdx && table.dataset.sortDir !== 'asc'; + table.dataset.sortCol = colIdx; + table.dataset.sortDir = asc ? 'asc' : 'desc'; + + rows.sort(function(a, b) { + var aVal = a.cells[colIdx].textContent.trim().replace('%', ''); + var bVal = b.cells[colIdx].textContent.trim().replace('%', ''); + var aNum = parseFloat(aVal); + var bNum = parseFloat(bVal); + if (!isNaN(aNum) && !isNaN(bNum)) { + return asc ? aNum - bNum : bNum - aNum; + } + return asc ? 
aVal.localeCompare(bVal, 'pl') : bVal.localeCompare(aVal, 'pl'); + }); + + rows.forEach(function(row) { tbody.appendChild(row); }); + } + + // Checkbox selection + function toggleSelectAll() { + var checked = document.getElementById('selectAll').checked; + document.querySelectorAll('.company-cb').forEach(function(cb) { + var row = cb.closest('tr'); + if (row.style.display !== 'none') { + cb.checked = checked; + } + }); + updateBulkBar(); + } + + document.addEventListener('change', function(e) { + if (e.target.classList.contains('company-cb')) { + updateBulkBar(); + } + }); + + function updateBulkBar() { + var selected = document.querySelectorAll('.company-cb:checked').length; + var bar = document.getElementById('bulkBar'); + document.getElementById('selectedCount').textContent = selected; + if (selected > 0) { + bar.classList.add('active'); + } else { + bar.classList.remove('active'); + } + } + + function clearSelection() { + document.querySelectorAll('.company-cb').forEach(function(cb) { cb.checked = false; }); + document.getElementById('selectAll').checked = false; + updateBulkBar(); + } + + // Bulk enrich modal + function openBulkEnrich() { + var selected = document.querySelectorAll('.company-cb:checked').length; + document.getElementById('modalCount').textContent = selected; + document.getElementById('bulkModal').style.display = 'flex'; + document.getElementById('bulkProgress').style.display = 'none'; + } + + function closeBulkModal() { + document.getElementById('bulkModal').style.display = 'none'; + } + + function startBulkEnrich() { + var companyIds = []; + document.querySelectorAll('.company-cb:checked').forEach(function(cb) { + companyIds.push(parseInt(cb.value)); + }); + + var steps = []; + if (document.getElementById('step-registry').checked) steps.push('registry'); + if (document.getElementById('step-seo').checked) steps.push('seo'); + if (document.getElementById('step-social').checked) steps.push('social'); + if 
(document.getElementById('step-gbp').checked) steps.push('gbp'); + if (document.getElementById('step-logo').checked) steps.push('logo'); + + if (companyIds.length === 0 || steps.length === 0) return; + + document.getElementById('bulkProgress').style.display = 'block'; + document.getElementById('progressText').textContent = '0/' + companyIds.length; + document.getElementById('progressLog').innerHTML = ''; + + fetch('/admin/data-quality/bulk-enrich', { + method: 'POST', + headers: {'Content-Type': 'application/json', 'X-CSRFToken': document.querySelector('meta[name=csrf-token]')?.content || ''}, + body: JSON.stringify({company_ids: companyIds, steps: steps}) + }) + .then(function(r) { return r.json(); }) + .then(function(data) { + if (data.job_id) { + pollProgress(data.job_id, companyIds.length); + } + }) + .catch(function(err) { + document.getElementById('progressLog').innerHTML += '
Błąd: ' + err.message + '
'; + }); + } + + function pollProgress(jobId, total) { + fetch('/admin/data-quality/bulk-enrich/status?job_id=' + jobId) + .then(function(r) { return r.json(); }) + .then(function(data) { + var processed = data.processed || 0; + var pct = Math.round(processed / total * 100); + document.getElementById('progressBar').style.width = pct + '%'; + document.getElementById('progressText').textContent = processed + '/' + total; + + if (data.latest_result) { + var log = document.getElementById('progressLog'); + log.innerHTML += '
' + data.latest_result + '
'; + log.scrollTop = log.scrollHeight; + } + + if (data.status === 'running') { + setTimeout(function() { pollProgress(jobId, total); }, 2000); + } else { + document.getElementById('progressLog').innerHTML += '
Zakończono!
'; + } + }); + } +{% endblock %} diff --git a/templates/base.html b/templates/base.html index a673afd..b5520b0 100755 --- a/templates/base.html +++ b/templates/base.html @@ -1478,6 +1478,12 @@ Firmy + + + + + Jakość danych + {% if current_user.has_role(SystemRole.ADMIN) %} diff --git a/templates/company_detail.html b/templates/company_detail.html index 6b31554..97e859c 100755 --- a/templates/company_detail.html +++ b/templates/company_detail.html @@ -1164,6 +1164,64 @@ {% endif %} + +{% if website_analysis and website_analysis.google_opening_hours and website_analysis.google_opening_hours.weekday_text %} +{% set day_translations = { + 'Monday': 'Poniedziałek', + 'Tuesday': 'Wtorek', + 'Wednesday': 'Środa', + 'Thursday': 'Czwartek', + 'Friday': 'Piątek', + 'Saturday': 'Sobota', + 'Sunday': 'Niedziela' +} %} +
+

Godziny otwarcia

+
+
+
+ + + + +
+
+
+ {% for line in website_analysis.google_opening_hours.weekday_text %} + {% set parts = line.split(': ', 1) %} + {% set day_en = parts[0] if parts|length > 1 else '' %} + {% set hours_text = parts[1] if parts|length > 1 else line %} + {% set day_pl = day_translations.get(day_en, day_en) %} + {% set is_closed = 'Closed' in hours_text %} +
+ {{ day_pl }} + + {% if is_closed %}Zamknięte{% else %}{{ hours_text }}{% endif %} + +
+ {% endfor %} +
+ {% if website_analysis.google_opening_hours.open_now is not none %} +
+ {% if website_analysis.google_opening_hours.open_now %} + + + Teraz otwarte + + {% else %} + + + Teraz zamknięte + + {% endif %} +
+ {% endif %} +
+
+
+
+{% endif %} + {% set about_description = company.description_full or (ai_insights.business_summary if ai_insights else none) or (website_analysis.content_summary if website_analysis else none) %} {% set _about_hidden = company.is_section_hidden('about') %}
diff --git a/utils/data_quality.py b/utils/data_quality.py
new file mode 100644
index 0000000..e398d1c
--- /dev/null
+++ b/utils/data_quality.py
@@ -0,0 +1,98 @@
+"""
+Data Quality Service
+====================
+
+Computes and updates company data quality scores.
+Extracted from inline completeness logic in admin routes.
+"""
+
+import os
+
+from database import CompanyWebsiteAnalysis, CompanySocialMedia, GBPAudit
+
+# Absolute logo directory: a relative path would depend on the current working
+# directory, which may differ when called from scripts or background workers.
+LOGO_DIR = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+    'static', 'img', 'companies',
+)
+
+
+def compute_data_quality_score(company, db):
+    """Compute data quality score for a company.
+
+    Checks 12 completeness fields: 7 Company attributes, the logo file on
+    disk, registry fetch timestamps, and the presence of SEO, social media
+    and GBP audit rows (queried through db).
+
+    Returns dict with 'score' (0-100), 'fields' (name->bool), 'total', 'filled'.
+    """
+    # Logo check (webp or svg); a company without a slug cannot have a logo file
+    logo_exists = bool(company.slug) and any(
+        os.path.isfile(os.path.join(LOGO_DIR, f'{company.slug}.{ext}'))
+        for ext in ('webp', 'svg')
+    )
+
+    # Registry data
+    registry_done = bool(company.ceidg_fetched_at or company.krs_fetched_at)
+
+    # SEO audit
+    seo_done = db.query(CompanyWebsiteAnalysis).filter(
+        CompanyWebsiteAnalysis.company_id == company.id
+    ).first() is not None
+
+    # Social media audit
+    social_done = db.query(CompanySocialMedia).filter(
+        CompanySocialMedia.company_id == company.id
+    ).first() is not None
+
+    # GBP audit
+    gbp_done = db.query(GBPAudit).filter(
+        GBPAudit.company_id == company.id
+    ).first() is not None
+
+    fields = {
+        'NIP': bool(company.nip),
+        'Adres': bool(company.address_city),
+        'Telefon': bool(company.phone),
+        'Email': bool(company.email),
+        'Strona WWW': bool(company.website),
+        'Opis': bool(company.description_short),
+        'Kategoria': bool(company.category_id),
+        'Logo': logo_exists,
+        'Dane urzędowe': registry_done,
+        'Audyt SEO': seo_done,
+        'Audyt Social': social_done,
+        'Audyt GBP': gbp_done,
+    }
+
+    filled = sum(fields.values())
+    total = len(fields)
+    score = int(filled / total * 100)
+
+    return {
+        'score': score,
+        'fields': fields,
+        'total': total,
+        'filled': filled,
+    }
+
+
+def compute_data_quality_label(score):
+    """Map numeric score to quality label."""
+    if score < 34:
+        return 'basic'
+    elif score < 67:
+        return 'enhanced'
+    return 'complete'
+
+
+def update_company_data_quality(company, db):
+    """Compute and persist data quality score on a company.
+
+    Returns the result dict from compute_data_quality_score.
+    """
+    result = compute_data_quality_score(company, db)
+    company.data_quality_score = result['score']
+    company.data_quality = compute_data_quality_label(result['score'])
+    return result