Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Extract 12-field completeness scoring to utils/data_quality.py service - Auto-update data_quality_score and data_quality label on company data changes - Add /admin/data-quality dashboard with field coverage stats, quality distribution, and sortable company table - Add bulk enrichment with background processing, step selection, and progress tracking - Flow GBP phone/website to Company record when company fields are empty - Display Google opening hours on public company profile - Add BulkEnrichmentJob model and migration 075 - Refactor arm_company.py to support selective steps and progress callbacks Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
185 lines
5.5 KiB
Python
185 lines
5.5 KiB
Python
"""
|
|
Admin Data Quality Dashboard
|
|
=============================
|
|
|
|
Aggregate view of company data quality and completeness across all companies.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
from flask import render_template
|
|
from flask_login import login_required
|
|
from sqlalchemy import func
|
|
|
|
from . import bp
|
|
from database import (
|
|
SessionLocal, Company, CompanyWebsiteAnalysis,
|
|
CompanySocialMedia, GBPAudit, SystemRole
|
|
)
|
|
from utils.decorators import role_required
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Directory three levels above this file; the company logo folder is
# resolved relative to it as static/img/companies.
_BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
LOGO_DIR = os.path.join(_BASE_DIR, 'static', 'img', 'companies')
|
|
|
|
|
|
def _check_logo_exists(slug):
|
|
"""Check if company logo file exists on disk."""
|
|
if not slug:
|
|
return False
|
|
for ext in ('webp', 'svg'):
|
|
if os.path.isfile(os.path.join(LOGO_DIR, f'{slug}.{ext}')):
|
|
return True
|
|
return False
|
|
|
|
|
|
def _quality_label(score):
    """Return the quality label for a 0-100 completeness score.

    Scores below 34 map to 'basic', scores below 67 to 'enhanced' and
    everything else to 'complete'.
    """
    if score < 34:
        return 'basic'
    if score < 67:
        return 'enhanced'
    return 'complete'


def _score_bucket(score):
    """Return the score-distribution key ('0-25' ... '76-100') for a score."""
    if score <= 25:
        return '0-25'
    if score <= 50:
        return '26-50'
    if score <= 75:
        return '51-75'
    return '76-100'


def _company_field_flags(company, seo_company_ids, social_counts, gbp_company_ids):
    """Build the 12-entry ``{field label: bool}`` completeness map for a company.

    Args:
        company: Company row being evaluated.
        seo_company_ids: set of company ids that have a CompanyWebsiteAnalysis row.
        social_counts: dict of company id -> number of CompanySocialMedia rows.
        gbp_company_ids: set of company ids that have a GBPAudit row.

    Returns:
        dict whose keys are the display labels used by the dashboard and whose
        values say whether that piece of data is present. Key order is the
        display order and matches the per-field counters in the view.
    """
    return {
        'NIP': bool(company.nip),
        'Adres': bool(company.address_city),
        'Telefon': bool(company.phone),
        'Email': bool(company.email),
        'Strona WWW': bool(company.website),
        'Opis': bool(company.description_short),
        'Kategoria': bool(company.category_id),
        'Logo': _check_logo_exists(company.slug),
        # Either registry fetch timestamp counts as having official data.
        'Dane urzędowe': bool(company.ceidg_fetched_at or company.krs_fetched_at),
        'Audyt SEO': company.id in seo_company_ids,
        'Audyt Social': social_counts.get(company.id, 0) > 0,
        'Audyt GBP': company.id in gbp_company_ids,
    }


@bp.route('/data-quality')
@login_required
@role_required(SystemRole.ADMIN)
def admin_data_quality():
    """Data quality dashboard with aggregate stats.

    Loads every company whose status is 'active' or 'pending', evaluates the
    12 completeness fields for each one, and renders
    ``admin/data_quality_dashboard.html`` with this context:

    * ``total`` - number of companies evaluated.
    * ``field_stats`` - per field label, ``{'count': int, 'pct': int}``.
    * ``quality_dist`` - company counts per 'basic' / 'enhanced' / 'complete'.
    * ``score_dist`` - company counts per score bucket.
    * ``avg_score`` - rounded mean score.
    * ``companies_table`` - one dict per company, sorted by ascending score.
    * ``now`` - timestamp taken at the start of the request.

    When no company matches, the template is rendered with empty/zero values.
    The database session is always closed before returning.
    """
    db = SessionLocal()
    try:
        now = datetime.now()

        # Full Company entities are loaded (not a column subset), ordered by
        # name so that ties in the later score sort stay alphabetical.
        companies = db.query(Company).filter(
            Company.status.in_(['active', 'pending'])
        ).order_by(Company.name).all()

        total = len(companies)
        if total == 0:
            return render_template(
                'admin/data_quality_dashboard.html',
                total=0, field_stats={}, quality_dist={},
                score_dist={}, avg_score=0, companies_table=[],
                now=now,
            )

        # Batch lookups, one query each, so the per-company loop below does
        # not hit the database again for audit presence.
        seo_company_ids = set(
            row[0] for row in db.query(CompanyWebsiteAnalysis.company_id).all()
        )
        social_counts = dict(
            db.query(
                CompanySocialMedia.company_id,
                func.count(CompanySocialMedia.id)
            ).group_by(CompanySocialMedia.company_id).all()
        )
        gbp_company_ids = set(
            row[0] for row in db.query(GBPAudit.company_id).distinct().all()
        )

        # Per-field coverage counters (same keys/order as the field flags).
        field_counters = {
            'NIP': 0,
            'Adres': 0,
            'Telefon': 0,
            'Email': 0,
            'Strona WWW': 0,
            'Opis': 0,
            'Kategoria': 0,
            'Logo': 0,
            'Dane urzędowe': 0,
            'Audyt SEO': 0,
            'Audyt Social': 0,
            'Audyt GBP': 0,
        }

        quality_dist = {'basic': 0, 'enhanced': 0, 'complete': 0}
        score_dist = {'0-25': 0, '26-50': 0, '51-75': 0, '76-100': 0}
        score_sum = 0
        companies_table = []

        for c in companies:
            fields = _company_field_flags(
                c, seo_company_ids, social_counts, gbp_company_ids
            )

            # Booleans sum as 0/1; the percentage is truncated, not rounded.
            filled = sum(fields.values())
            score = int(filled / len(fields) * 100)

            for field_name, has_value in fields.items():
                if has_value:
                    field_counters[field_name] += 1

            label = _quality_label(score)
            quality_dist[label] += 1
            score_dist[_score_bucket(score)] += 1
            score_sum += score

            companies_table.append({
                'id': c.id,
                'name': c.name,
                'slug': c.slug,
                'score': score,
                'filled': filled,
                'total': len(fields),
                # 'label' is computed here; 'data_quality' is the value
                # stored on the company row (defaulting to 'basic').
                'label': label,
                'data_quality': c.data_quality or 'basic',
                'fields': fields,
                'status': c.status,
            })

        # Sort by score ascending (most incomplete first); the sort is stable.
        companies_table.sort(key=lambda x: x['score'])

        # Field stats as whole-number percentages of all evaluated companies.
        field_stats = {
            name: {'count': count, 'pct': round(count / total * 100)}
            for name, count in field_counters.items()
        }

        # total is guaranteed non-zero here because of the early return above.
        avg_score = round(score_sum / total)

        return render_template(
            'admin/data_quality_dashboard.html',
            total=total,
            field_stats=field_stats,
            quality_dist=quality_dist,
            score_dist=score_dist,
            avg_score=avg_score,
            companies_table=companies_table,
            now=now,
        )
    finally:
        db.close()
|