nordabiz/blueprints/admin/routes_portal_seo.py
Maciej Pienczyn b0befd2973
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
fix: correct links_without_text count and add overall score calculation
1. seo_analyzer.py: Consider aria-label, title, img AND svg as valid
   link text (SVG icon links were falsely counted as "without text")

2. routes_portal_seo.py: Calculate overall_seo score using
   SEOAuditor._calculate_overall_score() before saving to DB
   (was always None because stream route bypasses audit_company())

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 16:09:54 +01:00

575 lines
21 KiB
Python

"""
Portal SEO Audit Routes
========================
Self-audit of nordabiznes.pl using the same SEOAuditor used for company audits.
Tracks results over time for before/after comparison.
"""
import logging
import json
import sys
import os
import time as time_module
from datetime import datetime, date
from decimal import Decimal
from flask import (
abort, render_template, request, redirect, url_for,
flash, Response, stream_with_context
)
from flask_login import login_required, current_user
from . import bp
from database import SessionLocal, PortalSEOAudit
from utils.decorators import is_audit_owner
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# URL of the portal that every audit in this module targets.
PORTAL_URL = 'https://nordabiznes.pl'
# Path to scripts/ for SEOAuditor components:
# three dirname() hops up from this file, then into 'scripts'.
_SCRIPTS_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'scripts'
)
def _make_json_safe(obj):
    """Recursively convert a value into JSON-serializable types.

    Conversions applied:
    - dict: values are converted recursively (keys are left untouched)
    - list / tuple / set / frozenset: converted to a list of converted items
    - datetime / date: ISO-8601 string via ``isoformat()``
    - Decimal: ``float``

    Any other value is returned unchanged.
    """
    if isinstance(obj, dict):
        return {k: _make_json_safe(v) for k, v in obj.items()}
    # Tuples and sets must be walked too: otherwise a datetime/Decimal nested
    # inside them (or the set itself) would still break JSON serialization.
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [_make_json_safe(v) for v in obj]
    # datetime is a subclass of date; both expose isoformat().
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    return obj
def _sse_event(data):
    """Serialize *data* as one Server-Sent Events ``data:`` frame.

    The payload is JSON-encoded without ASCII escaping and terminated by a
    blank line.
    """
    payload = json.dumps(data, ensure_ascii=False)
    return "data: " + payload + "\n\n"
def _get_auditor():
    """Build a fresh SEOAuditor, making scripts/ importable first.

    The scripts directory is pushed to the front of ``sys.path`` only if it
    is not already listed there; ``seo_audit`` is imported lazily inside the
    function.
    """
    scripts_dir_missing = _SCRIPTS_DIR not in sys.path
    if scripts_dir_missing:
        sys.path.insert(0, _SCRIPTS_DIR)

    from seo_audit import SEOAuditor

    auditor = SEOAuditor()
    return auditor
def _extract_audit_fields(result):
    """Map an SEOAuditor result dict onto PortalSEOAudit column values.

    Pure helper (no I/O). A section that is missing *or* explicitly ``None``
    is treated as empty, so a partially failed audit cannot raise
    ``AttributeError`` here.

    Result structure read by this function (after to_dict()):
    - result['onpage']['meta_tags']['title'] / ['description']
    - result['onpage']['images']['total_images'] / ['images_without_alt']
    - result['onpage']['structured_data']['has_structured_data']
    - result['onpage']['open_graph']['og_title'] (truthy = has OG)
    - result['technical']['robots_txt']['exists']
    - result['technical']['sitemap']['exists']
    - result['technical']['canonical']['has_canonical']
    - result['technical']['indexability']['is_indexable']
    - result['pagespeed']['core_web_vitals']['lcp_ms'] etc.
    - result['scores']['pagespeed_performance'] etc.
    - result['security_headers']['has_hsts'] etc.
    - result['final_url']

    Returns:
        dict of keyword arguments for the PortalSEOAudit constructor.
    """
    def section(parent, key):
        # dict.get(key, {}) still returns None when the key is present with a
        # None value; ``or {}`` covers that case as well.
        return parent.get(key) or {}

    onpage = section(result, 'onpage')
    tech = section(result, 'technical')
    ps = section(result, 'pagespeed')
    scores = section(result, 'scores')

    # On-page nested
    meta_tags = section(onpage, 'meta_tags')
    images = section(onpage, 'images')
    structured = section(onpage, 'structured_data')
    og = section(onpage, 'open_graph')

    # Technical nested
    robots = section(tech, 'robots_txt')
    sitemap = section(tech, 'sitemap')
    canonical = section(tech, 'canonical')
    indexability = section(tech, 'indexability')

    # PageSpeed nested
    cwv = section(ps, 'core_web_vitals')

    # Security headers are read from the top-level 'security_headers' key of
    # the result dict.
    sec = section(result, 'security_headers')

    final_url = result.get('final_url') or ''

    return {
        # PageSpeed scores
        'pagespeed_performance': scores.get('pagespeed_performance'),
        'pagespeed_seo': scores.get('pagespeed_seo'),
        'pagespeed_accessibility': scores.get('pagespeed_accessibility'),
        'pagespeed_best_practices': scores.get('pagespeed_best_practices'),
        # Core Web Vitals
        'lcp_ms': cwv.get('lcp_ms'),
        'fcp_ms': cwv.get('fcp_ms'),
        'cls': cwv.get('cls'),
        'tbt_ms': cwv.get('tbt_ms'),
        'speed_index_ms': cwv.get('speed_index_ms'),
        # On-page checks
        'has_meta_title': bool(meta_tags.get('title')),
        'has_meta_description': bool(meta_tags.get('description')),
        'has_canonical': canonical.get('has_canonical', False),
        'has_robots_txt': robots.get('exists', False),
        'has_sitemap': sitemap.get('exists', False),
        'has_structured_data': structured.get('has_structured_data', False),
        'has_og_tags': bool(og.get('og_title')),
        'has_ssl': final_url.startswith('https'),
        # NOTE(review): is_mobile_friendly is populated from the
        # indexability flag, not from a mobile-specific check - confirm
        # this is the intended source.
        'is_mobile_friendly': indexability.get('is_indexable'),
        # Security headers
        'has_hsts': sec.get('has_hsts'),
        'has_csp': sec.get('has_csp'),
        'has_x_frame': sec.get('has_x_frame_options'),
        'has_x_content_type': sec.get('has_x_content_type'),
        # Content metrics
        'image_count': images.get('total_images'),
        'images_without_alt': images.get('images_without_alt'),
    }


def _save_audit_to_db(result, notes, user_email):
    """Extract fields from SEOAuditor result dict and save to database.

    Args:
        result: audit result dict; the keys that are read are listed in
            ``_extract_audit_fields``. The whole dict is also stored, in
            JSON-safe form, as ``full_results``.
        notes: free-text note stored with the audit.
        user_email: stored as ``created_by``.

    Returns:
        The id of the newly created PortalSEOAudit row.

    Raises:
        Any exception raised while building or committing the row; the
        session is rolled back before re-raising and always closed.
    """
    fields = _extract_audit_fields(result)
    db = SessionLocal()
    try:
        audit = PortalSEOAudit(
            audited_at=datetime.now(),
            url=PORTAL_URL,
            # Full data
            full_results=_make_json_safe(result),
            notes=notes,
            created_by=user_email,
            **fields,
        )
        db.add(audit)
        db.commit()
        return audit.id
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
@bp.route('/portal-seo')
@login_required
def admin_portal_seo():
    """Render the portal SEO audit history, newest audit first.

    Responds with 404 when ``is_audit_owner()`` is false.
    """
    if not is_audit_owner():
        abort(404)

    session = SessionLocal()
    try:
        newest_first = PortalSEOAudit.audited_at.desc()
        history = session.query(PortalSEOAudit).order_by(newest_first).all()
        return render_template(
            'admin/portal_seo.html', audits=history, portal_url=PORTAL_URL
        )
    finally:
        # Always release the session, including when rendering raises.
        session.close()
@bp.route('/portal-seo/run/stream')
@login_required
def admin_portal_seo_run_stream():
    """SSE endpoint for streaming portal SEO audit progress step by step.

    Intended to run the same audit pipeline as SEOAuditor.audit_company(),
    but yields progress events between each step.

    Responds with 404 when ``is_audit_owner()`` is false. Reads the optional
    ``notes`` query parameter, which is stored with the saved audit.

    Events emitted (each a JSON object in an SSE ``data:`` frame):
    - per-step progress: ``step``, ``total``, ``message``, ``status``
      (status is one of 'running', 'done', 'warning', 'error', 'skipped')
    - final success: ``status='complete'`` with ``audit_id``,
      ``performance``, ``seo`` and ``errors``
    - final failure: ``status='error'`` with ``message``

    A failure in an individual audit step is recorded in ``result['errors']``
    and reported as an 'error' progress event; the following steps still run.
    """
    if not is_audit_owner():
        abort(404)

    # Captured before streaming starts.
    notes = request.args.get('notes', '')
    user_email = current_user.email
    TOTAL = 9

    def progress(step, message, status):
        """Format one per-step progress event (key order kept stable)."""
        return _sse_event({
            'step': step, 'total': TOTAL,
            'message': message, 'status': status
        })

    def generate():
        """Run the audit steps in order, yielding SSE frames as they finish."""
        try:
            # Step 1: Init auditor
            yield progress(1, 'Inicjalizacja audytora SEO...', 'running')
            auditor = _get_auditor()
            yield progress(1, 'Audytor SEO zainicjalizowany', 'done')

            # Synthetic "company" record describing the portal itself;
            # passed to the local SEO analyzer in step 6.
            company = {
                'id': 0,
                'name': 'NordaBiznes.pl',
                'slug': 'nordabiznes-pl',
                'website': PORTAL_URL,
                'address_city': 'Wejherowo'
            }
            result = {
                'company_id': 0,
                'company_name': 'NordaBiznes.pl',
                'audit_date': datetime.now(),
                'website_url': PORTAL_URL,
                'errors': [],
                'scores': {},
            }
            html_content = None
            final_url = PORTAL_URL

            # Step 2: Fetch page
            yield progress(2, 'Pobieranie strony nordabiznes.pl...', 'running')
            try:
                start = time_module.time()
                resp = auditor.session.get(
                    PORTAL_URL, timeout=15, allow_redirects=True
                )
                load_ms = int((time_module.time() - start) * 1000)
                final_url = resp.url
                result['http_status'] = resp.status_code
                result['load_time_ms'] = load_ms
                result['final_url'] = final_url

                # Extract security headers from HTTP response
                hdrs = resp.headers
                result['security_headers'] = {
                    'has_hsts': 'strict-transport-security' in hdrs,
                    'has_csp': 'content-security-policy' in hdrs,
                    'has_x_frame_options': 'x-frame-options' in hdrs,
                    'has_x_content_type': 'x-content-type-options' in hdrs,
                }

                if resp.status_code == 200:
                    # When the reported encoding is ISO-8859-1, switch to the
                    # detected one (presumably because that value is only the
                    # HTTP client's fallback - TODO confirm).
                    if resp.encoding and resp.encoding.lower() == 'iso-8859-1':
                        resp.encoding = resp.apparent_encoding
                    html_content = resp.text
                    yield progress(
                        2,
                        f'Strona pobrana ({load_ms}ms, {len(html_content)//1024}KB)',
                        'done'
                    )
                else:
                    result['errors'].append(f'HTTP {resp.status_code}')
                    yield progress(2, f'HTTP {resp.status_code}', 'warning')
            except Exception as e:
                result['errors'].append(str(e)[:100])
                yield progress(2, f'Błąd: {str(e)[:80]}', 'error')

            # Step 3: On-page analysis
            yield progress(
                3,
                'Analiza on-page SEO (meta tagi, nagłówki, obrazy)...',
                'running'
            )
            if html_content:
                try:
                    onpage_obj = auditor.onpage_analyzer.analyze_html(
                        html_content, base_url=final_url
                    )
                    onpage_dict = onpage_obj.to_dict()
                    result['onpage'] = onpage_dict

                    # Build summary
                    mt = onpage_dict.get('meta_tags', {})
                    imgs = onpage_dict.get('images', {})
                    sd = onpage_dict.get('structured_data', {})
                    title = mt.get('title', '')
                    title_short = (title[:35] + '...') if title and len(title) > 35 else title
                    parts = []
                    if title:
                        parts.append(f'title="{title_short}"')
                    parts.append(f'{imgs.get("total_images", 0)} obrazów')
                    if imgs.get('images_without_alt'):
                        parts.append(f'{imgs["images_without_alt"]} bez alt')
                    if sd.get('has_structured_data'):
                        parts.append('Schema.org')
                    yield progress(3, f'On-page: {", ".join(parts)}', 'done')
                except Exception as e:
                    result['errors'].append(f'On-page: {str(e)[:100]}')
                    yield progress(3, f'On-page błąd: {str(e)[:80]}', 'error')
            else:
                yield progress(3, 'Pominięto (brak HTML)', 'skipped')

            # Step 4: Technical SEO
            yield progress(
                4,
                'Sprawdzanie techniczne (robots.txt, sitemap, SSL, canonical)...',
                'running'
            )
            try:
                tech_obj = auditor.technical_checker.check_url(final_url)
                tech_dict = tech_obj.to_dict()
                result['technical'] = tech_dict

                checks_ok = []
                checks_fail = []
                if tech_dict.get('robots_txt', {}).get('exists'):
                    checks_ok.append('robots.txt')
                else:
                    checks_fail.append('robots.txt')
                if tech_dict.get('sitemap', {}).get('exists'):
                    checks_ok.append('sitemap')
                else:
                    checks_fail.append('sitemap')
                if tech_dict.get('canonical', {}).get('has_canonical'):
                    checks_ok.append('canonical')
                else:
                    checks_fail.append('canonical')

                msg_parts = []
                if checks_ok:
                    msg_parts.append(f'OK: {", ".join(checks_ok)}')
                if checks_fail:
                    msg_parts.append(f'Brak: {", ".join(checks_fail)}')
                yield progress(4, f'Technical: {" | ".join(msg_parts)}', 'done')
            except Exception as e:
                result['errors'].append(f'Technical: {str(e)[:100]}')
                yield progress(4, f'Technical błąd: {str(e)[:80]}', 'error')

            # Step 5: PageSpeed Insights
            yield progress(
                5,
                'PageSpeed Insights API (może potrwać do 30s)...',
                'running'
            )
            try:
                remaining = auditor.pagespeed_client.get_remaining_quota()
                if remaining > 0:
                    from pagespeed_client import Strategy
                    ps_result = auditor.pagespeed_client.analyze_url(
                        final_url, strategy=Strategy.MOBILE
                    )
                    result['pagespeed'] = ps_result.to_dict()
                    result['scores'] = {
                        'pagespeed_seo': ps_result.scores.seo,
                        'pagespeed_performance': ps_result.scores.performance,
                        'pagespeed_accessibility': ps_result.scores.accessibility,
                        'pagespeed_best_practices': ps_result.scores.best_practices,
                    }
                    yield progress(
                        5,
                        (
                            f'Perf={ps_result.scores.performance}, '
                            f'SEO={ps_result.scores.seo}, '
                            f'A11y={ps_result.scores.accessibility}, '
                            f'BP={ps_result.scores.best_practices}'
                        ),
                        'done'
                    )
                else:
                    result['errors'].append('PageSpeed API quota exceeded')
                    yield progress(5, 'Limit API wyczerpany', 'warning')
            except Exception as e:
                result['errors'].append(f'PageSpeed: {str(e)[:100]}')
                yield progress(5, f'PageSpeed błąd: {str(e)[:80]}', 'error')

            # Step 6: Local SEO
            yield progress(
                6,
                'Analiza Local SEO (NAP, Google Maps, lokalne słowa kluczowe)...',
                'running'
            )
            if html_content:
                try:
                    local_seo = auditor.local_seo_analyzer.analyze(
                        html_content, final_url, company
                    )
                    result['local_seo'] = local_seo
                    score = local_seo.get('local_seo_score', 0)
                    yield progress(6, f'Local SEO score: {score}/100', 'done')
                except Exception as e:
                    result['errors'].append(f'Local SEO: {str(e)[:100]}')
                    yield progress(6, f'Local SEO błąd: {str(e)[:80]}', 'error')
            else:
                yield progress(6, 'Pominięto (brak HTML)', 'skipped')

            # Step 7: Citations
            yield progress(
                7,
                'Sprawdzanie cytowań w katalogach (Google, Yelp, Facebook)...',
                'running'
            )
            try:
                citations = auditor.citation_checker.check_citations(
                    'NordaBiznes.pl', 'Wejherowo'
                )
                result['citations'] = citations
                found = sum(1 for c in citations if c.get('status') == 'found')
                yield progress(
                    7, f'Cytowania: {found}/{len(citations)} znalezione', 'done'
                )
            except Exception as e:
                result['errors'].append(f'Citations: {str(e)[:100]}')
                yield progress(7, f'Citations błąd: {str(e)[:80]}', 'error')

            # Step 8: Content freshness
            yield progress(8, 'Sprawdzanie aktualności treści...', 'running')
            try:
                freshness = auditor.freshness_checker.check_freshness(
                    final_url, html_content
                )
                result['freshness'] = freshness
                fscore = freshness.get('content_freshness_score', 0)
                yield progress(8, f'Aktualność treści: {fscore}/100', 'done')
            except Exception as e:
                result['errors'].append(f'Freshness: {str(e)[:100]}')
                yield progress(8, f'Freshness błąd: {str(e)[:80]}', 'error')

            # Calculate overall score (same algorithm as SEOAuditor).
            # Best-effort: a failure must not abort the audit, but it is
            # logged and recorded so a missing score can be diagnosed.
            try:
                overall = auditor._calculate_overall_score(result)
                result['scores']['overall_seo'] = overall
            except Exception as e:
                logger.warning(
                    'Portal SEO overall score calculation failed: %s', e,
                    exc_info=True
                )
                result['errors'].append(f'Overall score: {str(e)[:100]}')

            # Step 9: Save to DB
            yield progress(9, 'Zapisywanie wyników do bazy danych...', 'running')
            try:
                audit_id = _save_audit_to_db(result, notes, user_email)
                yield progress(9, f'Zapisano audyt #{audit_id}', 'done')
                yield _sse_event({
                    'status': 'complete',
                    'audit_id': audit_id,
                    'performance': result.get('scores', {}).get('pagespeed_performance'),
                    'seo': result.get('scores', {}).get('pagespeed_seo'),
                    'errors': result.get('errors', []),
                })
            except Exception as e:
                logger.exception('Portal SEO save failed: %s', e)
                yield progress(9, f'Błąd zapisu: {str(e)[:80]}', 'error')
                yield _sse_event({'status': 'error', 'message': str(e)[:200]})
        except Exception as e:
            # Last-resort boundary: report anything not handled per step.
            logger.exception('Portal SEO audit stream failed: %s', e)
            yield _sse_event({
                'status': 'error',
                'message': f'Krytyczny błąd: {str(e)[:200]}'
            })

    return Response(
        stream_with_context(generate()),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no',
        }
    )
@bp.route('/portal-seo/<int:audit_id>')
@login_required
def admin_portal_seo_detail(audit_id):
    """Show one portal SEO audit next to the audit that preceded it.

    Responds with 404 when ``is_audit_owner()`` is false. If no audit has
    the given id, flashes an error and redirects to the audit history page.
    """
    if not is_audit_owner():
        abort(404)

    session = SessionLocal()
    try:
        current = session.query(PortalSEOAudit).get(audit_id)
        if current:
            # Most recent audit strictly older than the one being viewed;
            # None when there is no earlier audit.
            is_older = PortalSEOAudit.audited_at < current.audited_at
            previous = (
                session.query(PortalSEOAudit)
                .filter(is_older)
                .order_by(PortalSEOAudit.audited_at.desc())
                .first()
            )
            return render_template(
                'admin/portal_seo_detail.html',
                audit=current,
                prev_audit=previous,
                portal_url=PORTAL_URL
            )

        flash('Audyt nie znaleziony.', 'error')
        return redirect(url_for('admin.admin_portal_seo'))
    finally:
        session.close()