Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
1. seo_analyzer.py: Consider aria-label, title, img AND svg as valid link text (SVG icon links were falsely counted as "without text") 2. routes_portal_seo.py: Calculate overall_seo score using SEOAuditor._calculate_overall_score() before saving to DB (was always None because stream route bypasses audit_company()) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
575 lines
21 KiB
Python
575 lines
21 KiB
Python
"""
|
|
Portal SEO Audit Routes
|
|
========================
|
|
|
|
Self-audit of nordabiznes.pl using the same SEOAuditor used for company audits.
|
|
Tracks results over time for before/after comparison.
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
import sys
|
|
import os
|
|
import time as time_module
|
|
from datetime import datetime, date
|
|
from decimal import Decimal
|
|
|
|
from flask import (
|
|
abort, render_template, request, redirect, url_for,
|
|
flash, Response, stream_with_context
|
|
)
|
|
from flask_login import login_required, current_user
|
|
|
|
from . import bp
|
|
from database import SessionLocal, PortalSEOAudit
|
|
from utils.decorators import is_audit_owner
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Base URL of the portal: fetched by the streaming audit route and stored as
# the ``url`` of every saved audit row.
PORTAL_URL = 'https://nordabiznes.pl'

# Path to scripts/ for SEOAuditor components: three directory levels above
# this file, then "scripts". It is put on sys.path by _get_auditor().
_SCRIPTS_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'scripts'
)
|
|
|
|
|
|
def _make_json_safe(obj):
    """Recursively convert a value into JSON-serializable types.

    Conversions applied:
    - dict: values are converted recursively (keys are left untouched)
    - list / tuple / set / frozenset: converted recursively into a list
    - datetime / date: ISO-8601 string via ``isoformat()``
    - Decimal: ``float``

    Any other value is returned unchanged.

    Args:
        obj: Arbitrary (possibly nested) value, e.g. an audit result dict.

    Returns:
        A structure of the same shape with the conversions above applied.
    """
    if isinstance(obj, dict):
        return {key: _make_json_safe(value) for key, value in obj.items()}
    # Tuples and sets are handled like lists so that datetime/Decimal values
    # nested inside them are converted too (a set is not JSON-serializable
    # at all, and json encodes a tuple as an array anyway).
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [_make_json_safe(item) for item in obj]
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    return obj
|
|
|
|
|
|
def _sse_event(data):
    """Serialize *data* as a single Server-Sent Events ``data:`` frame.

    The payload is JSON-encoded with non-ASCII characters kept as-is
    (``ensure_ascii=False``) and the frame is terminated by a blank line.
    """
    payload = json.dumps(data, ensure_ascii=False)
    return "data: " + payload + "\n\n"
|
|
|
|
|
|
def _get_auditor():
    """Build a fresh ``SEOAuditor``.

    ``seo_audit`` is imported lazily, after making sure ``_SCRIPTS_DIR`` is
    on ``sys.path`` (it is inserted at the front when missing).
    """
    scripts_on_path = _SCRIPTS_DIR in sys.path
    if not scripts_on_path:
        sys.path.insert(0, _SCRIPTS_DIR)

    from seo_audit import SEOAuditor

    auditor = SEOAuditor()
    return auditor
|
|
|
|
|
|
def _save_audit_to_db(result, notes, user_email):
    """Extract fields from an audit result dict and save them to the database.

    Fields read from ``result`` (sub-results after ``to_dict()``):
    - result['onpage']['meta_tags']['title'] / ['description']
    - result['onpage']['images']['total_images'] / ['images_without_alt']
    - result['onpage']['structured_data']['has_structured_data']
    - result['onpage']['open_graph']['og_title'] (non-empty = has OG)
    - result['technical']['robots_txt']['exists']
    - result['technical']['sitemap']['exists']
    - result['technical']['canonical']['has_canonical']
    - result['technical']['indexability']['is_indexable']
    - result['scores']['pagespeed_performance'] etc.
    - result['pagespeed']['core_web_vitals']['lcp_ms'] etc.
    - result['security_headers']['has_hsts'] etc.
    - result['final_url']

    A section that is missing *or* present with a ``None`` value is treated
    as empty, so a partially failed audit can still be stored.

    Args:
        result: Audit result dict as assembled by the streaming audit route.
        notes: Free-text notes stored with the audit.
        user_email: Stored in the row's ``created_by`` field.

    Returns:
        The ``id`` of the newly created ``PortalSEOAudit`` row.

    Raises:
        Exception: Anything raised while building or committing the row is
            re-raised after the session has been rolled back.
    """
    def _section(container, key):
        """Return ``container[key]``, or an empty dict if missing/None."""
        return container.get(key) or {}

    onpage = _section(result, 'onpage')
    tech = _section(result, 'technical')
    ps = _section(result, 'pagespeed')
    scores = _section(result, 'scores')

    # On-page nested
    meta_tags = _section(onpage, 'meta_tags')
    images = _section(onpage, 'images')
    structured = _section(onpage, 'structured_data')
    og = _section(onpage, 'open_graph')

    # Technical nested
    robots = _section(tech, 'robots_txt')
    sitemap = _section(tech, 'sitemap')
    canonical = _section(tech, 'canonical')
    indexability = _section(tech, 'indexability')

    # PageSpeed nested
    cwv = _section(ps, 'core_web_vitals')

    # Security headers, stored as a separate top-level key of the result
    sec = _section(result, 'security_headers')

    # 'final_url' may be absent (or None) when the page fetch failed.
    final_url = result.get('final_url') or ''

    db = SessionLocal()
    try:
        audit = PortalSEOAudit(
            audited_at=datetime.now(),
            url=PORTAL_URL,
            # PageSpeed scores
            pagespeed_performance=scores.get('pagespeed_performance'),
            pagespeed_seo=scores.get('pagespeed_seo'),
            pagespeed_accessibility=scores.get('pagespeed_accessibility'),
            pagespeed_best_practices=scores.get('pagespeed_best_practices'),
            # Core Web Vitals
            lcp_ms=cwv.get('lcp_ms'),
            fcp_ms=cwv.get('fcp_ms'),
            cls=cwv.get('cls'),
            tbt_ms=cwv.get('tbt_ms'),
            speed_index_ms=cwv.get('speed_index_ms'),
            # On-page checks
            has_meta_title=bool(meta_tags.get('title')),
            has_meta_description=bool(meta_tags.get('description')),
            has_canonical=canonical.get('has_canonical', False),
            has_robots_txt=robots.get('exists', False),
            has_sitemap=sitemap.get('exists', False),
            has_structured_data=structured.get('has_structured_data', False),
            has_og_tags=bool(og.get('og_title')),
            has_ssl=final_url.startswith('https'),
            # NOTE(review): the "mobile friendly" flag is filled from the
            # indexability check — looks like a stand-in; confirm whether a
            # dedicated mobile-friendliness signal should be used instead.
            is_mobile_friendly=indexability.get('is_indexable'),
            # Security headers
            has_hsts=sec.get('has_hsts'),
            has_csp=sec.get('has_csp'),
            has_x_frame=sec.get('has_x_frame_options'),
            has_x_content_type=sec.get('has_x_content_type'),
            # Content metrics
            image_count=images.get('total_images'),
            images_without_alt=images.get('images_without_alt'),
            # Full data
            full_results=_make_json_safe(result),
            notes=notes,
            created_by=user_email
        )
        db.add(audit)
        db.commit()
        return audit.id
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
|
|
|
|
|
|
@bp.route('/portal-seo')
@login_required
def admin_portal_seo():
    """Render the portal SEO audit history, newest audit first.

    Responds with 404 unless ``is_audit_owner()`` returns a truthy value.
    """
    if not is_audit_owner():
        abort(404)

    session = SessionLocal()
    try:
        newest_first = PortalSEOAudit.audited_at.desc()
        history = session.query(PortalSEOAudit).order_by(newest_first).all()

        # Rendered inside the try block, i.e. before the session is closed.
        return render_template(
            'admin/portal_seo.html',
            audits=history,
            portal_url=PORTAL_URL,
        )
    finally:
        session.close()
|
|
|
|
|
|
@bp.route('/portal-seo/run/stream')
@login_required
def admin_portal_seo_run_stream():
    """SSE endpoint for streaming portal SEO audit progress step by step.

    Runs the same audit pipeline as SEOAuditor.audit_company() but yields
    progress events between each step.

    Steps (each emits a 'running' event followed by a 'done', 'warning',
    'error' or 'skipped' event):
      1. create the auditor
      2. fetch the portal page and record security headers
      3. on-page analysis (skipped without HTML)
      4. technical checks
      5. PageSpeed Insights (mobile strategy, only if quota remains)
      6. local SEO analysis (skipped without HTML)
      7. citation check
      8. content freshness check
      9. save to the database

    A failure inside steps 2-8 is appended to ``result['errors']`` and the
    audit continues with the next step. After a successful save a final
    ``{'status': 'complete', ...}`` event is sent; a failed save or an
    unexpected exception produces a final ``{'status': 'error', ...}`` event.

    Query args:
        notes: Optional free-text note stored with the audit.

    Returns:
        A ``text/event-stream`` response. Responds with 404 unless
        ``is_audit_owner()`` returns a truthy value.
    """
    if not is_audit_owner():
        abort(404)

    # Read request/user data up front, outside the generator.
    notes = request.args.get('notes', '')
    user_email = current_user.email

    TOTAL = 9

    def generate():
        try:
            # Step 1: Init auditor
            yield _sse_event({
                'step': 1, 'total': TOTAL,
                'message': 'Inicjalizacja audytora SEO...', 'status': 'running'
            })

            auditor = _get_auditor()

            yield _sse_event({
                'step': 1, 'total': TOTAL,
                'message': 'Audytor SEO zainicjalizowany', 'status': 'done'
            })

            # Pseudo-company record describing the portal itself; passed to
            # the local SEO analyzer in step 6.
            company = {
                'id': 0,
                'name': 'NordaBiznes.pl',
                'slug': 'nordabiznes-pl',
                'website': PORTAL_URL,
                'address_city': 'Wejherowo'
            }
            result = {
                'company_id': 0,
                'company_name': 'NordaBiznes.pl',
                'audit_date': datetime.now(),
                'website_url': PORTAL_URL,
                'errors': [],
                'scores': {},
            }
            html_content = None
            final_url = PORTAL_URL

            # Step 2: Fetch page
            yield _sse_event({
                'step': 2, 'total': TOTAL,
                'message': 'Pobieranie strony nordabiznes.pl...', 'status': 'running'
            })

            try:
                start = time_module.time()
                resp = auditor.session.get(
                    PORTAL_URL, timeout=15, allow_redirects=True
                )
                load_ms = int((time_module.time() - start) * 1000)
                final_url = resp.url
                result['http_status'] = resp.status_code
                result['load_time_ms'] = load_ms
                result['final_url'] = final_url

                # Extract security headers from HTTP response
                # NOTE(review): the lowercase names assume resp.headers does
                # case-insensitive lookups (as a requests response does) —
                # confirm against the type of auditor.session.
                hdrs = resp.headers
                result['security_headers'] = {
                    'has_hsts': 'strict-transport-security' in hdrs,
                    'has_csp': 'content-security-policy' in hdrs,
                    'has_x_frame_options': 'x-frame-options' in hdrs,
                    'has_x_content_type': 'x-content-type-options' in hdrs,
                }

                if resp.status_code == 200:
                    # A reported ISO-8859-1 encoding is replaced by the
                    # detected one (presumably because it is only a default
                    # for responses without a charset — TODO confirm).
                    if resp.encoding and resp.encoding.lower() == 'iso-8859-1':
                        resp.encoding = resp.apparent_encoding
                    html_content = resp.text
                    yield _sse_event({
                        'step': 2, 'total': TOTAL,
                        'message': f'Strona pobrana ({load_ms}ms, {len(html_content)//1024}KB)',
                        'status': 'done'
                    })
                else:
                    result['errors'].append(f'HTTP {resp.status_code}')
                    yield _sse_event({
                        'step': 2, 'total': TOTAL,
                        'message': f'HTTP {resp.status_code}', 'status': 'warning'
                    })
            except Exception as e:
                result['errors'].append(str(e)[:100])
                yield _sse_event({
                    'step': 2, 'total': TOTAL,
                    'message': f'Błąd: {str(e)[:80]}', 'status': 'error'
                })

            # Step 3: On-page analysis
            yield _sse_event({
                'step': 3, 'total': TOTAL,
                'message': 'Analiza on-page SEO (meta tagi, nagłówki, obrazy)...',
                'status': 'running'
            })

            if html_content:
                try:
                    onpage_obj = auditor.onpage_analyzer.analyze_html(
                        html_content, base_url=final_url
                    )
                    onpage_dict = onpage_obj.to_dict()
                    result['onpage'] = onpage_dict

                    # Build summary
                    mt = onpage_dict.get('meta_tags', {})
                    imgs = onpage_dict.get('images', {})
                    sd = onpage_dict.get('structured_data', {})
                    title = mt.get('title', '')
                    title_short = (title[:35] + '...') if title and len(title) > 35 else title
                    parts = []
                    if title:
                        parts.append(f'title="{title_short}"')
                    parts.append(f'{imgs.get("total_images", 0)} obrazów')
                    if imgs.get('images_without_alt'):
                        parts.append(f'{imgs["images_without_alt"]} bez alt')
                    if sd.get('has_structured_data'):
                        parts.append('Schema.org')

                    yield _sse_event({
                        'step': 3, 'total': TOTAL,
                        'message': f'On-page: {", ".join(parts)}',
                        'status': 'done'
                    })
                except Exception as e:
                    result['errors'].append(f'On-page: {str(e)[:100]}')
                    yield _sse_event({
                        'step': 3, 'total': TOTAL,
                        'message': f'On-page błąd: {str(e)[:80]}',
                        'status': 'error'
                    })
            else:
                yield _sse_event({
                    'step': 3, 'total': TOTAL,
                    'message': 'Pominięto (brak HTML)', 'status': 'skipped'
                })

            # Step 4: Technical SEO
            yield _sse_event({
                'step': 4, 'total': TOTAL,
                'message': 'Sprawdzanie techniczne (robots.txt, sitemap, SSL, canonical)...',
                'status': 'running'
            })

            try:
                tech_obj = auditor.technical_checker.check_url(final_url)
                tech_dict = tech_obj.to_dict()
                result['technical'] = tech_dict

                # Sort each check into "present" / "missing" for the summary.
                checks_ok = []
                checks_fail = []
                if tech_dict.get('robots_txt', {}).get('exists'):
                    checks_ok.append('robots.txt')
                else:
                    checks_fail.append('robots.txt')
                if tech_dict.get('sitemap', {}).get('exists'):
                    checks_ok.append('sitemap')
                else:
                    checks_fail.append('sitemap')
                if tech_dict.get('canonical', {}).get('has_canonical'):
                    checks_ok.append('canonical')
                else:
                    checks_fail.append('canonical')

                msg_parts = []
                if checks_ok:
                    msg_parts.append(f'OK: {", ".join(checks_ok)}')
                if checks_fail:
                    msg_parts.append(f'Brak: {", ".join(checks_fail)}')

                yield _sse_event({
                    'step': 4, 'total': TOTAL,
                    'message': f'Technical: {" | ".join(msg_parts)}',
                    'status': 'done'
                })
            except Exception as e:
                result['errors'].append(f'Technical: {str(e)[:100]}')
                yield _sse_event({
                    'step': 4, 'total': TOTAL,
                    'message': f'Technical błąd: {str(e)[:80]}',
                    'status': 'error'
                })

            # Step 5: PageSpeed Insights
            yield _sse_event({
                'step': 5, 'total': TOTAL,
                'message': 'PageSpeed Insights API (może potrwać do 30s)...',
                'status': 'running'
            })

            try:
                remaining = auditor.pagespeed_client.get_remaining_quota()
                if remaining > 0:
                    # Imported here: the module only becomes importable once
                    # _get_auditor() has put scripts/ on sys.path.
                    from pagespeed_client import Strategy
                    ps_result = auditor.pagespeed_client.analyze_url(
                        final_url, strategy=Strategy.MOBILE
                    )
                    result['pagespeed'] = ps_result.to_dict()
                    result['scores'] = {
                        'pagespeed_seo': ps_result.scores.seo,
                        'pagespeed_performance': ps_result.scores.performance,
                        'pagespeed_accessibility': ps_result.scores.accessibility,
                        'pagespeed_best_practices': ps_result.scores.best_practices,
                    }
                    yield _sse_event({
                        'step': 5, 'total': TOTAL,
                        'message': (
                            f'Perf={ps_result.scores.performance}, '
                            f'SEO={ps_result.scores.seo}, '
                            f'A11y={ps_result.scores.accessibility}, '
                            f'BP={ps_result.scores.best_practices}'
                        ),
                        'status': 'done'
                    })
                else:
                    result['errors'].append('PageSpeed API quota exceeded')
                    yield _sse_event({
                        'step': 5, 'total': TOTAL,
                        'message': 'Limit API wyczerpany',
                        'status': 'warning'
                    })
            except Exception as e:
                result['errors'].append(f'PageSpeed: {str(e)[:100]}')
                yield _sse_event({
                    'step': 5, 'total': TOTAL,
                    'message': f'PageSpeed błąd: {str(e)[:80]}',
                    'status': 'error'
                })

            # Step 6: Local SEO
            yield _sse_event({
                'step': 6, 'total': TOTAL,
                'message': 'Analiza Local SEO (NAP, Google Maps, lokalne słowa kluczowe)...',
                'status': 'running'
            })

            if html_content:
                try:
                    local_seo = auditor.local_seo_analyzer.analyze(
                        html_content, final_url, company
                    )
                    result['local_seo'] = local_seo
                    score = local_seo.get('local_seo_score', 0)
                    yield _sse_event({
                        'step': 6, 'total': TOTAL,
                        'message': f'Local SEO score: {score}/100',
                        'status': 'done'
                    })
                except Exception as e:
                    result['errors'].append(f'Local SEO: {str(e)[:100]}')
                    yield _sse_event({
                        'step': 6, 'total': TOTAL,
                        'message': f'Local SEO błąd: {str(e)[:80]}',
                        'status': 'error'
                    })
            else:
                yield _sse_event({
                    'step': 6, 'total': TOTAL,
                    'message': 'Pominięto (brak HTML)', 'status': 'skipped'
                })

            # Step 7: Citations
            yield _sse_event({
                'step': 7, 'total': TOTAL,
                'message': 'Sprawdzanie cytowań w katalogach (Google, Yelp, Facebook)...',
                'status': 'running'
            })

            try:
                citations = auditor.citation_checker.check_citations(
                    'NordaBiznes.pl', 'Wejherowo'
                )
                result['citations'] = citations
                found = sum(1 for c in citations if c.get('status') == 'found')
                yield _sse_event({
                    'step': 7, 'total': TOTAL,
                    'message': f'Cytowania: {found}/{len(citations)} znalezione',
                    'status': 'done'
                })
            except Exception as e:
                result['errors'].append(f'Citations: {str(e)[:100]}')
                yield _sse_event({
                    'step': 7, 'total': TOTAL,
                    'message': f'Citations błąd: {str(e)[:80]}',
                    'status': 'error'
                })

            # Step 8: Content freshness
            yield _sse_event({
                'step': 8, 'total': TOTAL,
                'message': 'Sprawdzanie aktualności treści...',
                'status': 'running'
            })

            try:
                # html_content may be None here if the fetch in step 2 failed.
                freshness = auditor.freshness_checker.check_freshness(
                    final_url, html_content
                )
                result['freshness'] = freshness
                fscore = freshness.get('content_freshness_score', 0)
                yield _sse_event({
                    'step': 8, 'total': TOTAL,
                    'message': f'Aktualność treści: {fscore}/100',
                    'status': 'done'
                })
            except Exception as e:
                result['errors'].append(f'Freshness: {str(e)[:100]}')
                yield _sse_event({
                    'step': 8, 'total': TOTAL,
                    'message': f'Freshness błąd: {str(e)[:80]}',
                    'status': 'error'
                })

            # Calculate overall score (same algorithm as SEOAuditor)
            try:
                overall = auditor._calculate_overall_score(result)
                result['scores']['overall_seo'] = overall
            except Exception as e:
                # Still best-effort (the audit is saved without an overall
                # score), but the failure is logged and recorded like the
                # failures of the other steps instead of vanishing silently.
                logger.warning(
                    'Portal SEO overall score calculation failed: %s',
                    e, exc_info=True
                )
                result['errors'].append(f'Overall score: {str(e)[:100]}')

            # Step 9: Save to DB
            yield _sse_event({
                'step': 9, 'total': TOTAL,
                'message': 'Zapisywanie wyników do bazy danych...',
                'status': 'running'
            })

            try:
                audit_id = _save_audit_to_db(result, notes, user_email)

                yield _sse_event({
                    'step': 9, 'total': TOTAL,
                    'message': f'Zapisano audyt #{audit_id}',
                    'status': 'done'
                })

                # Final event: tells the client the audit finished and where
                # to find it.
                yield _sse_event({
                    'status': 'complete',
                    'audit_id': audit_id,
                    'performance': result.get('scores', {}).get('pagespeed_performance'),
                    'seo': result.get('scores', {}).get('pagespeed_seo'),
                    'errors': result.get('errors', []),
                })

            except Exception as e:
                logger.error(f'Portal SEO save failed: {e}', exc_info=True)
                yield _sse_event({
                    'step': 9, 'total': TOTAL,
                    'message': f'Błąd zapisu: {str(e)[:80]}',
                    'status': 'error'
                })
                yield _sse_event({'status': 'error', 'message': str(e)[:200]})

        except Exception as e:
            # Anything not handled by a step-level handler (e.g. auditor
            # creation failing) ends the stream with a single error event.
            logger.error(f'Portal SEO audit stream failed: {e}', exc_info=True)
            yield _sse_event({
                'status': 'error',
                'message': f'Krytyczny błąd: {str(e)[:200]}'
            })

    return Response(
        stream_with_context(generate()),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no',
        }
    )
|
|
|
|
|
|
@bp.route('/portal-seo/<int:audit_id>')
@login_required
def admin_portal_seo_detail(audit_id):
    """Show one portal SEO audit together with the audit that preceded it.

    Responds with 404 unless ``is_audit_owner()`` returns a truthy value.
    An unknown ``audit_id`` flashes an error message and redirects to the
    audit history view.
    """
    if not is_audit_owner():
        abort(404)

    session = SessionLocal()
    try:
        selected = session.query(PortalSEOAudit).get(audit_id)
        if selected:
            # Most recent audit strictly older than the selected one, used
            # for comparison (None when there is no earlier audit).
            older_audits = session.query(PortalSEOAudit).filter(
                PortalSEOAudit.audited_at < selected.audited_at
            )
            previous = older_audits.order_by(
                PortalSEOAudit.audited_at.desc()
            ).first()

            return render_template(
                'admin/portal_seo_detail.html',
                audit=selected,
                prev_audit=previous,
                portal_url=PORTAL_URL,
            )

        flash('Audyt nie znaleziony.', 'error')
        return redirect(url_for('admin.admin_portal_seo'))
    finally:
        session.close()
|