diff --git a/scripts/seo_report_generator.py b/scripts/seo_report_generator.py
new file mode 100644
index 0000000..47a48e5
--- /dev/null
+++ b/scripts/seo_report_generator.py
@@ -0,0 +1,1400 @@
+#!/usr/bin/env python3
+"""
+SEO Report Generator for Norda Biznes
+=====================================
+
+Generates HTML reports (single company or batch) and JSON exports from SEO audit data.
+Designed for offline viewing, sharing with clients, and archiving audit results.
+
+Usage:
+ python seo_report_generator.py --company-id 26 --html
+ python seo_report_generator.py --all --html --output ./reports
+ python seo_report_generator.py --batch 1-10 --json
+ python seo_report_generator.py --all --json --output ./exports
+
+Output:
+ - HTML: Styled, standalone reports suitable for viewing in browsers
+ - JSON: Machine-readable exports for integration with other tools
+
+Author: Claude Code
+Date: 2026-01-08
+"""
+
+import os
+import sys
+import json
+import argparse
+import logging
+from datetime import datetime
+from typing import Optional, Dict, List, Any, Tuple
+from pathlib import Path
+from html import escape
+
+from sqlalchemy import create_engine, text
+from sqlalchemy.orm import sessionmaker
+
# Configure logging: timestamped, level-tagged messages on stderr.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Database configuration.
# NOTE(review): a production credential is hard-coded as the fallback below and
# is now committed to version control — consider requiring DATABASE_URL from
# the environment (or a secrets store) and rotating this password.
DATABASE_URL = os.getenv(
    'DATABASE_URL',
    'postgresql://nordabiz_app:NordaBiz2025Secure@10.22.68.249:5432/nordabiz'
)

# Report version for tracking; embedded in JSON exports.
REPORT_VERSION = '1.0.0'
+
+
+class SEOReportGenerator:
+ """
+ Generates HTML and JSON reports from SEO audit data stored in database.
+ """
+
+ def __init__(self, database_url: str = DATABASE_URL):
+ """
+ Initialize report generator.
+
+ Args:
+ database_url: Database connection string.
+ """
+ self.engine = create_engine(database_url)
+ self.Session = sessionmaker(bind=self.engine)
+
+ def get_companies_with_seo_data(
+ self,
+ company_ids: Optional[List[int]] = None,
+ batch_start: Optional[int] = None,
+ batch_end: Optional[int] = None
+ ) -> List[Dict[str, Any]]:
+ """
+ Fetch companies with their SEO analysis data from database.
+
+ Args:
+ company_ids: List of specific company IDs to fetch.
+ batch_start: Start index for batch processing (1-indexed).
+ batch_end: End index for batch processing (1-indexed).
+
+ Returns:
+ List of company dicts with SEO analysis data.
+ """
+ with self.Session() as session:
+ base_query = """
+ SELECT
+ c.id, c.name, c.slug, c.website, c.address_city,
+ c.nip, c.email, c.phone,
+ cat.name as category_name,
+ wa.analyzed_at, wa.website_url, wa.final_url,
+ wa.http_status_code, wa.load_time_ms,
+ wa.pagespeed_seo_score, wa.pagespeed_performance_score,
+ wa.pagespeed_accessibility_score, wa.pagespeed_best_practices_score,
+ wa.meta_title, wa.meta_description, wa.meta_keywords,
+ wa.h1_count, wa.h2_count, wa.h3_count, wa.h1_text,
+ wa.total_images, wa.images_without_alt, wa.images_with_alt,
+ wa.internal_links_count, wa.external_links_count,
+ wa.has_structured_data, wa.structured_data_types,
+ wa.has_canonical, wa.canonical_url, wa.is_indexable, wa.noindex_reason,
+ wa.has_sitemap, wa.has_robots_txt,
+ wa.viewport_configured, wa.is_mobile_friendly,
+ wa.largest_contentful_paint_ms, wa.first_input_delay_ms, wa.cumulative_layout_shift,
+ wa.has_og_tags, wa.og_title, wa.og_description, wa.og_image,
+ wa.has_twitter_cards, wa.html_lang, wa.has_hreflang,
+ wa.word_count_homepage,
+ wa.seo_audit_version, wa.seo_audited_at, wa.seo_audit_errors,
+ wa.seo_overall_score, wa.seo_health_score, wa.seo_issues,
+ wa.has_ssl, wa.ssl_expires_at
+ FROM companies c
+ LEFT JOIN company_website_analysis wa ON c.id = wa.company_id
+ LEFT JOIN categories cat ON c.category_id = cat.id
+ WHERE c.is_active = TRUE
+ """
+
+ if company_ids:
+ query = text(base_query + " AND c.id = ANY(:ids) ORDER BY c.id")
+ result = session.execute(query, {'ids': company_ids})
+ elif batch_start is not None and batch_end is not None:
+ query = text(base_query + " ORDER BY c.id OFFSET :offset LIMIT :limit")
+ result = session.execute(query, {
+ 'offset': batch_start - 1,
+ 'limit': batch_end - batch_start + 1
+ })
+ else:
+ query = text(base_query + " ORDER BY c.id")
+ result = session.execute(query)
+
+ companies = []
+ for row in result:
+ company = dict(row._mapping)
+ # Parse JSON fields if they are strings
+ if company.get('seo_issues') and isinstance(company['seo_issues'], str):
+ try:
+ company['seo_issues'] = json.loads(company['seo_issues'])
+ except json.JSONDecodeError:
+ company['seo_issues'] = []
+ if company.get('seo_audit_errors') and isinstance(company['seo_audit_errors'], str):
+ try:
+ company['seo_audit_errors'] = json.loads(company['seo_audit_errors'])
+ except json.JSONDecodeError:
+ company['seo_audit_errors'] = []
+ companies.append(company)
+
+ return companies
+
    def generate_html_report(
        self,
        company: Dict[str, Any],
        include_recommendations: bool = True
    ) -> str:
        """
        Generate HTML report for a single company.

        Args:
            company: Company data dict with SEO analysis.
            include_recommendations: Whether to include improvement recommendations.

        Returns:
            HTML string of the complete report.

        NOTE(review): the template literal below appears to have lost its HTML
        tags (only text content and placeholders remain), and the score_color
        helper is not referenced in the visible template — confirm against the
        original template before shipping reports.
        """
        # Escape HTML in all string values
        def safe(value):
            if value is None:
                return ''
            return escape(str(value))

        # Score color helper
        def score_color(score):
            if score is None:
                return '#6c757d'  # gray
            if score >= 90:
                return '#28a745'  # green
            if score >= 50:
                return '#ffc107'  # yellow
            return '#dc3545'  # red

        # Human-readable (Polish) label for a 0-100 score.
        def score_label(score):
            if score is None:
                return 'Brak danych'
            if score >= 90:
                return 'Doskonały'
            if score >= 70:
                return 'Dobry'
            if score >= 50:
                return 'Średni'
            return 'Wymaga poprawy'

        # Generate recommendations based on issues
        recommendations = []
        if include_recommendations:
            recommendations = self._generate_recommendations(company)

        # Build HTML
        html = f'''




Raport SEO - {safe(company.get('name'))}










Wyniki SEO



Ogolny wynik SEO

{company.get('seo_overall_score') if company.get('seo_overall_score') is not None else '—'}


{score_label(company.get('seo_overall_score'))}



PageSpeed SEO

{company.get('pagespeed_seo_score') if company.get('pagespeed_seo_score') is not None else '—'}


{score_label(company.get('pagespeed_seo_score'))}



Wydajnosc

{company.get('pagespeed_performance_score') if company.get('pagespeed_performance_score') is not None else '—'}


{score_label(company.get('pagespeed_performance_score'))}



Dostepnosc

{company.get('pagespeed_accessibility_score') if company.get('pagespeed_accessibility_score') is not None else '—'}


{score_label(company.get('pagespeed_accessibility_score'))}







Szczegoly techniczne



Meta tagi

Tytul strony

{self._truncate(safe(company.get('meta_title')), 40) or '—'}



Opis meta

{f'Tak ({len(company.get("meta_description") or "")} zn.)' if company.get('meta_description') else 'Brak'}



Canonical URL

{'Tak' if company.get('has_canonical') else 'Nie'}





Struktura naglowkow

H1

{self._h1_badge(company.get('h1_count'))}



H2
{company.get('h2_count') if company.get('h2_count') is not None else '—'}


H3
{company.get('h3_count') if company.get('h3_count') is not None else '—'}




Obrazy

Liczba obrazow
{company.get('total_images') if company.get('total_images') is not None else '—'}


Bez alt

{self._images_alt_badge(company.get('images_without_alt'), company.get('total_images'))}



Z alt
{company.get('images_with_alt') if company.get('images_with_alt') is not None else '—'}




Linki

Wewnetrzne
{company.get('internal_links_count') if company.get('internal_links_count') is not None else '—'}


Zewnetrzne
{company.get('external_links_count') if company.get('external_links_count') is not None else '—'}








Techniczne SEO



Pliki i indeksowanie

robots.txt

{'Tak' if company.get('has_robots_txt') else 'Nie'}



sitemap.xml

{'Tak' if company.get('has_sitemap') else 'Nie'}



Indeksowalnosc

{'Tak' if company.get('is_indexable') else f'Nie ({safe(company.get("noindex_reason") or "")})'}





Bezpieczenstwo i mobilnosc

SSL/HTTPS

{'Tak' if company.get('has_ssl') else 'Nie'}



Viewport

{'Tak' if company.get('viewport_configured') else 'Nie'}



Mobile-friendly

{'Tak' if company.get('is_mobile_friendly') else 'Nie'}





Dane strukturalne

Schema.org

{'Tak' if company.get('has_structured_data') else 'Nie'}



Typy

{', '.join(company.get('structured_data_types') or []) or '—'}





Social Media

Open Graph

{'Tak' if company.get('has_og_tags') else 'Nie'}



Twitter Cards

{'Tak' if company.get('has_twitter_cards') else 'Nie'}



Jezyk (lang)
{safe(company.get('html_lang')) or '—'}






{self._core_web_vitals_section(company)}


{self._issues_section(company)}


{self._recommendations_section(recommendations) if recommendations else ''}






'''

        return html
+
+ def _truncate(self, text: str, length: int) -> str:
+ """Truncate text with ellipsis."""
+ if not text:
+ return ''
+ if len(text) <= length:
+ return text
+ return text[:length] + '...'
+
+ def _h1_badge(self, count: Optional[int]) -> str:
+ """Generate badge for H1 count."""
+ if count is None:
+ return '—'
+ if count == 1:
+ return f'{count}'
+ if count == 0:
+ return '0 (brak!)'
+ return f'{count} (za duzo)'
+
    def _images_alt_badge(self, without_alt: Optional[int], total: Optional[int]) -> str:
        """Generate badge for images without alt."""
        # None means the audit never counted images for this site.
        if without_alt is None:
            return '—'
        if without_alt == 0:
            return '0'
        # NOTE(review): the two branches below return identical text; the
        # severity styling for the >50%-missing case (presumably a CSS badge
        # class) appears to have been lost — confirm the intended markup.
        if total and without_alt / total > 0.5:
            return f'{without_alt}'
        return f'{without_alt}'
+
    def _core_web_vitals_section(self, company: Dict[str, Any]) -> str:
        """
        Generate Core Web Vitals section HTML.

        Returns '' when none of LCP/FID/CLS was measured so the caller can
        omit the section entirely.

        NOTE(review): the badge-class names computed below are not referenced
        in the visible template, and the template itself appears to have lost
        its HTML tags — confirm against the original markup.
        """
        lcp = company.get('largest_contentful_paint_ms')
        fid = company.get('first_input_delay_ms')
        cls = company.get('cumulative_layout_shift')

        if lcp is None and fid is None and cls is None:
            return ''

        # Each helper maps a metric value to (display text, badge class) using
        # the thresholds also shown as "Cel" targets in the template below.
        def lcp_status(val):
            if val is None:
                return ('—', 'badge-secondary')
            if val <= 2500:
                return (f'{val}ms', 'badge-success')
            if val <= 4000:
                return (f'{val}ms', 'badge-warning')
            return (f'{val}ms', 'badge-danger')

        def fid_status(val):
            if val is None:
                return ('—', 'badge-secondary')
            if val <= 100:
                return (f'{val}ms', 'badge-success')
            if val <= 300:
                return (f'{val}ms', 'badge-warning')
            return (f'{val}ms', 'badge-danger')

        def cls_status(val):
            if val is None:
                return ('—', 'badge-secondary')
            if val <= 0.1:
                return (f'{val:.3f}', 'badge-success')
            if val <= 0.25:
                return (f'{val:.3f}', 'badge-warning')
            return (f'{val:.3f}', 'badge-danger')

        lcp_val, lcp_class = lcp_status(lcp)
        fid_val, fid_class = fid_status(fid)
        cls_val, cls_class = cls_status(cls)

        return f'''


Core Web Vitals



LCP (Largest Contentful Paint)

Wynik
{lcp_val}


Cel
< 2500ms




FID (First Input Delay)

Wynik
{fid_val}

Cel
< 100ms




CLS (Cumulative Layout Shift)

Wynik
{cls_val}

Cel
< 0.1




        '''
+
    def _issues_section(self, company: Dict[str, Any]) -> str:
        """Generate issues section HTML; returns '' when there is nothing to show."""
        # seo_issues / seo_audit_errors were decoded from JSON columns upstream.
        issues = company.get('seo_issues') or []
        errors = company.get('seo_audit_errors') or []

        if not issues and not errors:
            return ''

        items_html = ''
        for issue in issues:
            # Issues may be dicts ({'severity': ..., 'message': ...}) or bare strings.
            if isinstance(issue, dict):
                severity = issue.get('severity', 'info')
                message = escape(issue.get('message', ''))
            else:
                severity = 'info'
                message = escape(str(issue))
            items_html += f'{message}\n'

        for error in errors:
            items_html += f'{escape(str(error))}\n'

        # NOTE(review): 'severity' is computed but never used, and the returned
        # template below does not interpolate items_html — the section's HTML
        # markup appears to have been stripped from this file; restore the
        # original template before relying on this output.
        return f'''

    '''
+
    def _recommendations_section(self, recommendations: List[str]) -> str:
        """Generate recommendations section HTML; returns '' when the list is empty."""
        if not recommendations:
            return ''

        items_html = ''.join(f'{escape(rec)}\n' for rec in recommendations)

        # NOTE(review): items_html is built but the returned template below does
        # not reference it — the list markup appears to have been stripped from
        # this file; confirm against the original template.
        return f'''

    '''
+
+ def _generate_recommendations(self, company: Dict[str, Any]) -> List[str]:
+ """Generate SEO improvement recommendations based on audit data."""
+ recommendations = []
+
+ # Meta tags
+ if not company.get('meta_title'):
+ recommendations.append(
+ 'Dodaj znacznik do strony. Powinien miec 50-60 znakow i zawierac slowa kluczowe.'
+ )
+ if not company.get('meta_description'):
+ recommendations.append(
+ 'Dodaj meta description (150-160 znakow). Dobry opis zwieksza CTR w wynikach wyszukiwania.'
+ )
+
+ # Headings
+ h1_count = company.get('h1_count')
+ if h1_count == 0:
+ recommendations.append(
+ 'Dodaj naglowek H1 do strony. Kazda strona powinna miec dokladnie jeden H1.'
+ )
+ elif h1_count and h1_count > 1:
+ recommendations.append(
+ f'Strona ma {h1_count} naglowkow H1. Pozostaw tylko jeden glowny naglowek H1.'
+ )
+
+ # Images
+ images_without_alt = company.get('images_without_alt')
+ if images_without_alt and images_without_alt > 0:
+ recommendations.append(
+ f'Dodaj atrybuty alt do {images_without_alt} obrazow. Alt poprawia SEO i dostepnosc.'
+ )
+
+ # Technical SEO
+ if not company.get('has_robots_txt'):
+ recommendations.append(
+ 'Utworz plik robots.txt w glownym katalogu strony.'
+ )
+ if not company.get('has_sitemap'):
+ recommendations.append(
+ 'Utworz i zglos mape strony (sitemap.xml) w Google Search Console.'
+ )
+ if not company.get('has_canonical'):
+ recommendations.append(
+ 'Dodaj znacznik canonical URL aby uniknac problemow z duplikacja tresci.'
+ )
+ if not company.get('has_ssl'):
+ recommendations.append(
+ 'Wlacz certyfikat SSL (HTTPS). Google premiuje strony z bezpiecznym polaczeniem.'
+ )
+
+ # Mobile
+ if not company.get('viewport_configured'):
+ recommendations.append(
+ 'Dodaj znacznik viewport meta dla prawidlowego wyswietlania na urzadzeniach mobilnych.'
+ )
+
+ # Structured data
+ if not company.get('has_structured_data'):
+ recommendations.append(
+ 'Dodaj dane strukturalne (Schema.org) - np. LocalBusiness dla lepszej widocznosci w Google.'
+ )
+
+ # Open Graph
+ if not company.get('has_og_tags'):
+ recommendations.append(
+ 'Dodaj znaczniki Open Graph dla lepszego wygladu przy udostepnianiu w mediach spolecznosciowych.'
+ )
+
+ # Performance
+ lcp = company.get('largest_contentful_paint_ms')
+ if lcp and lcp > 2500:
+ recommendations.append(
+ f'Popraw LCP (obecnie {lcp}ms). Zoptymalizuj obrazy i skrypty dla szybszego ladowania.'
+ )
+
+ cls = company.get('cumulative_layout_shift')
+ if cls and cls > 0.1:
+ recommendations.append(
+ f'Popraw CLS (obecnie {cls:.3f}). Zdefiniuj wymiary obrazow i unikaj dynamicznego dodawania tresci.'
+ )
+
+ return recommendations
+
    def generate_batch_html_report(
        self,
        companies: List[Dict[str, Any]],
        title: str = "Raport SEO - Norda Biznes"
    ) -> str:
        """
        Generate batch HTML report summarizing multiple companies.

        Args:
            companies: List of company data dicts with SEO analysis.
            title: Report title.

        Returns:
            HTML string of the batch summary report.

        NOTE(review): the template literals below appear to have lost their
        HTML tags (only cell text and '|' separators remain) — confirm against
        the original template before shipping reports.
        """
        def safe(value):
            if value is None:
                return ''
            return escape(str(value))

        # Maps a 0-100 score to a traffic-light hex color (gray when missing).
        def score_color(score):
            if score is None:
                return '#6c757d'
            if score >= 90:
                return '#28a745'
            if score >= 50:
                return '#ffc107'
            return '#dc3545'

        # Calculate statistics
        total = len(companies)
        audited = sum(1 for c in companies if c.get('seo_audited_at'))
        scores = [c.get('seo_overall_score') for c in companies if c.get('seo_overall_score') is not None]
        avg_score = sum(scores) / len(scores) if scores else 0
        excellent = sum(1 for s in scores if s >= 90)
        good = sum(1 for s in scores if 70 <= s < 90)
        fair = sum(1 for s in scores if 50 <= s < 70)
        # NOTE(review): 'poor' and 'score_color' are not referenced in the
        # visible template — likely used by markup that was stripped.
        poor = sum(1 for s in scores if s < 50)

        # Generate table rows, best overall score first (missing scores sort as 0).
        rows_html = ''
        for company in sorted(companies, key=lambda c: c.get('seo_overall_score') or 0, reverse=True):
            overall = company.get('seo_overall_score')
            perf = company.get('pagespeed_performance_score')
            seo = company.get('pagespeed_seo_score')
            acc = company.get('pagespeed_accessibility_score')

            rows_html += f'''

| {safe(company.get('name'))} |
{safe(company.get('category_name') or '—')} |
{overall if overall is not None else '—'} |
{seo if seo is not None else '—'} |
{perf if perf is not None else '—'} |
{acc if acc is not None else '—'} |
{'Tak' if company.get('has_ssl') else 'Nie'} |
{'Tak' if company.get('is_mobile_friendly') else 'Nie'} |

            '''

        html = f'''




{safe(title)}










{total}

Firm w raporcie



{audited}

Przebadanych



{avg_score:.1f}

Sredni wynik



{excellent}

Doskonaly (90+)



{good}

Dobry (70-89)



{fair}

Sredni (50-69)





Wyniki poszczegolnych firm



| Firma |
Kategoria |
Wynik SEO |
PS SEO |
Wydajnosc |
Dostepnosc |
SSL |
Mobile |



{rows_html}






'''

        return html
+
+ def generate_json_export(
+ self,
+ companies: List[Dict[str, Any]],
+ include_raw_data: bool = False
+ ) -> Dict[str, Any]:
+ """
+ Generate JSON export of SEO audit data.
+
+ Args:
+ companies: List of company data dicts.
+ include_raw_data: Whether to include all raw data fields.
+
+ Returns:
+ Dict ready for JSON serialization.
+ """
+ export = {
+ 'report_version': REPORT_VERSION,
+ 'generated_at': datetime.now().isoformat(),
+ 'total_companies': len(companies),
+ 'audited_companies': sum(1 for c in companies if c.get('seo_audited_at')),
+ 'statistics': self._calculate_statistics(companies),
+ 'companies': []
+ }
+
+ for company in companies:
+ company_data = {
+ 'id': company.get('id'),
+ 'name': company.get('name'),
+ 'slug': company.get('slug'),
+ 'website': company.get('website') or company.get('website_url'),
+ 'category': company.get('category_name'),
+ 'nip': company.get('nip'),
+ 'city': company.get('address_city'),
+ 'seo_audit': {
+ 'audited_at': company.get('seo_audited_at').isoformat() if company.get('seo_audited_at') else None,
+ 'audit_version': company.get('seo_audit_version'),
+ 'overall_score': company.get('seo_overall_score'),
+ 'health_score': company.get('seo_health_score'),
+ }
+ }
+
+ if company.get('seo_audited_at'):
+ company_data['seo_audit']['pagespeed'] = {
+ 'seo_score': company.get('pagespeed_seo_score'),
+ 'performance_score': company.get('pagespeed_performance_score'),
+ 'accessibility_score': company.get('pagespeed_accessibility_score'),
+ 'best_practices_score': company.get('pagespeed_best_practices_score'),
+ }
+ company_data['seo_audit']['on_page'] = {
+ 'meta_title': company.get('meta_title'),
+ 'meta_description': company.get('meta_description'),
+ 'h1_count': company.get('h1_count'),
+ 'h2_count': company.get('h2_count'),
+ 'h3_count': company.get('h3_count'),
+ 'total_images': company.get('total_images'),
+ 'images_without_alt': company.get('images_without_alt'),
+ 'internal_links': company.get('internal_links_count'),
+ 'external_links': company.get('external_links_count'),
+ 'has_structured_data': company.get('has_structured_data'),
+ 'structured_data_types': company.get('structured_data_types'),
+ }
+ company_data['seo_audit']['technical'] = {
+ 'has_ssl': company.get('has_ssl'),
+ 'has_sitemap': company.get('has_sitemap'),
+ 'has_robots_txt': company.get('has_robots_txt'),
+ 'has_canonical': company.get('has_canonical'),
+ 'is_indexable': company.get('is_indexable'),
+ 'is_mobile_friendly': company.get('is_mobile_friendly'),
+ 'viewport_configured': company.get('viewport_configured'),
+ 'http_status': company.get('http_status_code'),
+ 'load_time_ms': company.get('load_time_ms'),
+ }
+ company_data['seo_audit']['core_web_vitals'] = {
+ 'lcp_ms': company.get('largest_contentful_paint_ms'),
+ 'fid_ms': company.get('first_input_delay_ms'),
+ 'cls': float(company.get('cumulative_layout_shift')) if company.get('cumulative_layout_shift') else None,
+ }
+ company_data['seo_audit']['social'] = {
+ 'has_og_tags': company.get('has_og_tags'),
+ 'og_title': company.get('og_title'),
+ 'has_twitter_cards': company.get('has_twitter_cards'),
+ }
+ company_data['seo_audit']['issues'] = company.get('seo_issues') or []
+ company_data['seo_audit']['errors'] = company.get('seo_audit_errors') or []
+
+ export['companies'].append(company_data)
+
+ return export
+
+ def _calculate_statistics(self, companies: List[Dict[str, Any]]) -> Dict[str, Any]:
+ """Calculate summary statistics from company data."""
+ scores = [c.get('seo_overall_score') for c in companies if c.get('seo_overall_score') is not None]
+ perf_scores = [c.get('pagespeed_performance_score') for c in companies if c.get('pagespeed_performance_score') is not None]
+
+ stats = {
+ 'seo_scores': {
+ 'average': round(sum(scores) / len(scores), 1) if scores else None,
+ 'min': min(scores) if scores else None,
+ 'max': max(scores) if scores else None,
+ 'count': len(scores),
+ 'distribution': {
+ 'excellent_90_100': sum(1 for s in scores if s >= 90),
+ 'good_70_89': sum(1 for s in scores if 70 <= s < 90),
+ 'fair_50_69': sum(1 for s in scores if 50 <= s < 70),
+ 'poor_0_49': sum(1 for s in scores if s < 50),
+ }
+ },
+ 'performance_scores': {
+ 'average': round(sum(perf_scores) / len(perf_scores), 1) if perf_scores else None,
+ 'count': len(perf_scores),
+ },
+ 'technical': {
+ 'with_ssl': sum(1 for c in companies if c.get('has_ssl')),
+ 'with_sitemap': sum(1 for c in companies if c.get('has_sitemap')),
+ 'with_robots_txt': sum(1 for c in companies if c.get('has_robots_txt')),
+ 'mobile_friendly': sum(1 for c in companies if c.get('is_mobile_friendly')),
+ 'with_structured_data': sum(1 for c in companies if c.get('has_structured_data')),
+ 'with_og_tags': sum(1 for c in companies if c.get('has_og_tags')),
+ },
+ }
+
+ return stats
+
+ def save_html_report(
+ self,
+ html: str,
+ output_path: str
+ ) -> str:
+ """
+ Save HTML report to file.
+
+ Args:
+ html: HTML content string.
+ output_path: Path to save file.
+
+ Returns:
+ Full path to saved file.
+ """
+ path = Path(output_path)
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(html, encoding='utf-8')
+ return str(path.absolute())
+
+ def save_json_export(
+ self,
+ data: Dict[str, Any],
+ output_path: str
+ ) -> str:
+ """
+ Save JSON export to file.
+
+ Args:
+ data: Data dict to serialize.
+ output_path: Path to save file.
+
+ Returns:
+ Full path to saved file.
+ """
+ path = Path(output_path)
+ path.parent.mkdir(parents=True, exist_ok=True)
+
+ def json_serializer(obj):
+ if hasattr(obj, 'isoformat'):
+ return obj.isoformat()
+ if hasattr(obj, '__float__'):
+ return float(obj)
+ raise TypeError(f'Object of type {type(obj)} is not JSON serializable')
+
+ with open(path, 'w', encoding='utf-8') as f:
+ json.dump(data, f, indent=2, ensure_ascii=False, default=json_serializer)
+
+ return str(path.absolute())
+
+
def parse_batch_argument(batch_str: str) -> Tuple[int, int]:
    """
    Parse a batch range given as 'START-END' (e.g. '1-10').

    Returns:
        (start, end) tuple of ints, 1-indexed and inclusive.

    Raises:
        ValueError: when the format is wrong, the values are not integers,
            start < 1, or end < start.
    """
    pieces = batch_str.split('-')
    if len(pieces) != 2:
        raise ValueError(f"Invalid batch format '{batch_str}'. Use START-END (e.g., 1-10)")

    try:
        low, high = (int(piece.strip()) for piece in pieces)
    except ValueError:
        raise ValueError(f"Invalid batch values '{batch_str}'. START and END must be numbers")

    if low < 1:
        raise ValueError(f"Invalid batch start '{low}'. Must be >= 1")
    if high < low:
        raise ValueError(f"Invalid batch range '{low}-{high}'. END must be >= START")

    return low, high
+
+
def main():
    """
    CLI entry point: parse arguments, fetch audit data from the database, and
    write the requested HTML reports and/or JSON export to the output
    directory.

    Exits with status 1 on invalid arguments, database failure, or when no
    companies match the selection.
    """
    parser = argparse.ArgumentParser(
        description='Generate SEO reports from Norda Biznes audit data',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python seo_report_generator.py --company-id 26 --html
  python seo_report_generator.py --all --html --output ./reports
  python seo_report_generator.py --batch 1-10 --json
  python seo_report_generator.py --all --json --output ./exports
  python seo_report_generator.py --all --html --json --output ./output

Output formats:
  --html  Generate styled HTML reports
  --json  Generate JSON exports for integration

File naming:
  Single company HTML: seo_report_{slug}.html
  Batch HTML summary: seo_report_batch_{timestamp}.html
  JSON export: seo_export_{timestamp}.json
    """
    )

    # Selection arguments
    selection = parser.add_argument_group('Company Selection (choose one)')
    selection.add_argument('--company-id', type=int, metavar='ID',
                           help='Generate report for single company by ID')
    selection.add_argument('--company-ids', type=str, metavar='IDS',
                           help='Generate reports for multiple companies (comma-separated IDs)')
    selection.add_argument('--batch', type=str, metavar='RANGE',
                           help='Generate reports for batch of companies (e.g., 1-10)')
    selection.add_argument('--all', action='store_true',
                           help='Generate reports for all companies')

    # Output format arguments
    output_group = parser.add_argument_group('Output Format')
    output_group.add_argument('--html', action='store_true',
                              help='Generate HTML reports')
    output_group.add_argument('--json', action='store_true',
                              help='Generate JSON export')

    # Options
    options = parser.add_argument_group('Options')
    options.add_argument('--output', '-o', type=str, metavar='DIR', default='.',
                         help='Output directory (default: current directory)')
    options.add_argument('--no-recommendations', action='store_true',
                         help='Exclude recommendations from HTML reports')
    options.add_argument('--batch-summary', action='store_true',
                         help='Generate batch summary HTML instead of individual reports')
    options.add_argument('--verbose', '-v', action='store_true',
                         help='Verbose output')
    options.add_argument('--database-url', type=str, metavar='URL',
                         help='Database connection URL')

    args = parser.parse_args()

    # Configure logging
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Validate selection: exactly one selection method must be used.
    selection_count = sum([
        args.company_id is not None,
        args.company_ids is not None,
        args.batch is not None,
        args.all
    ])

    if selection_count == 0:
        parser.print_help()
        print("\nError: Please specify one of --company-id, --company-ids, --batch, or --all")
        sys.exit(1)

    if selection_count > 1:
        print("Error: Please specify only one selection method")
        sys.exit(1)

    # Validate output format
    if not args.html and not args.json:
        parser.print_help()
        print("\nError: Please specify at least one output format: --html or --json")
        sys.exit(1)

    # Parse selection arguments
    company_ids = None
    batch_start, batch_end = None, None

    if args.company_id:
        company_ids = [args.company_id]
    elif args.company_ids:
        try:
            company_ids = [int(x.strip()) for x in args.company_ids.split(',')]
        except ValueError:
            print("Error: Invalid --company-ids format. Use comma-separated integers")
            sys.exit(1)
    elif args.batch:
        try:
            batch_start, batch_end = parse_batch_argument(args.batch)
        except ValueError as e:
            print(f"Error: {e}")
            sys.exit(1)

    # Initialize generator
    database_url = args.database_url or DATABASE_URL
    try:
        generator = SEOReportGenerator(database_url=database_url)
    except Exception as e:
        logger.error(f"Failed to initialize: {e}")
        print(f"Error: {e}")
        sys.exit(1)

    # Fetch data
    logger.info("Fetching company data from database...")
    try:
        companies = generator.get_companies_with_seo_data(
            company_ids=company_ids,
            batch_start=batch_start,
            batch_end=batch_end
        )
    except Exception as e:
        logger.error(f"Failed to fetch data: {e}")
        print(f"Error fetching data: {e}")
        sys.exit(1)

    if not companies:
        print("No companies found matching the criteria")
        sys.exit(1)

    logger.info(f"Found {len(companies)} companies")

    # Create output directory
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    generated_files = []

    # Generate HTML reports
    if args.html:
        if args.batch_summary or len(companies) > 1:
            # Generate batch summary
            logger.info("Generating batch HTML summary...")
            html = generator.generate_batch_html_report(companies)
            filename = f"seo_report_batch_{timestamp}.html"
            filepath = generator.save_html_report(html, output_dir / filename)
            generated_files.append(filepath)
            logger.info(f"Saved: {filepath}")

            # Also generate individual reports if not only summary
            if not args.batch_summary:
                for company in companies:
                    # BUG FIX: rows always carry a 'slug' key (possibly None),
                    # so dict.get()'s default never applied and a NULL slug
                    # produced 'seo_report_None.html'; use `or` for the fallback.
                    slug = company.get('slug') or f"company_{company.get('id')}"
                    html = generator.generate_html_report(
                        company,
                        include_recommendations=not args.no_recommendations
                    )
                    filename = f"seo_report_{slug}.html"
                    filepath = generator.save_html_report(html, output_dir / filename)
                    generated_files.append(filepath)
                    logger.debug(f"Saved: {filepath}")

                logger.info(f"Generated {len(companies)} individual HTML reports")
        else:
            # Single company report
            company = companies[0]
            # BUG FIX: same NULL-slug fallback as above.
            slug = company.get('slug') or f"company_{company.get('id')}"
            html = generator.generate_html_report(
                company,
                include_recommendations=not args.no_recommendations
            )
            filename = f"seo_report_{slug}.html"
            filepath = generator.save_html_report(html, output_dir / filename)
            generated_files.append(filepath)
            logger.info(f"Saved: {filepath}")

    # Generate JSON export
    if args.json:
        logger.info("Generating JSON export...")
        data = generator.generate_json_export(companies)
        filename = f"seo_export_{timestamp}.json"
        filepath = generator.save_json_export(data, output_dir / filename)
        generated_files.append(filepath)
        logger.info(f"Saved: {filepath}")

    # Summary
    print("\n" + "=" * 60)
    print("REPORT GENERATION COMPLETE")
    print("=" * 60)
    print(f"Companies processed: {len(companies)}")
    print(f"Files generated: {len(generated_files)}")
    print(f"Output directory: {output_dir.absolute()}")
    print("\nGenerated files:")
    for f in generated_files:
        print(f"  - {f}")
    print("=" * 60)
+
+if __name__ == '__main__':
+ main()