#!/usr/bin/env python3
"""
SEO Report Generator for Norda Biznes
=====================================

Generates HTML reports (single company or batch) and JSON exports from
SEO audit data. Designed for offline viewing, sharing with clients, and
archiving audit results.

Usage:
    python seo_report_generator.py --company-id 26 --html
    python seo_report_generator.py --all --html --output ./reports
    python seo_report_generator.py --batch 1-10 --json
    python seo_report_generator.py --all --json --output ./exports

Output:
    - HTML: Styled, standalone reports suitable for viewing in browsers
    - JSON: Machine-readable exports for integration with other tools

Author: Maciej Pienczyn, InPi sp. z o.o.
Date: 2026-01-08
"""

import os
import sys
import json
import argparse
import logging
from datetime import datetime
from typing import Optional, Dict, List, Any, Tuple
from pathlib import Path
from html import escape

from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

# Database configuration
# WARNING: The fallback DATABASE_URL uses a placeholder password.
# Production credentials MUST be set via the DATABASE_URL environment variable.
# NEVER commit real credentials to version control (CWE-798).
DATABASE_URL = os.getenv(
    'DATABASE_URL',
    'postgresql://nordabiz_app:CHANGE_ME@127.0.0.1:5432/nordabiz'
)

# Report version for tracking
REPORT_VERSION = '1.0.0'

# Columns whose content may arrive from the driver as a raw JSON string
# and must be decoded after fetching (see get_companies_with_seo_data).
_JSON_FIELDS = ('seo_issues', 'seo_audit_errors')


class SEOReportGenerator:
    """
    Generates HTML and JSON reports from SEO audit data stored in database.
    """

    def __init__(self, database_url: str = DATABASE_URL):
        """
        Initialize report generator.

        Args:
            database_url: Database connection string.
        """
        self.engine = create_engine(database_url)
        self.Session = sessionmaker(bind=self.engine)

    def get_companies_with_seo_data(
        self,
        company_ids: Optional[List[int]] = None,
        batch_start: Optional[int] = None,
        batch_end: Optional[int] = None
    ) -> List[Dict[str, Any]]:
        """
        Fetch companies with their SEO analysis data from database.

        Args:
            company_ids: List of specific company IDs to fetch.
            batch_start: Start index for batch processing (1-indexed).
            batch_end: End index for batch processing (1-indexed).

        Returns:
            List of company dicts with SEO analysis data.
        """
        with self.Session() as session:
            base_query = """
                SELECT
                    c.id, c.name, c.slug, c.website, c.address_city,
                    c.nip, c.email, c.phone,
                    cat.name as category_name,
                    wa.analyzed_at, wa.website_url, wa.final_url,
                    wa.http_status_code, wa.load_time_ms,
                    wa.pagespeed_seo_score, wa.pagespeed_performance_score,
                    wa.pagespeed_accessibility_score, wa.pagespeed_best_practices_score,
                    wa.meta_title, wa.meta_description, wa.meta_keywords,
                    wa.h1_count, wa.h2_count, wa.h3_count, wa.h1_text,
                    wa.total_images, wa.images_without_alt, wa.images_with_alt,
                    wa.internal_links_count, wa.external_links_count,
                    wa.has_structured_data, wa.structured_data_types,
                    wa.has_canonical, wa.canonical_url,
                    wa.is_indexable, wa.noindex_reason,
                    wa.has_sitemap, wa.has_robots_txt,
                    wa.viewport_configured, wa.is_mobile_friendly,
                    wa.largest_contentful_paint_ms, wa.interaction_to_next_paint_ms,
                    wa.cumulative_layout_shift,
                    wa.has_og_tags, wa.og_title, wa.og_description, wa.og_image,
                    wa.has_twitter_cards, wa.html_lang, wa.has_hreflang,
                    wa.word_count_homepage,
                    wa.seo_audit_version, wa.seo_audited_at, wa.seo_audit_errors,
                    wa.seo_overall_score, wa.seo_health_score, wa.seo_issues,
                    wa.has_ssl, wa.ssl_expires_at
                FROM companies c
                LEFT JOIN company_website_analysis wa ON c.id = wa.company_id
                LEFT JOIN categories cat ON c.category_id = cat.id
                WHERE c.is_active = TRUE
            """

            # Three mutually exclusive selection modes: explicit IDs,
            # a 1-indexed batch window, or all active companies.
            if company_ids:
                query = text(base_query + " AND c.id = ANY(:ids) ORDER BY c.id")
                result = session.execute(query, {'ids': company_ids})
            elif batch_start is not None and batch_end is not None:
                query = text(base_query + " ORDER BY c.id OFFSET :offset LIMIT :limit")
                result = session.execute(query, {
                    'offset': batch_start - 1,
                    'limit': batch_end - batch_start + 1
                })
            else:
                query = text(base_query + " ORDER BY c.id")
                result = session.execute(query)

            companies = []
            for row in result:
                company = dict(row._mapping)
                # JSON columns may come back as raw strings depending on the
                # driver; decode them, falling back to an empty list on
                # malformed content. (Previously the same try/except block
                # was duplicated once per field.)
                for field in _JSON_FIELDS:
                    value = company.get(field)
                    if value and isinstance(value, str):
                        try:
                            company[field] = json.loads(value)
                        except json.JSONDecodeError:
                            company[field] = []
                companies.append(company)

            return companies

{safe(company.get('name'))}

Raport SEO wygenerowany: {datetime.now().strftime('%d.%m.%Y %H:%M')}
Strona: {safe(company.get('website') or company.get('website_url') or 'Brak')}
{f'
Kategoria: {safe(company.get("category_name"))}
' if company.get('category_name') else ''}

Wyniki SEO

Ogolny wynik SEO
{company.get('seo_overall_score') if company.get('seo_overall_score') is not None else '—'}
{score_label(company.get('seo_overall_score'))}
PageSpeed SEO
{company.get('pagespeed_seo_score') if company.get('pagespeed_seo_score') is not None else '—'}
{score_label(company.get('pagespeed_seo_score'))}
Wydajnosc
{company.get('pagespeed_performance_score') if company.get('pagespeed_performance_score') is not None else '—'}
{score_label(company.get('pagespeed_performance_score'))}
Dostepnosc
{company.get('pagespeed_accessibility_score') if company.get('pagespeed_accessibility_score') is not None else '—'}
{score_label(company.get('pagespeed_accessibility_score'))}

Szczegoly techniczne

Meta tagi

Tytul strony {self._truncate(safe(company.get('meta_title')), 40) or '—'}
Opis meta {f'Tak ({len(company.get("meta_description") or "")} zn.)' if company.get('meta_description') else 'Brak'}
Canonical URL {'Tak' if company.get('has_canonical') else 'Nie'}

Struktura naglowkow

H1 {self._h1_badge(company.get('h1_count'))}
H2 {company.get('h2_count') if company.get('h2_count') is not None else '—'}
H3 {company.get('h3_count') if company.get('h3_count') is not None else '—'}

Obrazy

Liczba obrazow {company.get('total_images') if company.get('total_images') is not None else '—'}
Bez alt {self._images_alt_badge(company.get('images_without_alt'), company.get('total_images'))}
Z alt {company.get('images_with_alt') if company.get('images_with_alt') is not None else '—'}

Linki

Wewnetrzne {company.get('internal_links_count') if company.get('internal_links_count') is not None else '—'}
Zewnetrzne {company.get('external_links_count') if company.get('external_links_count') is not None else '—'}

Techniczne SEO

Pliki i indeksowanie

robots.txt {'Tak' if company.get('has_robots_txt') else 'Nie'}
sitemap.xml {'Tak' if company.get('has_sitemap') else 'Nie'}
Indeksowalnosc {'Tak' if company.get('is_indexable') else f'Nie ({safe(company.get("noindex_reason") or "")})'}

Bezpieczenstwo i mobilnosc

SSL/HTTPS {'Tak' if company.get('has_ssl') else 'Nie'}
Viewport {'Tak' if company.get('viewport_configured') else 'Nie'}
Mobile-friendly {'Tak' if company.get('is_mobile_friendly') else 'Nie'}

Dane strukturalne

Schema.org {'Tak' if company.get('has_structured_data') else 'Nie'}
Typy {', '.join(company.get('structured_data_types') or []) or '—'}

Social Media

Open Graph {'Tak' if company.get('has_og_tags') else 'Nie'}
Twitter Cards {'Tak' if company.get('has_twitter_cards') else 'Nie'}
Jezyk (lang) {safe(company.get('html_lang')) or '—'}
{self._core_web_vitals_section(company)} {self._issues_section(company)} {self._recommendations_section(recommendations) if recommendations else ''}
''' return html def _truncate(self, text: str, length: int) -> str: """Truncate text with ellipsis.""" if not text: return '' if len(text) <= length: return text return text[:length] + '...' def _h1_badge(self, count: Optional[int]) -> str: """Generate badge for H1 count.""" if count is None: return '' if count == 1: return f'{count}' if count == 0: return '0 (brak!)' return f'{count} (za duzo)' def _images_alt_badge(self, without_alt: Optional[int], total: Optional[int]) -> str: """Generate badge for images without alt.""" if without_alt is None: return '' if without_alt == 0: return '0' if total and without_alt / total > 0.5: return f'{without_alt}' return f'{without_alt}' def _core_web_vitals_section(self, company: Dict[str, Any]) -> str: """Generate Core Web Vitals section HTML.""" lcp = company.get('largest_contentful_paint_ms') inp = company.get('interaction_to_next_paint_ms') cls = company.get('cumulative_layout_shift') if lcp is None and inp is None and cls is None: return '' def lcp_status(val): if val is None: return ('—', 'badge-secondary') if val <= 2500: return (f'{val}ms', 'badge-success') if val <= 4000: return (f'{val}ms', 'badge-warning') return (f'{val}ms', 'badge-danger') def inp_status(val): if val is None: return ('—', 'badge-secondary') if val <= 200: return (f'{val}ms', 'badge-success') if val <= 500: return (f'{val}ms', 'badge-warning') return (f'{val}ms', 'badge-danger') def cls_status(val): if val is None: return ('—', 'badge-secondary') if val <= 0.1: return (f'{val:.3f}', 'badge-success') if val <= 0.25: return (f'{val:.3f}', 'badge-warning') return (f'{val:.3f}', 'badge-danger') lcp_val, lcp_class = lcp_status(lcp) inp_val, inp_class = inp_status(inp) cls_val, cls_class = cls_status(cls) return f'''

Core Web Vitals

LCP (Largest Contentful Paint)

Wynik {lcp_val}
Cel < 2500ms

INP (Interaction to Next Paint)

Wynik {inp_val}
Cel < 200ms

CLS (Cumulative Layout Shift)

Wynik {cls_val}
Cel < 0.1
''' def _issues_section(self, company: Dict[str, Any]) -> str: """Generate issues section HTML.""" issues = company.get('seo_issues') or [] errors = company.get('seo_audit_errors') or [] if not issues and not errors: return '' items_html = '' for issue in issues: if isinstance(issue, dict): severity = issue.get('severity', 'info') message = escape(issue.get('message', '')) else: severity = 'info' message = escape(str(issue)) items_html += f'
  • {message}
  • \n' for error in errors: items_html += f'
  • {escape(str(error))}
  • \n' return f'''

    Wykryte problemy

    ''' def _recommendations_section(self, recommendations: List[str]) -> str: """Generate recommendations section HTML.""" if not recommendations: return '' items_html = ''.join(f'
  • {escape(rec)}
  • \n' for rec in recommendations) return f'''

    Rekomendacje

      {items_html}
    ''' def _generate_recommendations(self, company: Dict[str, Any]) -> List[str]: """Generate SEO improvement recommendations based on audit data.""" recommendations = [] # Meta tags if not company.get('meta_title'): recommendations.append( 'Dodaj znacznik do strony. Powinien miec 50-60 znakow i zawierac slowa kluczowe.' ) if not company.get('meta_description'): recommendations.append( 'Dodaj meta description (150-160 znakow). Dobry opis zwieksza CTR w wynikach wyszukiwania.' ) # Headings h1_count = company.get('h1_count') if h1_count == 0: recommendations.append( 'Dodaj naglowek H1 do strony. Kazda strona powinna miec dokladnie jeden H1.' ) elif h1_count and h1_count > 1: recommendations.append( f'Strona ma {h1_count} naglowkow H1. Pozostaw tylko jeden glowny naglowek H1.' ) # Images images_without_alt = company.get('images_without_alt') if images_without_alt and images_without_alt > 0: recommendations.append( f'Dodaj atrybuty alt do {images_without_alt} obrazow. Alt poprawia SEO i dostepnosc.' ) # Technical SEO if not company.get('has_robots_txt'): recommendations.append( 'Utworz plik robots.txt w glownym katalogu strony.' ) if not company.get('has_sitemap'): recommendations.append( 'Utworz i zglos mape strony (sitemap.xml) w Google Search Console.' ) if not company.get('has_canonical'): recommendations.append( 'Dodaj znacznik canonical URL aby uniknac problemow z duplikacja tresci.' ) if not company.get('has_ssl'): recommendations.append( 'Wlacz certyfikat SSL (HTTPS). Google premiuje strony z bezpiecznym polaczeniem.' ) # Mobile if not company.get('viewport_configured'): recommendations.append( 'Dodaj znacznik viewport meta dla prawidlowego wyswietlania na urzadzeniach mobilnych.' ) # Structured data if not company.get('has_structured_data'): recommendations.append( 'Dodaj dane strukturalne (Schema.org) - np. LocalBusiness dla lepszej widocznosci w Google.' 
) # Open Graph if not company.get('has_og_tags'): recommendations.append( 'Dodaj znaczniki Open Graph dla lepszego wygladu przy udostepnianiu w mediach spolecznosciowych.' ) # Performance lcp = company.get('largest_contentful_paint_ms') if lcp and lcp > 2500: recommendations.append( f'Popraw LCP (obecnie {lcp}ms). Zoptymalizuj obrazy i skrypty dla szybszego ladowania.' ) cls = company.get('cumulative_layout_shift') if cls and cls > 0.1: recommendations.append( f'Popraw CLS (obecnie {cls:.3f}). Zdefiniuj wymiary obrazow i unikaj dynamicznego dodawania tresci.' ) return recommendations def generate_batch_html_report( self, companies: List[Dict[str, Any]], title: str = "Raport SEO - Norda Biznes" ) -> str: """ Generate batch HTML report summarizing multiple companies. Args: companies: List of company data dicts with SEO analysis. title: Report title. Returns: HTML string of the batch summary report. """ def safe(value): if value is None: return '' return escape(str(value)) def score_color(score): if score is None: return '#6c757d' if score >= 90: return '#28a745' if score >= 50: return '#ffc107' return '#dc3545' # Calculate statistics total = len(companies) audited = sum(1 for c in companies if c.get('seo_audited_at')) scores = [c.get('seo_overall_score') for c in companies if c.get('seo_overall_score') is not None] avg_score = sum(scores) / len(scores) if scores else 0 excellent = sum(1 for s in scores if s >= 90) good = sum(1 for s in scores if 70 <= s < 90) fair = sum(1 for s in scores if 50 <= s < 70) poor = sum(1 for s in scores if s < 50) # Generate table rows rows_html = '' for company in sorted(companies, key=lambda c: c.get('seo_overall_score') or 0, reverse=True): overall = company.get('seo_overall_score') perf = company.get('pagespeed_performance_score') seo = company.get('pagespeed_seo_score') acc = company.get('pagespeed_accessibility_score') rows_html += f''' <tr> <td><strong>{safe(company.get('name'))}</strong></td> 
<td>{safe(company.get('category_name') or '—')}</td> <td style="color: {score_color(overall)}; font-weight: bold;">{overall if overall is not None else '—'}</td> <td style="color: {score_color(seo)};">{seo if seo is not None else '—'}</td> <td style="color: {score_color(perf)};">{perf if perf is not None else '—'}</td> <td style="color: {score_color(acc)};">{acc if acc is not None else '—'}</td> <td>{'<span class="badge badge-success">Tak</span>' if company.get('has_ssl') else '<span class="badge badge-danger">Nie</span>'}</td> <td>{'<span class="badge badge-success">Tak</span>' if company.get('is_mobile_friendly') else '<span class="badge badge-warning">Nie</span>'}</td> </tr> ''' html = f'''<!DOCTYPE html> <html lang="pl"> <head> <meta charset="UTF-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0"> <title>{safe(title)}

    {safe(title)}

    Wygenerowano: {datetime.now().strftime('%d.%m.%Y %H:%M')}
    {total}
    Firm w raporcie
    {audited}
    Przebadanych
    {avg_score:.1f}
    Sredni wynik
    {excellent}
    Doskonaly (90+)
    {good}
    Dobry (70-89)
    {fair}
    Sredni (50-69)
    {poor}
    Slaby (<50)

    Wyniki poszczegolnych firm

    {rows_html}
    Firma Kategoria Wynik SEO PS SEO Wydajnosc Dostepnosc SSL Mobile
    def generate_json_export(
        self,
        companies: List[Dict[str, Any]],
        include_raw_data: bool = False
    ) -> Dict[str, Any]:
        """
        Generate JSON export of SEO audit data.

        Args:
            companies: List of company data dicts.
            include_raw_data: Whether to include all raw data fields.
                NOTE(review): currently unused in the visible implementation;
                kept for interface compatibility.

        Returns:
            Dict ready for JSON serialization.
        """
        export = {
            'report_version': REPORT_VERSION,
            'generated_at': datetime.now().isoformat(),
            'total_companies': len(companies),
            'audited_companies': sum(1 for c in companies if c.get('seo_audited_at')),
            'statistics': self._calculate_statistics(companies),
            'companies': []
        }

        for company in companies:
            company_data = {
                'id': company.get('id'),
                'name': company.get('name'),
                'slug': company.get('slug'),
                'website': company.get('website') or company.get('website_url'),
                'category': company.get('category_name'),
                'nip': company.get('nip'),
                'city': company.get('address_city'),
                'seo_audit': {
                    'audited_at': company.get('seo_audited_at').isoformat()
                                  if company.get('seo_audited_at') else None,
                    'audit_version': company.get('seo_audit_version'),
                    'overall_score': company.get('seo_overall_score'),
                    'health_score': company.get('seo_health_score'),
                }
            }

            # Detailed sections only exist for companies that were audited.
            if company.get('seo_audited_at'):
                company_data['seo_audit']['pagespeed'] = {
                    'seo_score': company.get('pagespeed_seo_score'),
                    'performance_score': company.get('pagespeed_performance_score'),
                    'accessibility_score': company.get('pagespeed_accessibility_score'),
                    'best_practices_score': company.get('pagespeed_best_practices_score'),
                }
                company_data['seo_audit']['on_page'] = {
                    'meta_title': company.get('meta_title'),
                    'meta_description': company.get('meta_description'),
                    'h1_count': company.get('h1_count'),
                    'h2_count': company.get('h2_count'),
                    'h3_count': company.get('h3_count'),
                    'total_images': company.get('total_images'),
                    'images_without_alt': company.get('images_without_alt'),
                    'internal_links': company.get('internal_links_count'),
                    'external_links': company.get('external_links_count'),
                    'has_structured_data': company.get('has_structured_data'),
                    'structured_data_types': company.get('structured_data_types'),
                }
                company_data['seo_audit']['technical'] = {
                    'has_ssl': company.get('has_ssl'),
                    'has_sitemap': company.get('has_sitemap'),
                    'has_robots_txt': company.get('has_robots_txt'),
                    'has_canonical': company.get('has_canonical'),
                    'is_indexable': company.get('is_indexable'),
                    'is_mobile_friendly': company.get('is_mobile_friendly'),
                    'viewport_configured': company.get('viewport_configured'),
                    'http_status': company.get('http_status_code'),
                    'load_time_ms': company.get('load_time_ms'),
                }
                company_data['seo_audit']['core_web_vitals'] = {
                    'lcp_ms': company.get('largest_contentful_paint_ms'),
                    'inp_ms': company.get('interaction_to_next_paint_ms'),
                    # BUG FIX: a CLS of exactly 0 (falsy) was previously
                    # exported as None; test against None explicitly.
                    'cls': (float(company['cumulative_layout_shift'])
                            if company.get('cumulative_layout_shift') is not None
                            else None),
                }
                company_data['seo_audit']['social'] = {
                    'has_og_tags': company.get('has_og_tags'),
                    'og_title': company.get('og_title'),
                    'has_twitter_cards': company.get('has_twitter_cards'),
                }
                company_data['seo_audit']['issues'] = company.get('seo_issues') or []
                company_data['seo_audit']['errors'] = company.get('seo_audit_errors') or []

            export['companies'].append(company_data)

        return export

    def _calculate_statistics(self, companies: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Calculate summary statistics from company data."""
        scores = [c.get('seo_overall_score') for c in companies
                  if c.get('seo_overall_score') is not None]
        perf_scores = [c.get('pagespeed_performance_score') for c in companies
                       if c.get('pagespeed_performance_score') is not None]

        stats = {
            'seo_scores': {
                'average': round(sum(scores) / len(scores), 1) if scores else None,
                'min': min(scores) if scores else None,
                'max': max(scores) if scores else None,
                'count': len(scores),
                'distribution': {
                    'excellent_90_100': sum(1 for s in scores if s >= 90),
                    'good_70_89': sum(1 for s in scores if 70 <= s < 90),
                    'fair_50_69': sum(1 for s in scores if 50 <= s < 70),
                    'poor_0_49': sum(1 for s in scores if s < 50),
                }
            },
            'performance_scores': {
                'average': round(sum(perf_scores) / len(perf_scores), 1)
                           if perf_scores else None,
                'count': len(perf_scores),
            },
            'technical': {
                'with_ssl': sum(1 for c in companies if c.get('has_ssl')),
                'with_sitemap': sum(1 for c in companies if c.get('has_sitemap')),
                'with_robots_txt': sum(1 for c in companies if c.get('has_robots_txt')),
                'mobile_friendly': sum(1 for c in companies if c.get('is_mobile_friendly')),
                'with_structured_data': sum(1 for c in companies
                                            if c.get('has_structured_data')),
                'with_og_tags': sum(1 for c in companies if c.get('has_og_tags')),
            },
        }
        return stats

    def save_html_report(self, html: str, output_path: str) -> str:
        """
        Save HTML report to file, creating parent directories as needed.

        Args:
            html: HTML content string.
            output_path: Path to save file.

        Returns:
            Full path to saved file.
        """
        path = Path(output_path)
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(html, encoding='utf-8')
        return str(path.absolute())

    def save_json_export(self, data: Dict[str, Any], output_path: str) -> str:
        """
        Save JSON export to file, creating parent directories as needed.

        Args:
            data: Data dict to serialize.
            output_path: Path to save file.

        Returns:
            Full path to saved file.
        """
        path = Path(output_path)
        path.parent.mkdir(parents=True, exist_ok=True)

        def json_serializer(obj):
            # Fallback for non-JSON-native values: datetimes via isoformat,
            # Decimal-like values via float; anything else is an error.
            if hasattr(obj, 'isoformat'):
                return obj.isoformat()
            if hasattr(obj, '__float__'):
                return float(obj)
            raise TypeError(f'Object of type {type(obj)} is not JSON serializable')

        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False, default=json_serializer)
        return str(path.absolute())
def parse_batch_argument(batch_str: str) -> Tuple[int, int]:
    """Parse a batch range 'START-END' into a (start, end) tuple.

    Raises:
        ValueError: if the format is wrong, the values are non-numeric,
            start < 1, or end < start.
    """
    if '-' not in batch_str:
        raise ValueError(f"Invalid batch format '{batch_str}'. Use START-END (e.g., 1-10)")
    parts = batch_str.split('-')
    if len(parts) != 2:
        raise ValueError(f"Invalid batch format '{batch_str}'. Use START-END (e.g., 1-10)")
    try:
        start = int(parts[0].strip())
        end = int(parts[1].strip())
    except ValueError:
        raise ValueError(f"Invalid batch values '{batch_str}'. START and END must be numbers")
    if start < 1:
        raise ValueError(f"Invalid batch start '{start}'. Must be >= 1")
    if end < start:
        raise ValueError(f"Invalid batch range '{start}-{end}'. END must be >= START")
    return start, end


def main():
    """Main entry point for CLI usage."""
    parser = argparse.ArgumentParser(
        description='Generate SEO reports from Norda Biznes audit data',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python seo_report_generator.py --company-id 26 --html
  python seo_report_generator.py --all --html --output ./reports
  python seo_report_generator.py --batch 1-10 --json
  python seo_report_generator.py --all --json --output ./exports
  python seo_report_generator.py --all --html --json --output ./output

Output formats:
  --html    Generate styled HTML reports
  --json    Generate JSON exports for integration

File naming:
  Single company HTML: seo_report_{slug}.html
  Batch HTML summary:  seo_report_batch_{timestamp}.html
  JSON export:         seo_export_{timestamp}.json
"""
    )

    # Selection arguments
    selection = parser.add_argument_group('Company Selection (choose one)')
    selection.add_argument('--company-id', type=int, metavar='ID',
                           help='Generate report for single company by ID')
    selection.add_argument('--company-ids', type=str, metavar='IDS',
                           help='Generate reports for multiple companies (comma-separated IDs)')
    selection.add_argument('--batch', type=str, metavar='RANGE',
                           help='Generate reports for batch of companies (e.g., 1-10)')
    selection.add_argument('--all', action='store_true',
                           help='Generate reports for all companies')

    # Output format arguments
    output_group = parser.add_argument_group('Output Format')
    output_group.add_argument('--html', action='store_true',
                              help='Generate HTML reports')
    output_group.add_argument('--json', action='store_true',
                              help='Generate JSON export')

    # Options
    options = parser.add_argument_group('Options')
    options.add_argument('--output', '-o', type=str, metavar='DIR', default='.',
                         help='Output directory (default: current directory)')
    options.add_argument('--no-recommendations', action='store_true',
                         help='Exclude recommendations from HTML reports')
    options.add_argument('--batch-summary', action='store_true',
                         help='Generate batch summary HTML instead of individual reports')
    options.add_argument('--verbose', '-v', action='store_true',
                         help='Verbose output')
    options.add_argument('--database-url', type=str, metavar='URL',
                         help='Database connection URL')

    args = parser.parse_args()

    # Configure logging
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Validate selection: exactly one selection method must be given.
    selection_count = sum([
        args.company_id is not None,
        args.company_ids is not None,
        args.batch is not None,
        args.all
    ])
    if selection_count == 0:
        parser.print_help()
        print("\nError: Please specify one of --company-id, --company-ids, --batch, or --all")
        sys.exit(1)
    if selection_count > 1:
        print("Error: Please specify only one selection method")
        sys.exit(1)

    # Validate output format
    if not args.html and not args.json:
        parser.print_help()
        print("\nError: Please specify at least one output format: --html or --json")
        sys.exit(1)

    # Parse selection arguments
    company_ids = None
    batch_start, batch_end = None, None
    # BUG FIX: use `is not None` to be consistent with the selection_count
    # check above (a truthiness test would silently drop --company-id 0).
    if args.company_id is not None:
        company_ids = [args.company_id]
    elif args.company_ids:
        try:
            company_ids = [int(x.strip()) for x in args.company_ids.split(',')]
        except ValueError:
            print("Error: Invalid --company-ids format. Use comma-separated integers")
            sys.exit(1)
    elif args.batch:
        try:
            batch_start, batch_end = parse_batch_argument(args.batch)
        except ValueError as e:
            print(f"Error: {e}")
            sys.exit(1)

    # Initialize generator
    database_url = args.database_url or DATABASE_URL
    try:
        generator = SEOReportGenerator(database_url=database_url)
    except Exception as e:
        logger.error(f"Failed to initialize: {e}")
        print(f"Error: {e}")
        sys.exit(1)

    # Fetch data
    logger.info("Fetching company data from database...")
    try:
        companies = generator.get_companies_with_seo_data(
            company_ids=company_ids,
            batch_start=batch_start,
            batch_end=batch_end
        )
    except Exception as e:
        logger.error(f"Failed to fetch data: {e}")
        print(f"Error fetching data: {e}")
        sys.exit(1)

    if not companies:
        print("No companies found matching the criteria")
        sys.exit(1)
    logger.info(f"Found {len(companies)} companies")

    # Create output directory
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    generated_files = []

    # Generate HTML reports
    if args.html:
        if args.batch_summary or len(companies) > 1:
            # Generate batch summary
            logger.info("Generating batch HTML summary...")
            html = generator.generate_batch_html_report(companies)
            filename = f"seo_report_batch_{timestamp}.html"
            filepath = generator.save_html_report(html, output_dir / filename)
            generated_files.append(filepath)
            logger.info(f"Saved: {filepath}")

            # Also generate individual reports if not only summary
            if not args.batch_summary:
                for company in companies:
                    # BUG FIX: `.get('slug', default)` only covers a missing
                    # key; a NULL slug column yields None. Use `or` so the
                    # fallback also applies then.
                    slug = company.get('slug') or f"company_{company.get('id')}"
                    html = generator.generate_html_report(
                        company,
                        include_recommendations=not args.no_recommendations
                    )
                    filename = f"seo_report_{slug}.html"
                    filepath = generator.save_html_report(html, output_dir / filename)
                    generated_files.append(filepath)
                    logger.debug(f"Saved: {filepath}")
                logger.info(f"Generated {len(companies)} individual HTML reports")
        else:
            # Single company report
            company = companies[0]
            slug = company.get('slug') or f"company_{company.get('id')}"
            html = generator.generate_html_report(
                company,
                include_recommendations=not args.no_recommendations
            )
            filename = f"seo_report_{slug}.html"
            filepath = generator.save_html_report(html, output_dir / filename)
            generated_files.append(filepath)
            logger.info(f"Saved: {filepath}")

    # Generate JSON export
    if args.json:
        logger.info("Generating JSON export...")
        data = generator.generate_json_export(companies)
        filename = f"seo_export_{timestamp}.json"
        filepath = generator.save_json_export(data, output_dir / filename)
        generated_files.append(filepath)
        logger.info(f"Saved: {filepath}")

    # Summary
    print("\n" + "=" * 60)
    print("REPORT GENERATION COMPLETE")
    print("=" * 60)
    print(f"Companies processed: {len(companies)}")
    print(f"Files generated: {len(generated_files)}")
    print(f"Output directory: {output_dir.absolute()}")
    print("\nGenerated files:")
    for f in generated_files:
        print(f"  - {f}")
    print("=" * 60)


if __name__ == '__main__':
    main()