#!/usr/bin/env python3
"""
SEO Report Generator for Norda Biznes
=====================================
Generates HTML reports (single company or batch) and JSON exports from SEO audit data.
Designed for offline viewing, sharing with clients, and archiving audit results.
Usage:
python seo_report_generator.py --company-id 26 --html
python seo_report_generator.py --all --html --output ./reports
python seo_report_generator.py --batch 1-10 --json
python seo_report_generator.py --all --json --output ./exports
Output:
- HTML: Styled, standalone reports suitable for viewing in browsers
- JSON: Machine-readable exports for integration with other tools
Author: Maciej Pienczyn, InPi sp. z o.o.
Date: 2026-01-08
"""
import os
import sys
import json
import argparse
import logging
from datetime import datetime
from typing import Optional, Dict, List, Any, Tuple
from pathlib import Path
from html import escape
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)  # module-level logger (PEP 282 convention)

# Database configuration
# WARNING: The fallback DATABASE_URL uses a placeholder password.
# Production credentials MUST be set via the DATABASE_URL environment variable.
# NEVER commit real credentials to version control (CWE-798).
DATABASE_URL = os.getenv(
    'DATABASE_URL',
    'postgresql://nordabiz_app:CHANGE_ME@127.0.0.1:5432/nordabiz'
)

# Report version for tracking; embedded in every JSON export so downstream
# consumers can detect format changes.
REPORT_VERSION = '1.0.0'
class SEOReportGenerator:
    """
    Generates HTML and JSON reports from SEO audit data stored in database.
    """

    def __init__(self, database_url: str = DATABASE_URL):
        """
        Initialize report generator.

        Args:
            database_url: SQLAlchemy-style database connection string.
        """
        engine = create_engine(database_url)
        self.engine = engine
        # Session factory bound to the engine; query methods below use
        # produced sessions as context managers.
        self.Session = sessionmaker(bind=engine)
def get_companies_with_seo_data(
self,
company_ids: Optional[List[int]] = None,
batch_start: Optional[int] = None,
batch_end: Optional[int] = None
) -> List[Dict[str, Any]]:
"""
Fetch companies with their SEO analysis data from database.
Args:
company_ids: List of specific company IDs to fetch.
batch_start: Start index for batch processing (1-indexed).
batch_end: End index for batch processing (1-indexed).
Returns:
List of company dicts with SEO analysis data.
"""
with self.Session() as session:
base_query = """
SELECT
c.id, c.name, c.slug, c.website, c.address_city,
c.nip, c.email, c.phone,
cat.name as category_name,
wa.analyzed_at, wa.website_url, wa.final_url,
wa.http_status_code, wa.load_time_ms,
wa.pagespeed_seo_score, wa.pagespeed_performance_score,
wa.pagespeed_accessibility_score, wa.pagespeed_best_practices_score,
wa.meta_title, wa.meta_description, wa.meta_keywords,
wa.h1_count, wa.h2_count, wa.h3_count, wa.h1_text,
wa.total_images, wa.images_without_alt, wa.images_with_alt,
wa.internal_links_count, wa.external_links_count,
wa.has_structured_data, wa.structured_data_types,
wa.has_canonical, wa.canonical_url, wa.is_indexable, wa.noindex_reason,
wa.has_sitemap, wa.has_robots_txt,
wa.viewport_configured, wa.is_mobile_friendly,
wa.largest_contentful_paint_ms, wa.interaction_to_next_paint_ms, wa.cumulative_layout_shift,
wa.has_og_tags, wa.og_title, wa.og_description, wa.og_image,
wa.has_twitter_cards, wa.html_lang, wa.has_hreflang,
wa.word_count_homepage,
wa.seo_audit_version, wa.seo_audited_at, wa.seo_audit_errors,
wa.seo_overall_score, wa.seo_health_score, wa.seo_issues,
wa.has_ssl, wa.ssl_expires_at
FROM companies c
LEFT JOIN company_website_analysis wa ON c.id = wa.company_id
LEFT JOIN categories cat ON c.category_id = cat.id
WHERE c.is_active = TRUE
"""
if company_ids:
query = text(base_query + " AND c.id = ANY(:ids) ORDER BY c.id")
result = session.execute(query, {'ids': company_ids})
elif batch_start is not None and batch_end is not None:
query = text(base_query + " ORDER BY c.id OFFSET :offset LIMIT :limit")
result = session.execute(query, {
'offset': batch_start - 1,
'limit': batch_end - batch_start + 1
})
else:
query = text(base_query + " ORDER BY c.id")
result = session.execute(query)
companies = []
for row in result:
company = dict(row._mapping)
# Parse JSON fields if they are strings
if company.get('seo_issues') and isinstance(company['seo_issues'], str):
try:
company['seo_issues'] = json.loads(company['seo_issues'])
except json.JSONDecodeError:
company['seo_issues'] = []
if company.get('seo_audit_errors') and isinstance(company['seo_audit_errors'], str):
try:
company['seo_audit_errors'] = json.loads(company['seo_audit_errors'])
except json.JSONDecodeError:
company['seo_audit_errors'] = []
companies.append(company)
return companies
    def generate_html_report(
        self,
        company: Dict[str, Any],
        include_recommendations: bool = True
    ) -> str:
        """
        Generate HTML report for a single company.

        Args:
            company: Company data dict with SEO analysis (one row produced by
                get_companies_with_seo_data).
            include_recommendations: Whether to include improvement recommendations.

        Returns:
            HTML string of the complete report.
        """
        # Escape HTML in all string values interpolated into the template;
        # None renders as an empty string.
        def safe(value):
            if value is None:
                return ''
            return escape(str(value))

        # Traffic-light hex color for a 0-100 score (gray when missing).
        # NOTE(review): score_color is never referenced by the template below —
        # the original markup (style attributes) appears to have been stripped
        # from this file; confirm the intended template before removing it.
        def score_color(score):
            if score is None:
                return '#6c757d'  # gray
            if score >= 90:
                return '#28a745'  # green
            if score >= 50:
                return '#ffc107'  # yellow
            return '#dc3545'  # red

        # Human-readable (Polish) label for a 0-100 score.
        def score_label(score):
            if score is None:
                return 'Brak danych'
            if score >= 90:
                return 'Doskonały'
            if score >= 70:
                return 'Dobry'
            if score >= 50:
                return 'Średni'
            return 'Wymaga poprawy'

        # Generate recommendations based on issues
        recommendations = []
        if include_recommendations:
            recommendations = self._generate_recommendations(company)

        # Build HTML. The template body below is kept byte-for-byte as found;
        # surrounding presentation markup looks stripped — verify against the
        # original template before editing.
        html = f'''
Raport SEO - {safe(company.get('name'))}
Wyniki SEO
Ogolny wynik SEO
{company.get('seo_overall_score') if company.get('seo_overall_score') is not None else '—'}
{score_label(company.get('seo_overall_score'))}
PageSpeed SEO
{company.get('pagespeed_seo_score') if company.get('pagespeed_seo_score') is not None else '—'}
{score_label(company.get('pagespeed_seo_score'))}
Wydajnosc
{company.get('pagespeed_performance_score') if company.get('pagespeed_performance_score') is not None else '—'}
{score_label(company.get('pagespeed_performance_score'))}
Dostepnosc
{company.get('pagespeed_accessibility_score') if company.get('pagespeed_accessibility_score') is not None else '—'}
{score_label(company.get('pagespeed_accessibility_score'))}
Szczegoly techniczne
Meta tagi
Tytul strony
{self._truncate(safe(company.get('meta_title')), 40) or '—'}
Opis meta
{f'Tak ({len(company.get("meta_description") or "")} zn.)' if company.get('meta_description') else 'Brak'}
Canonical URL
{'Tak' if company.get('has_canonical') else 'Nie'}
Struktura naglowkow
H1
{self._h1_badge(company.get('h1_count'))}
H2
{company.get('h2_count') if company.get('h2_count') is not None else '—'}
H3
{company.get('h3_count') if company.get('h3_count') is not None else '—'}
Obrazy
Liczba obrazow
{company.get('total_images') if company.get('total_images') is not None else '—'}
Bez alt
{self._images_alt_badge(company.get('images_without_alt'), company.get('total_images'))}
Z alt
{company.get('images_with_alt') if company.get('images_with_alt') is not None else '—'}
Linki
Wewnetrzne
{company.get('internal_links_count') if company.get('internal_links_count') is not None else '—'}
Zewnetrzne
{company.get('external_links_count') if company.get('external_links_count') is not None else '—'}
Techniczne SEO
Pliki i indeksowanie
robots.txt
{'Tak' if company.get('has_robots_txt') else 'Nie'}
sitemap.xml
{'Tak' if company.get('has_sitemap') else 'Nie'}
Indeksowalnosc
{'Tak' if company.get('is_indexable') else f'Nie ({safe(company.get("noindex_reason") or "")})'}
Bezpieczenstwo i mobilnosc
SSL/HTTPS
{'Tak' if company.get('has_ssl') else 'Nie'}
Viewport
{'Tak' if company.get('viewport_configured') else 'Nie'}
Mobile-friendly
{'Tak' if company.get('is_mobile_friendly') else 'Nie'}
Dane strukturalne
Schema.org
{'Tak' if company.get('has_structured_data') else 'Nie'}
Typy
{', '.join(company.get('structured_data_types') or []) or '—'}
Social Media
Open Graph
{'Tak' if company.get('has_og_tags') else 'Nie'}
Twitter Cards
{'Tak' if company.get('has_twitter_cards') else 'Nie'}
Jezyk (lang)
{safe(company.get('html_lang')) or '—'}
{self._core_web_vitals_section(company)}
{self._issues_section(company)}
{self._recommendations_section(recommendations) if recommendations else ''}
'''
        return html
def _truncate(self, text: str, length: int) -> str:
"""Truncate text with ellipsis."""
if not text:
return ''
if len(text) <= length:
return text
return text[:length] + '...'
def _h1_badge(self, count: Optional[int]) -> str:
"""Generate badge for H1 count."""
if count is None:
return '—'
if count == 1:
return f'{count}'
if count == 0:
return '0 (brak!)'
return f'{count} (za duzo)'
def _images_alt_badge(self, without_alt: Optional[int], total: Optional[int]) -> str:
"""Generate badge for images without alt."""
if without_alt is None:
return '—'
if without_alt == 0:
return '0'
if total and without_alt / total > 0.5:
return f'{without_alt}'
return f'{without_alt}'
def _core_web_vitals_section(self, company: Dict[str, Any]) -> str:
"""Generate Core Web Vitals section HTML."""
lcp = company.get('largest_contentful_paint_ms')
inp = company.get('interaction_to_next_paint_ms')
cls = company.get('cumulative_layout_shift')
if lcp is None and inp is None and cls is None:
return ''
def lcp_status(val):
if val is None:
return ('—', 'badge-secondary')
if val <= 2500:
return (f'{val}ms', 'badge-success')
if val <= 4000:
return (f'{val}ms', 'badge-warning')
return (f'{val}ms', 'badge-danger')
def inp_status(val):
if val is None:
return ('—', 'badge-secondary')
if val <= 200:
return (f'{val}ms', 'badge-success')
if val <= 500:
return (f'{val}ms', 'badge-warning')
return (f'{val}ms', 'badge-danger')
def cls_status(val):
if val is None:
return ('—', 'badge-secondary')
if val <= 0.1:
return (f'{val:.3f}', 'badge-success')
if val <= 0.25:
return (f'{val:.3f}', 'badge-warning')
return (f'{val:.3f}', 'badge-danger')
lcp_val, lcp_class = lcp_status(lcp)
inp_val, inp_class = inp_status(inp)
cls_val, cls_class = cls_status(cls)
return f'''
Core Web Vitals
LCP (Largest Contentful Paint)
Wynik
{lcp_val}
Cel
< 2500ms
INP (Interaction to Next Paint)
Wynik
{inp_val}
Cel
< 200ms
CLS (Cumulative Layout Shift)
Wynik
{cls_val}
Cel
< 0.1
'''
    def _issues_section(self, company: Dict[str, Any]) -> str:
        """Generate issues section HTML.

        Builds one escaped line per SEO issue (dicts carrying
        severity/message, or plain values) plus one per audit error;
        returns '' when there is nothing to report.
        """
        issues = company.get('seo_issues') or []
        errors = company.get('seo_audit_errors') or []
        if not issues and not errors:
            return ''
        items_html = ''
        for issue in issues:
            if isinstance(issue, dict):
                severity = issue.get('severity', 'info')
                message = escape(issue.get('message', ''))
            else:
                severity = 'info'
                message = escape(str(issue))
            items_html += f'{message}\n'
        for error in errors:
            items_html += f'{escape(str(error))}\n'
        # NOTE(review): items_html (and severity) are built but never
        # interpolated into the returned template below — the surrounding
        # markup appears to have been stripped from this file; confirm the
        # intended template before relying on this output.
        return f'''
'''
    def _recommendations_section(self, recommendations: List[str]) -> str:
        """Generate recommendations section HTML; '' when the list is empty."""
        if not recommendations:
            return ''
        items_html = ''.join(f'{escape(rec)}\n' for rec in recommendations)
        # NOTE(review): items_html is never interpolated into the returned
        # template below — markup appears stripped from this file; verify
        # against the original template.
        return f'''
'''
def _generate_recommendations(self, company: Dict[str, Any]) -> List[str]:
"""Generate SEO improvement recommendations based on audit data."""
recommendations = []
# Meta tags
if not company.get('meta_title'):
recommendations.append(
'Dodaj znacznik do strony. Powinien miec 50-60 znakow i zawierac slowa kluczowe.'
)
if not company.get('meta_description'):
recommendations.append(
'Dodaj meta description (150-160 znakow). Dobry opis zwieksza CTR w wynikach wyszukiwania.'
)
# Headings
h1_count = company.get('h1_count')
if h1_count == 0:
recommendations.append(
'Dodaj naglowek H1 do strony. Kazda strona powinna miec dokladnie jeden H1.'
)
elif h1_count and h1_count > 1:
recommendations.append(
f'Strona ma {h1_count} naglowkow H1. Pozostaw tylko jeden glowny naglowek H1.'
)
# Images
images_without_alt = company.get('images_without_alt')
if images_without_alt and images_without_alt > 0:
recommendations.append(
f'Dodaj atrybuty alt do {images_without_alt} obrazow. Alt poprawia SEO i dostepnosc.'
)
# Technical SEO
if not company.get('has_robots_txt'):
recommendations.append(
'Utworz plik robots.txt w glownym katalogu strony.'
)
if not company.get('has_sitemap'):
recommendations.append(
'Utworz i zglos mape strony (sitemap.xml) w Google Search Console.'
)
if not company.get('has_canonical'):
recommendations.append(
'Dodaj znacznik canonical URL aby uniknac problemow z duplikacja tresci.'
)
if not company.get('has_ssl'):
recommendations.append(
'Wlacz certyfikat SSL (HTTPS). Google premiuje strony z bezpiecznym polaczeniem.'
)
# Mobile
if not company.get('viewport_configured'):
recommendations.append(
'Dodaj znacznik viewport meta dla prawidlowego wyswietlania na urzadzeniach mobilnych.'
)
# Structured data
if not company.get('has_structured_data'):
recommendations.append(
'Dodaj dane strukturalne (Schema.org) - np. LocalBusiness dla lepszej widocznosci w Google.'
)
# Open Graph
if not company.get('has_og_tags'):
recommendations.append(
'Dodaj znaczniki Open Graph dla lepszego wygladu przy udostepnianiu w mediach spolecznosciowych.'
)
# Performance
lcp = company.get('largest_contentful_paint_ms')
if lcp and lcp > 2500:
recommendations.append(
f'Popraw LCP (obecnie {lcp}ms). Zoptymalizuj obrazy i skrypty dla szybszego ladowania.'
)
cls = company.get('cumulative_layout_shift')
if cls and cls > 0.1:
recommendations.append(
f'Popraw CLS (obecnie {cls:.3f}). Zdefiniuj wymiary obrazow i unikaj dynamicznego dodawania tresci.'
)
return recommendations
    def generate_batch_html_report(
        self,
        companies: List[Dict[str, Any]],
        title: str = "Raport SEO - Norda Biznes"
    ) -> str:
        """
        Generate batch HTML report summarizing multiple companies.

        Args:
            companies: List of company data dicts with SEO analysis.
            title: Report title.

        Returns:
            HTML string of the batch summary report.
        """
        # HTML-escape any string value interpolated into the template.
        def safe(value):
            if value is None:
                return ''
            return escape(str(value))

        # Traffic-light hex color for a 0-100 score (gray when missing).
        # NOTE(review): score_color is never referenced by the template below —
        # markup with style attributes appears stripped; confirm before removing.
        def score_color(score):
            if score is None:
                return '#6c757d'
            if score >= 90:
                return '#28a745'
            if score >= 50:
                return '#ffc107'
            return '#dc3545'

        # Calculate statistics
        # NOTE(review): total/audited/good/fair/poor are computed but not used
        # by the visible template (only avg_score and excellent are) — markup
        # appears stripped. Also, avg_score renders as 0.0 when no company has
        # a score; verify that is the intended display.
        total = len(companies)
        audited = sum(1 for c in companies if c.get('seo_audited_at'))
        scores = [c.get('seo_overall_score') for c in companies if c.get('seo_overall_score') is not None]
        avg_score = sum(scores) / len(scores) if scores else 0
        excellent = sum(1 for s in scores if s >= 90)
        good = sum(1 for s in scores if 70 <= s < 90)
        fair = sum(1 for s in scores if 50 <= s < 70)
        poor = sum(1 for s in scores if s < 50)

        # Generate table rows, best overall score first (missing scores sort last).
        rows_html = ''
        for company in sorted(companies, key=lambda c: c.get('seo_overall_score') or 0, reverse=True):
            overall = company.get('seo_overall_score')
            perf = company.get('pagespeed_performance_score')
            seo = company.get('pagespeed_seo_score')
            acc = company.get('pagespeed_accessibility_score')
            rows_html += f'''
| {safe(company.get('name'))} |
{safe(company.get('category_name') or '—')} |
{overall if overall is not None else '—'} |
{seo if seo is not None else '—'} |
{perf if perf is not None else '—'} |
{acc if acc is not None else '—'} |
{'Tak' if company.get('has_ssl') else 'Nie'} |
{'Tak' if company.get('is_mobile_friendly') else 'Nie'} |
'''
        html = f'''
{safe(title)}
{avg_score:.1f}
Sredni wynik
{excellent}
Doskonaly (90+)
Wyniki poszczegolnych firm
| Firma |
Kategoria |
Wynik SEO |
PS SEO |
Wydajnosc |
Dostepnosc |
SSL |
Mobile |
{rows_html}
'''
        return html
def generate_json_export(
self,
companies: List[Dict[str, Any]],
include_raw_data: bool = False
) -> Dict[str, Any]:
"""
Generate JSON export of SEO audit data.
Args:
companies: List of company data dicts.
include_raw_data: Whether to include all raw data fields.
Returns:
Dict ready for JSON serialization.
"""
export = {
'report_version': REPORT_VERSION,
'generated_at': datetime.now().isoformat(),
'total_companies': len(companies),
'audited_companies': sum(1 for c in companies if c.get('seo_audited_at')),
'statistics': self._calculate_statistics(companies),
'companies': []
}
for company in companies:
company_data = {
'id': company.get('id'),
'name': company.get('name'),
'slug': company.get('slug'),
'website': company.get('website') or company.get('website_url'),
'category': company.get('category_name'),
'nip': company.get('nip'),
'city': company.get('address_city'),
'seo_audit': {
'audited_at': company.get('seo_audited_at').isoformat() if company.get('seo_audited_at') else None,
'audit_version': company.get('seo_audit_version'),
'overall_score': company.get('seo_overall_score'),
'health_score': company.get('seo_health_score'),
}
}
if company.get('seo_audited_at'):
company_data['seo_audit']['pagespeed'] = {
'seo_score': company.get('pagespeed_seo_score'),
'performance_score': company.get('pagespeed_performance_score'),
'accessibility_score': company.get('pagespeed_accessibility_score'),
'best_practices_score': company.get('pagespeed_best_practices_score'),
}
company_data['seo_audit']['on_page'] = {
'meta_title': company.get('meta_title'),
'meta_description': company.get('meta_description'),
'h1_count': company.get('h1_count'),
'h2_count': company.get('h2_count'),
'h3_count': company.get('h3_count'),
'total_images': company.get('total_images'),
'images_without_alt': company.get('images_without_alt'),
'internal_links': company.get('internal_links_count'),
'external_links': company.get('external_links_count'),
'has_structured_data': company.get('has_structured_data'),
'structured_data_types': company.get('structured_data_types'),
}
company_data['seo_audit']['technical'] = {
'has_ssl': company.get('has_ssl'),
'has_sitemap': company.get('has_sitemap'),
'has_robots_txt': company.get('has_robots_txt'),
'has_canonical': company.get('has_canonical'),
'is_indexable': company.get('is_indexable'),
'is_mobile_friendly': company.get('is_mobile_friendly'),
'viewport_configured': company.get('viewport_configured'),
'http_status': company.get('http_status_code'),
'load_time_ms': company.get('load_time_ms'),
}
company_data['seo_audit']['core_web_vitals'] = {
'lcp_ms': company.get('largest_contentful_paint_ms'),
'inp_ms': company.get('interaction_to_next_paint_ms'),
'cls': float(company.get('cumulative_layout_shift')) if company.get('cumulative_layout_shift') else None,
}
company_data['seo_audit']['social'] = {
'has_og_tags': company.get('has_og_tags'),
'og_title': company.get('og_title'),
'has_twitter_cards': company.get('has_twitter_cards'),
}
company_data['seo_audit']['issues'] = company.get('seo_issues') or []
company_data['seo_audit']['errors'] = company.get('seo_audit_errors') or []
export['companies'].append(company_data)
return export
def _calculate_statistics(self, companies: List[Dict[str, Any]]) -> Dict[str, Any]:
"""Calculate summary statistics from company data."""
scores = [c.get('seo_overall_score') for c in companies if c.get('seo_overall_score') is not None]
perf_scores = [c.get('pagespeed_performance_score') for c in companies if c.get('pagespeed_performance_score') is not None]
stats = {
'seo_scores': {
'average': round(sum(scores) / len(scores), 1) if scores else None,
'min': min(scores) if scores else None,
'max': max(scores) if scores else None,
'count': len(scores),
'distribution': {
'excellent_90_100': sum(1 for s in scores if s >= 90),
'good_70_89': sum(1 for s in scores if 70 <= s < 90),
'fair_50_69': sum(1 for s in scores if 50 <= s < 70),
'poor_0_49': sum(1 for s in scores if s < 50),
}
},
'performance_scores': {
'average': round(sum(perf_scores) / len(perf_scores), 1) if perf_scores else None,
'count': len(perf_scores),
},
'technical': {
'with_ssl': sum(1 for c in companies if c.get('has_ssl')),
'with_sitemap': sum(1 for c in companies if c.get('has_sitemap')),
'with_robots_txt': sum(1 for c in companies if c.get('has_robots_txt')),
'mobile_friendly': sum(1 for c in companies if c.get('is_mobile_friendly')),
'with_structured_data': sum(1 for c in companies if c.get('has_structured_data')),
'with_og_tags': sum(1 for c in companies if c.get('has_og_tags')),
},
}
return stats
def save_html_report(
self,
html: str,
output_path: str
) -> str:
"""
Save HTML report to file.
Args:
html: HTML content string.
output_path: Path to save file.
Returns:
Full path to saved file.
"""
path = Path(output_path)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(html, encoding='utf-8')
return str(path.absolute())
def save_json_export(
self,
data: Dict[str, Any],
output_path: str
) -> str:
"""
Save JSON export to file.
Args:
data: Data dict to serialize.
output_path: Path to save file.
Returns:
Full path to saved file.
"""
path = Path(output_path)
path.parent.mkdir(parents=True, exist_ok=True)
def json_serializer(obj):
if hasattr(obj, 'isoformat'):
return obj.isoformat()
if hasattr(obj, '__float__'):
return float(obj)
raise TypeError(f'Object of type {type(obj)} is not JSON serializable')
with open(path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False, default=json_serializer)
return str(path.absolute())
def parse_batch_argument(batch_str: str) -> Tuple[int, int]:
    """Parse a batch range given as 'START-END' into a (start, end) tuple.

    Raises:
        ValueError: If the format is not START-END, the bounds are not
            integers, START < 1, or END < START.
    """
    bad_format = f"Invalid batch format '{batch_str}'. Use START-END (e.g., 1-10)"
    if '-' not in batch_str:
        raise ValueError(bad_format)
    pieces = batch_str.split('-')
    if len(pieces) != 2:
        raise ValueError(bad_format)
    try:
        start, end = (int(p.strip()) for p in pieces)
    except ValueError:
        raise ValueError(f"Invalid batch values '{batch_str}'. START and END must be numbers")
    if start < 1:
        raise ValueError(f"Invalid batch start '{start}'. Must be >= 1")
    if end < start:
        raise ValueError(f"Invalid batch range '{start}-{end}'. END must be >= START")
    return start, end
def main():
    """Main entry point for CLI usage.

    Parses CLI arguments, fetches company/SEO rows, writes HTML and/or JSON
    reports to the output directory, and prints a summary. Exits with status 1
    on invalid arguments, initialization/query failure, or an empty result set.
    """
    parser = argparse.ArgumentParser(
        description='Generate SEO reports from Norda Biznes audit data',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python seo_report_generator.py --company-id 26 --html
python seo_report_generator.py --all --html --output ./reports
python seo_report_generator.py --batch 1-10 --json
python seo_report_generator.py --all --json --output ./exports
python seo_report_generator.py --all --html --json --output ./output
Output formats:
--html Generate styled HTML reports
--json Generate JSON exports for integration
File naming:
Single company HTML: seo_report_{slug}.html
Batch HTML summary: seo_report_batch_{timestamp}.html
JSON export: seo_export_{timestamp}.json
"""
    )
    # Selection arguments
    selection = parser.add_argument_group('Company Selection (choose one)')
    selection.add_argument('--company-id', type=int, metavar='ID',
                           help='Generate report for single company by ID')
    selection.add_argument('--company-ids', type=str, metavar='IDS',
                           help='Generate reports for multiple companies (comma-separated IDs)')
    selection.add_argument('--batch', type=str, metavar='RANGE',
                           help='Generate reports for batch of companies (e.g., 1-10)')
    selection.add_argument('--all', action='store_true',
                           help='Generate reports for all companies')
    # Output format arguments
    output_group = parser.add_argument_group('Output Format')
    output_group.add_argument('--html', action='store_true',
                              help='Generate HTML reports')
    output_group.add_argument('--json', action='store_true',
                              help='Generate JSON export')
    # Options
    options = parser.add_argument_group('Options')
    options.add_argument('--output', '-o', type=str, metavar='DIR', default='.',
                         help='Output directory (default: current directory)')
    options.add_argument('--no-recommendations', action='store_true',
                         help='Exclude recommendations from HTML reports')
    options.add_argument('--batch-summary', action='store_true',
                         help='Generate batch summary HTML instead of individual reports')
    options.add_argument('--verbose', '-v', action='store_true',
                         help='Verbose output')
    options.add_argument('--database-url', type=str, metavar='URL',
                         help='Database connection URL')
    args = parser.parse_args()

    # Configure logging
    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Validate selection: exactly one selection method must be given.
    selection_count = sum([
        args.company_id is not None,
        args.company_ids is not None,
        args.batch is not None,
        args.all
    ])
    if selection_count == 0:
        parser.print_help()
        print("\nError: Please specify one of --company-id, --company-ids, --batch, or --all")
        sys.exit(1)
    if selection_count > 1:
        print("Error: Please specify only one selection method")
        sys.exit(1)

    # Validate output format
    if not args.html and not args.json:
        parser.print_help()
        print("\nError: Please specify at least one output format: --html or --json")
        sys.exit(1)

    # Parse selection arguments
    company_ids = None
    batch_start, batch_end = None, None
    # BUGFIX: compare against None rather than truthiness — the validation
    # above uses `is not None`, so `--company-id 0` previously passed
    # validation but fell through every branch here and silently selected
    # ALL companies.
    if args.company_id is not None:
        company_ids = [args.company_id]
    elif args.company_ids:
        try:
            company_ids = [int(x.strip()) for x in args.company_ids.split(',')]
        except ValueError:
            print("Error: Invalid --company-ids format. Use comma-separated integers")
            sys.exit(1)
    elif args.batch:
        try:
            batch_start, batch_end = parse_batch_argument(args.batch)
        except ValueError as e:
            print(f"Error: {e}")
            sys.exit(1)

    # Initialize generator
    database_url = args.database_url or DATABASE_URL
    try:
        generator = SEOReportGenerator(database_url=database_url)
    except Exception as e:
        logger.error(f"Failed to initialize: {e}")
        print(f"Error: {e}")
        sys.exit(1)

    # Fetch data
    logger.info("Fetching company data from database...")
    try:
        companies = generator.get_companies_with_seo_data(
            company_ids=company_ids,
            batch_start=batch_start,
            batch_end=batch_end
        )
    except Exception as e:
        logger.error(f"Failed to fetch data: {e}")
        print(f"Error fetching data: {e}")
        sys.exit(1)
    if not companies:
        print("No companies found matching the criteria")
        sys.exit(1)
    logger.info(f"Found {len(companies)} companies")

    # Create output directory
    output_dir = Path(args.output)
    output_dir.mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    generated_files = []

    # Generate HTML reports
    if args.html:
        if args.batch_summary or len(companies) > 1:
            # Generate batch summary
            logger.info("Generating batch HTML summary...")
            html = generator.generate_batch_html_report(companies)
            filename = f"seo_report_batch_{timestamp}.html"
            filepath = generator.save_html_report(html, output_dir / filename)
            generated_files.append(filepath)
            logger.info(f"Saved: {filepath}")
            # Also generate individual reports if not only summary
            if not args.batch_summary:
                for company in companies:
                    # BUGFIX: use an `or` fallback — a NULL slug column is
                    # still present in the row dict, so dict.get's default
                    # never applied and produced 'seo_report_None.html'.
                    slug = company.get('slug') or f"company_{company.get('id')}"
                    html = generator.generate_html_report(
                        company,
                        include_recommendations=not args.no_recommendations
                    )
                    filename = f"seo_report_{slug}.html"
                    filepath = generator.save_html_report(html, output_dir / filename)
                    generated_files.append(filepath)
                    logger.debug(f"Saved: {filepath}")
                logger.info(f"Generated {len(companies)} individual HTML reports")
        else:
            # Single company report
            company = companies[0]
            # BUGFIX: same NULL-slug fallback as in the batch loop above.
            slug = company.get('slug') or f"company_{company.get('id')}"
            html = generator.generate_html_report(
                company,
                include_recommendations=not args.no_recommendations
            )
            filename = f"seo_report_{slug}.html"
            filepath = generator.save_html_report(html, output_dir / filename)
            generated_files.append(filepath)
            logger.info(f"Saved: {filepath}")

    # Generate JSON export
    if args.json:
        logger.info("Generating JSON export...")
        data = generator.generate_json_export(companies)
        filename = f"seo_export_{timestamp}.json"
        filepath = generator.save_json_export(data, output_dir / filename)
        generated_files.append(filepath)
        logger.info(f"Saved: {filepath}")

    # Summary
    print("\n" + "=" * 60)
    print("REPORT GENERATION COMPLETE")
    print("=" * 60)
    print(f"Companies processed: {len(companies)}")
    print(f"Files generated: {len(generated_files)}")
    print(f"Output directory: {output_dir.absolute()}")
    print("\nGenerated files:")
    for f in generated_files:
        print(f" - {f}")
    print("=" * 60)


if __name__ == '__main__':
    main()