diff --git a/tests/test_seo_audit.py b/tests/test_seo_audit.py
new file mode 100644
index 0000000..dcff784
--- /dev/null
+++ b/tests/test_seo_audit.py
@@ -0,0 +1,1111 @@
+#!/usr/bin/env python3
+"""
+Unit Tests for SEO Audit Functionality
+======================================
+
+Tests for:
+- PageSpeed API client (scripts/pagespeed_client.py)
+- On-Page SEO Analyzer (scripts/seo_analyzer.py)
+- Technical SEO Checker (scripts/seo_analyzer.py)
+- SEO Audit database operations (scripts/seo_audit.py)
+
+Run tests:
+ cd tests
+ python -m pytest test_seo_audit.py -v
+
+Author: Claude Code
+Date: 2026-01-08
+"""
+
+import json
+import sys
+import unittest
+from datetime import datetime, date
+from pathlib import Path
+from unittest.mock import Mock, MagicMock, patch, PropertyMock
+
+# Add scripts directory to path for imports
+sys.path.insert(0, str(Path(__file__).parent.parent / 'scripts'))
+
+# Import modules to test
+from pagespeed_client import (
+ GooglePageSpeedClient,
+ PageSpeedResult,
+ PageSpeedScore,
+ CoreWebVitals,
+ RateLimiter,
+ PageSpeedAPIError,
+ QuotaExceededError,
+ RateLimitError,
+ Strategy,
+ Category,
+)
+from seo_analyzer import (
+ OnPageSEOAnalyzer,
+ OnPageSEOResult,
+ MetaTags,
+ OpenGraphData,
+ TwitterCardData,
+ HeadingStructure,
+ ImageAnalysis,
+ LinkAnalysis,
+ StructuredData,
+ TechnicalSEOChecker,
+ TechnicalSEOResult,
+ RobotsTxtResult,
+ SitemapResult,
+ RedirectChainResult,
+ RedirectInfo,
+ CanonicalResult,
+ IndexabilityResult,
+)
+
+
+# ============================================================================
+# PageSpeed Client Tests
+# ============================================================================
+
+class TestPageSpeedScore(unittest.TestCase):
+ """Tests for PageSpeedScore dataclass."""
+
+ def test_create_score(self):
+ """Test creating PageSpeedScore with all values."""
+ score = PageSpeedScore(
+ performance=95,
+ accessibility=88,
+ best_practices=92,
+ seo=100
+ )
+ self.assertEqual(score.performance, 95)
+ self.assertEqual(score.accessibility, 88)
+ self.assertEqual(score.best_practices, 92)
+ self.assertEqual(score.seo, 100)
+
+ def test_score_to_dict(self):
+ """Test converting score to dictionary."""
+ score = PageSpeedScore(performance=95, seo=100)
+ result = score.to_dict()
+ self.assertIsInstance(result, dict)
+ self.assertEqual(result['performance'], 95)
+ self.assertEqual(result['seo'], 100)
+ self.assertIsNone(result['accessibility'])
+ self.assertIsNone(result['best_practices'])
+
+
+class TestCoreWebVitals(unittest.TestCase):
+ """Tests for CoreWebVitals dataclass."""
+
+ def test_create_vitals(self):
+ """Test creating CoreWebVitals."""
+ vitals = CoreWebVitals(
+ lcp_ms=1500,
+ fid_ms=50,
+ cls=0.05,
+ fcp_ms=1200,
+ ttfb_ms=200
+ )
+ self.assertEqual(vitals.lcp_ms, 1500)
+ self.assertEqual(vitals.fid_ms, 50)
+ self.assertEqual(vitals.cls, 0.05)
+
+ def test_vitals_to_dict(self):
+ """Test converting vitals to dictionary."""
+ vitals = CoreWebVitals(lcp_ms=1500, cls=0.1)
+ result = vitals.to_dict()
+ self.assertEqual(result['lcp_ms'], 1500)
+ self.assertEqual(result['cls'], 0.1)
+
+
+class TestRateLimiter(unittest.TestCase):
+ """Tests for RateLimiter class."""
+
+ def setUp(self):
+ """Set up test with a temporary quota file."""
+ import tempfile
+ self.temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
+ self.temp_file.close()
+ self.limiter = RateLimiter(
+ daily_limit=100,
+ min_interval=0.1,
+ quota_file=self.temp_file.name
+ )
+
+ def tearDown(self):
+ """Clean up temp file."""
+ import os
+ try:
+ os.unlink(self.temp_file.name)
+ except FileNotFoundError:
+ pass
+
+ def test_initial_state(self):
+ """Test initial state of rate limiter."""
+ self.assertEqual(self.limiter.daily_limit, 100)
+ self.assertEqual(self.limiter.requests_today, 0)
+ self.assertEqual(self.limiter.get_remaining_quota(), 100)
+
+ def test_can_make_request_when_under_quota(self):
+ """Test can_make_request returns True when under quota."""
+ self.assertTrue(self.limiter.can_make_request())
+
+ def test_can_make_request_when_quota_exceeded(self):
+ """Test can_make_request returns False when quota exceeded."""
+ self.limiter.requests_today = 100
+ self.assertFalse(self.limiter.can_make_request())
+
+ def test_record_request(self):
+ """Test recording a request updates counter."""
+ initial = self.limiter.requests_today
+ self.limiter.record_request()
+ self.assertEqual(self.limiter.requests_today, initial + 1)
+
+ def test_get_remaining_quota(self):
+ """Test remaining quota calculation."""
+ self.limiter.requests_today = 30
+ self.assertEqual(self.limiter.get_remaining_quota(), 70)
+
+ def test_get_usage_stats(self):
+ """Test usage stats returns correct structure."""
+ self.limiter.requests_today = 25
+ stats = self.limiter.get_usage_stats()
+ self.assertEqual(stats['requests_today'], 25)
+ self.assertEqual(stats['daily_limit'], 100)
+ self.assertEqual(stats['remaining'], 75)
+ self.assertEqual(stats['usage_percent'], 25.0)
+
+
+class TestGooglePageSpeedClient(unittest.TestCase):
+ """Tests for GooglePageSpeedClient class."""
+
+ def setUp(self):
+ """Set up test with mocked dependencies."""
+ import tempfile
+ self.temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False)
+ self.temp_file.close()
+
+ self.rate_limiter = RateLimiter(
+ daily_limit=100,
+ min_interval=0,
+ quota_file=self.temp_file.name
+ )
+ self.client = GooglePageSpeedClient(
+ api_key='test_api_key',
+ rate_limiter=self.rate_limiter
+ )
+
+ def tearDown(self):
+ """Clean up."""
+ import os
+ try:
+ os.unlink(self.temp_file.name)
+ except FileNotFoundError:
+ pass
+
+ def test_client_initialization(self):
+ """Test client initializes correctly."""
+ self.assertEqual(self.client.api_key, 'test_api_key')
+ self.assertIsNotNone(self.client.rate_limiter)
+
+ def test_client_without_api_key(self):
+ """Test client works without API key (with warning)."""
+ client = GooglePageSpeedClient(api_key='')
+ self.assertEqual(client.api_key, '')
+
+ @patch.object(GooglePageSpeedClient, '_make_request_with_retry')
+ def test_analyze_url_success(self, mock_request):
+ """Test successful URL analysis."""
+ mock_response = {
+ 'lighthouseResult': {
+ 'finalUrl': 'https://example.com',
+ 'categories': {
+ 'performance': {'score': 0.95},
+ 'accessibility': {'score': 0.88},
+ 'best-practices': {'score': 0.92},
+ 'seo': {'score': 1.0},
+ },
+ 'audits': {
+ 'largest-contentful-paint': {'numericValue': 1500},
+ 'cumulative-layout-shift': {'numericValue': 0.05},
+ },
+ 'lighthouseVersion': '11.0.0',
+ 'timing': {'total': 5000},
+ }
+ }
+ mock_request.return_value = mock_response
+
+ result = self.client.analyze_url('https://example.com')
+
+ self.assertIsInstance(result, PageSpeedResult)
+ self.assertEqual(result.url, 'https://example.com')
+ self.assertEqual(result.scores.performance, 95)
+ self.assertEqual(result.scores.seo, 100)
+ self.assertEqual(result.core_web_vitals.lcp_ms, 1500)
+
+ def test_analyze_url_quota_exceeded(self):
+ """Test QuotaExceededError when quota is 0."""
+ self.rate_limiter.requests_today = 100
+
+ with self.assertRaises(QuotaExceededError):
+ self.client.analyze_url('https://example.com')
+
+ @patch.object(GooglePageSpeedClient, '_make_request_with_retry')
+ def test_extract_score(self, mock_request):
+ """Test score extraction converts 0-1 to 0-100."""
+ mock_response = {
+ 'lighthouseResult': {
+ 'finalUrl': 'https://example.com',
+ 'categories': {
+ 'seo': {'score': 0.75},
+ },
+ 'audits': {},
+ }
+ }
+ mock_request.return_value = mock_response
+
+ result = self.client.analyze_url('https://example.com')
+ self.assertEqual(result.scores.seo, 75)
+
+ def test_get_remaining_quota(self):
+ """Test getting remaining quota."""
+ self.rate_limiter.requests_today = 20
+ self.assertEqual(self.client.get_remaining_quota(), 80)
+
+
+# ============================================================================
+# On-Page SEO Analyzer Tests
+# ============================================================================
+
+class TestOnPageSEOAnalyzer(unittest.TestCase):
+ """Tests for OnPageSEOAnalyzer class."""
+
+ def setUp(self):
+ """Set up analyzer."""
+ self.analyzer = OnPageSEOAnalyzer()
+
+ def test_analyze_empty_html(self):
+ """Test analysis of empty HTML."""
+ result = self.analyzer.analyze_html('', base_url='https://example.com')
+ self.assertIsInstance(result, OnPageSEOResult)
+ self.assertEqual(result.base_url, 'https://example.com')
+
+ def test_analyze_basic_html(self):
+ """Test analysis of basic HTML page."""
+        html = '''
+        <!DOCTYPE html>
+        <html lang="pl">
+        <head>
+            <meta charset="utf-8">
+            <title>Test Page Title</title>
+            <meta name="description" content="This is a test page description">
+            <meta name="viewport" content="width=device-width, initial-scale=1">
+            <link rel="canonical" href="https://example.com/page">
+        </head>
+        <body>
+            <h1>Main Heading</h1>
+            <p>Some content here.</p>
+            <h2>Section 1</h2>
+            <h2>Section 2</h2>
+        </body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html, base_url='https://example.com')
+
+ # Check meta tags
+ self.assertEqual(result.meta_tags.title, 'Test Page Title')
+ self.assertEqual(result.meta_tags.description, 'This is a test page description')
+ self.assertEqual(result.meta_tags.viewport, 'width=device-width, initial-scale=1')
+ self.assertEqual(result.meta_tags.canonical_url, 'https://example.com/page')
+ self.assertTrue(result.has_doctype)
+ self.assertTrue(result.has_lang_attribute)
+ self.assertEqual(result.lang_attribute, 'pl')
+
+ def test_analyze_headings(self):
+ """Test heading structure analysis."""
+        html = '''
+        <html>
+        <head><title>Test</title></head>
+        <body>
+            <h1>Main Heading</h1>
+            <h2>Section 1</h2>
+            <h2>Section 2</h2>
+            <h3>Subsection</h3>
+        </body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ self.assertEqual(result.headings.h1_count, 1)
+ self.assertEqual(result.headings.h2_count, 2)
+ self.assertEqual(result.headings.h3_count, 1)
+ self.assertTrue(result.headings.has_single_h1)
+ self.assertTrue(result.headings.has_proper_hierarchy)
+ self.assertEqual(result.headings.h1_texts, ['Main Heading'])
+
+ def test_analyze_multiple_h1s(self):
+ """Test detection of multiple H1 headings (bad practice)."""
+        html = '''
+        <html>
+        <head><title>Test</title></head>
+        <body>
+            <h1>First H1</h1>
+            <h1>Second H1</h1>
+        </body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ self.assertEqual(result.headings.h1_count, 2)
+ self.assertFalse(result.headings.has_single_h1)
+ self.assertFalse(result.headings.has_proper_hierarchy)
+ self.assertIn('Multiple H1 headings (2)', result.headings.hierarchy_issues)
+
+ def test_analyze_missing_h1(self):
+ """Test detection of missing H1 heading."""
+        html = '''
+        <html>
+        <head><title>Test</title></head>
+        <body>
+            <h2>Section without H1</h2>
+        </body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ self.assertEqual(result.headings.h1_count, 0)
+ self.assertFalse(result.headings.has_proper_hierarchy)
+ self.assertIn('Missing H1 heading', result.headings.hierarchy_issues)
+
+ def test_analyze_images(self):
+ """Test image analysis."""
+        html = '''
+        <html>
+        <head><title>Test</title></head>
+        <body>
+            <img src="a.jpg" alt="Descriptive alt text">
+            <img src="b.jpg" alt="">
+            <img src="c.jpg" alt="image">
+            <img src="d.jpg">
+        </body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ self.assertEqual(result.images.total_images, 4)
+ self.assertEqual(result.images.images_with_alt, 3) # includes empty alt
+ self.assertEqual(result.images.images_without_alt, 1)
+ self.assertEqual(result.images.images_with_empty_alt, 1)
+ self.assertEqual(len(result.images.alt_text_quality_issues), 1) # "image" is placeholder
+
+ def test_analyze_links_internal_external(self):
+ """Test link analysis distinguishing internal/external."""
+        html = '''
+        <html>
+        <head><title>Test</title></head>
+        <body>
+            <a href="/page1">Internal 1</a>
+            <a href="/page2">Internal 2</a>
+            <a href="https://example.com/page3">Internal 3</a>
+            <a href="https://other-site.com">External</a>
+            <a href="https://social-site.com" rel="nofollow">Social</a>
+            <a href="#">Broken</a>
+        </body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html, base_url='https://example.com')
+
+ self.assertEqual(result.links.total_links, 6)
+ self.assertEqual(result.links.internal_links, 3)
+ self.assertEqual(result.links.external_links, 2)
+ self.assertEqual(result.links.nofollow_links, 1)
+ self.assertEqual(result.links.broken_anchor_links, 1)
+
+ def test_analyze_open_graph(self):
+ """Test Open Graph metadata extraction."""
+        html = '''
+        <html>
+        <head>
+            <title>Test</title>
+            <meta property="og:title" content="OG Title">
+            <meta property="og:description" content="OG Description">
+            <meta property="og:image" content="https://example.com/image.jpg">
+            <meta property="og:type" content="website">
+        </head>
+        <body></body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ self.assertEqual(result.open_graph.og_title, 'OG Title')
+ self.assertEqual(result.open_graph.og_description, 'OG Description')
+ self.assertEqual(result.open_graph.og_image, 'https://example.com/image.jpg')
+ self.assertEqual(result.open_graph.og_type, 'website')
+
+ def test_analyze_twitter_card(self):
+ """Test Twitter Card metadata extraction."""
+        html = '''
+        <html>
+        <head>
+            <title>Test</title>
+            <meta name="twitter:card" content="summary_large_image">
+            <meta name="twitter:title" content="Twitter Title">
+            <meta name="twitter:description" content="Twitter Description">
+        </head>
+        <body></body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ self.assertEqual(result.twitter_card.card_type, 'summary_large_image')
+ self.assertEqual(result.twitter_card.title, 'Twitter Title')
+ self.assertEqual(result.twitter_card.description, 'Twitter Description')
+
+ def test_analyze_structured_data_json_ld(self):
+ """Test JSON-LD structured data detection."""
+        html = '''
+        <html>
+        <head>
+            <title>Test</title>
+            <script type="application/ld+json">
+            {"@context": "https://schema.org", "@type": "LocalBusiness", "name": "Test Business"}
+            </script>
+        </head>
+        <body></body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ self.assertTrue(result.structured_data.has_structured_data)
+ self.assertEqual(result.structured_data.json_ld_count, 1)
+ self.assertIn('LocalBusiness', result.structured_data.json_ld_types)
+ self.assertIn('LocalBusiness', result.structured_data.all_types)
+
+ def test_analyze_structured_data_microdata(self):
+ """Test Microdata structured data detection."""
+        html = '''
+        <html>
+        <head><title>Test</title></head>
+        <body>
+            <div itemscope itemtype="https://schema.org/Organization">
+                <span itemprop="name">Test Org</span>
+            </div>
+        </body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ self.assertTrue(result.structured_data.has_structured_data)
+ self.assertEqual(result.structured_data.microdata_count, 1)
+ self.assertIn('Organization', result.structured_data.microdata_types)
+
+ def test_analyze_word_count(self):
+ """Test word count calculation."""
+        html = '''
+        <html>
+        <head><title>Test</title></head>
+        <body>
+            <p>This is a sentence with seven words here.</p>
+            <script>var ignored = "script text not counted";</script>
+            <style>.ignored { color: red; }</style>
+        </body>
+        </html>
+        '''
+ result = self.analyzer.analyze_html(html)
+
+ # Should count visible text only
+ self.assertGreater(result.word_count, 5)
+ self.assertLess(result.word_count, 20)
+
+ def test_result_to_dict(self):
+ """Test converting result to dictionary."""
+        html = '<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
+ result = self.analyzer.analyze_html(html)
+
+ result_dict = result.to_dict()
+
+ self.assertIsInstance(result_dict, dict)
+ self.assertIn('meta_tags', result_dict)
+ self.assertIn('headings', result_dict)
+ self.assertIn('images', result_dict)
+ self.assertIn('links', result_dict)
+ self.assertIn('structured_data', result_dict)
+
+
+# ============================================================================
+# Technical SEO Checker Tests
+# ============================================================================
+
+class TestTechnicalSEOChecker(unittest.TestCase):
+ """Tests for TechnicalSEOChecker class."""
+
+ def setUp(self):
+ """Set up checker."""
+ self.checker = TechnicalSEOChecker(timeout=5)
+
+ @patch('requests.Session.get')
+ def test_check_robots_txt_exists(self, mock_get):
+ """Test robots.txt detection when it exists."""
+ mock_response = Mock()
+ mock_response.status_code = 200
+ mock_response.text = '''
+User-agent: *
+Disallow: /admin/
+Sitemap: https://example.com/sitemap.xml
+ '''
+ mock_get.return_value = mock_response
+
+ result = self.checker.check_robots_txt('https://example.com')
+
+ self.assertTrue(result.exists)
+ self.assertEqual(result.status_code, 200)
+ self.assertIn('/admin/', result.disallow_rules)
+ self.assertIn('https://example.com/sitemap.xml', result.sitemap_urls)
+
+ @patch('requests.Session.get')
+ def test_check_robots_txt_not_found(self, mock_get):
+ """Test robots.txt detection when it doesn't exist."""
+ mock_response = Mock()
+ mock_response.status_code = 404
+ mock_get.return_value = mock_response
+
+ result = self.checker.check_robots_txt('https://example.com')
+
+ self.assertFalse(result.exists)
+ self.assertEqual(result.status_code, 404)
+
+ @patch('requests.Session.get')
+ def test_check_robots_txt_blocks_googlebot(self, mock_get):
+ """Test detection of Googlebot blocking in robots.txt."""
+ mock_response = Mock()
+ mock_response.status_code = 200
+ mock_response.text = '''
+User-agent: Googlebot
+Disallow: /
+ '''
+ mock_get.return_value = mock_response
+
+ result = self.checker.check_robots_txt('https://example.com')
+
+ self.assertTrue(result.exists)
+ self.assertTrue(result.blocks_googlebot)
+
+ @patch('requests.Session.get')
+ def test_check_sitemap_valid_xml(self, mock_get):
+ """Test valid sitemap.xml detection."""
+ mock_response = Mock()
+ mock_response.status_code = 200
+ # Use simpler XML without namespace for reliable parsing
+        mock_response.content = b'''<?xml version="1.0" encoding="UTF-8"?>
+<urlset>
+    <url><loc>https://example.com/</loc></url>
+    <url><loc>https://example.com/page1</loc></url>
+</urlset>
+'''
+ mock_response.headers = {'Last-Modified': 'Tue, 07 Jan 2026 10:00:00 GMT'}
+ mock_get.return_value = mock_response
+
+ result = self.checker.check_sitemap('https://example.com/sitemap.xml')
+
+ self.assertTrue(result.exists)
+ self.assertTrue(result.is_valid_xml)
+ self.assertFalse(result.is_sitemap_index)
+ self.assertEqual(result.url_count, 2)
+ self.assertIn('https://example.com/', result.sample_urls)
+
+ @patch('requests.Session.get')
+ def test_check_sitemap_index(self, mock_get):
+ """Test sitemap index detection."""
+ mock_response = Mock()
+ mock_response.status_code = 200
+        mock_response.content = b'''<?xml version="1.0" encoding="UTF-8"?>
+<sitemapindex>
+    <sitemap><loc>https://example.com/sitemap1.xml</loc></sitemap>
+    <sitemap><loc>https://example.com/sitemap2.xml</loc></sitemap>
+</sitemapindex>
+'''
+ mock_response.headers = {}
+ mock_get.return_value = mock_response
+
+ result = self.checker.check_sitemap('https://example.com/sitemap.xml')
+
+ self.assertTrue(result.exists)
+ self.assertTrue(result.is_valid_xml)
+ self.assertTrue(result.is_sitemap_index)
+ self.assertEqual(result.sitemap_count, 2)
+
+ @patch('requests.Session.get')
+ def test_check_sitemap_not_found(self, mock_get):
+ """Test sitemap.xml detection when not found."""
+ mock_response = Mock()
+ mock_response.status_code = 404
+ mock_get.return_value = mock_response
+
+ result = self.checker.check_sitemap('https://example.com/sitemap.xml')
+
+ self.assertFalse(result.exists)
+
+ @patch('requests.Session.get')
+ def test_check_redirect_chain_no_redirects(self, mock_get):
+ """Test URL with no redirects."""
+ mock_response = Mock()
+ mock_response.status_code = 200
+ mock_get.return_value = mock_response
+
+ result = self.checker.check_redirect_chain('https://example.com')
+
+ self.assertEqual(result.chain_length, 0)
+ self.assertEqual(result.final_url, 'https://example.com')
+ self.assertFalse(result.has_redirect_loop)
+ self.assertEqual(len(result.redirects), 0)
+
+ @patch('requests.Session.get')
+ def test_check_redirect_chain_with_redirect(self, mock_get):
+ """Test URL with single redirect."""
+ # First call returns redirect
+ redirect_response = Mock()
+ redirect_response.status_code = 301
+ redirect_response.headers = {'Location': 'https://www.example.com/'}
+
+ # Second call returns final page
+ final_response = Mock()
+ final_response.status_code = 200
+
+ mock_get.side_effect = [redirect_response, final_response]
+
+ result = self.checker.check_redirect_chain('https://example.com')
+
+ self.assertEqual(result.chain_length, 1)
+ self.assertEqual(result.final_url, 'https://www.example.com/')
+ self.assertEqual(len(result.redirects), 1)
+ self.assertEqual(result.redirects[0].status_code, 301)
+
+ @patch('requests.Session.get')
+ def test_check_redirect_https_upgrade(self, mock_get):
+ """Test detection of HTTP to HTTPS upgrade redirect."""
+ redirect_response = Mock()
+ redirect_response.status_code = 301
+ redirect_response.headers = {'Location': 'https://example.com/'}
+
+ final_response = Mock()
+ final_response.status_code = 200
+
+ mock_get.side_effect = [redirect_response, final_response]
+
+ result = self.checker.check_redirect_chain('http://example.com')
+
+ self.assertEqual(result.chain_length, 1)
+ self.assertTrue(result.redirects[0].is_https_upgrade)
+
+ def test_check_canonical_self_referencing(self):
+ """Test detection of self-referencing canonical URL."""
+        html = '''
+        <html>
+        <head>
+            <link rel="canonical" href="https://example.com/page">
+        </head>
+        <body></body>
+        </html>
+        '''
+ result = self.checker._check_canonical(html, 'https://example.com/page')
+
+ self.assertTrue(result.has_canonical)
+ self.assertEqual(result.canonical_url, 'https://example.com/page')
+ self.assertTrue(result.is_self_referencing)
+ self.assertFalse(result.points_to_different_domain)
+
+ def test_check_canonical_different_domain(self):
+ """Test detection of canonical pointing to different domain."""
+        html = '''
+        <html>
+        <head>
+            <link rel="canonical" href="https://other-domain.com/page">
+        </head>
+        <body></body>
+        </html>
+        '''
+ result = self.checker._check_canonical(html, 'https://example.com/page')
+
+ self.assertTrue(result.has_canonical)
+ self.assertTrue(result.points_to_different_domain)
+
+ def test_check_indexability_noindex_meta(self):
+ """Test detection of noindex meta tag."""
+ mock_response = Mock()
+        mock_response.text = '''
+        <html>
+        <head>
+            <meta name="robots" content="noindex">
+        </head>
+        <body></body>
+        </html>
+        '''
+ mock_response.headers = {}
+
+ result = self.checker._check_indexability(mock_response)
+
+ self.assertFalse(result.is_indexable)
+ self.assertTrue(result.has_noindex_meta)
+ self.assertEqual(result.noindex_source, 'meta')
+
+ def test_check_indexability_noindex_header(self):
+ """Test detection of X-Robots-Tag noindex header."""
+ mock_response = Mock()
+ mock_response.text = ''
+ mock_response.headers = {'X-Robots-Tag': 'noindex'}
+
+ result = self.checker._check_indexability(mock_response)
+
+ self.assertFalse(result.is_indexable)
+ self.assertTrue(result.has_noindex_header)
+ self.assertEqual(result.noindex_source, 'header')
+
+ def test_check_indexability_indexable(self):
+ """Test page that is indexable."""
+ mock_response = Mock()
+        mock_response.text = '''
+        <html>
+        <head>
+            <title>Indexable page</title>
+        </head>
+        <body></body>
+        </html>
+        '''
+ mock_response.headers = {}
+
+ result = self.checker._check_indexability(mock_response)
+
+ self.assertTrue(result.is_indexable)
+ self.assertFalse(result.has_noindex_meta)
+ self.assertFalse(result.has_noindex_header)
+
+ def test_result_to_dict(self):
+ """Test converting technical result to dictionary."""
+ result = TechnicalSEOResult(
+ url='https://example.com',
+ checked_at='2026-01-08T10:00:00',
+ robots_txt=RobotsTxtResult(exists=True),
+ sitemap=SitemapResult(exists=True),
+ redirect_chain=RedirectChainResult(original_url='https://example.com', final_url='https://example.com'),
+ canonical=CanonicalResult(has_canonical=True),
+ indexability=IndexabilityResult(is_indexable=True),
+ )
+
+ result_dict = result.to_dict()
+
+ self.assertIsInstance(result_dict, dict)
+ self.assertEqual(result_dict['url'], 'https://example.com')
+ self.assertIn('robots_txt', result_dict)
+ self.assertIn('sitemap', result_dict)
+ self.assertIn('redirect_chain', result_dict)
+ self.assertIn('canonical', result_dict)
+ self.assertIn('indexability', result_dict)
+
+
+# ============================================================================
+# SEO Audit Integration Tests
+# ============================================================================
+
+class TestSEOAuditScoreCalculation(unittest.TestCase):
+ """Tests for SEO score calculation logic."""
+
+ def setUp(self):
+ """Set up with mocked auditor."""
+ # Import SEOAuditor here since it may need database
+ with patch('seo_audit.create_engine'), \
+ patch('seo_audit.sessionmaker'):
+ from seo_audit import SEOAuditor
+ self.auditor = SEOAuditor.__new__(SEOAuditor)
+ self.auditor.engine = Mock()
+ self.auditor.Session = Mock()
+ self.auditor.pagespeed_client = Mock()
+ self.auditor.onpage_analyzer = OnPageSEOAnalyzer()
+ self.auditor.technical_checker = Mock()
+ self.auditor.session = Mock()
+
+ def test_calculate_onpage_score_perfect(self):
+ """Test on-page score calculation with perfect page."""
+ onpage = {
+ 'meta_tags': {
+ 'title': 'Perfect Title for SEO Optimization',
+ 'title_length': 38,
+ 'description': 'This is a perfect meta description that is between 120 and 160 characters long for optimal SEO results.',
+ 'description_length': 105,
+ 'canonical_url': 'https://example.com/page',
+ },
+ 'headings': {
+ 'h1_count': 1,
+ 'has_proper_hierarchy': True,
+ },
+ 'images': {
+ 'total_images': 10,
+ 'images_without_alt': 0,
+ },
+ 'structured_data': {
+ 'has_structured_data': True,
+ },
+ 'open_graph': {
+ 'og_title': 'OG Title',
+ },
+ }
+
+ score = self.auditor._calculate_onpage_score(onpage)
+
+ # Should be high score with minor deductions
+ self.assertGreaterEqual(score, 90)
+
+ def test_calculate_onpage_score_missing_title(self):
+ """Test on-page score with missing title."""
+ onpage = {
+ 'meta_tags': {
+ 'title': None,
+ 'description': 'Some description',
+ 'description_length': 50,
+ },
+ 'headings': {'h1_count': 1, 'has_proper_hierarchy': True},
+ 'images': {'total_images': 0, 'images_without_alt': 0},
+ 'structured_data': {'has_structured_data': False},
+ 'open_graph': {},
+ }
+
+ score = self.auditor._calculate_onpage_score(onpage)
+
+ # Should have significant deduction for missing title (-15)
+ self.assertLessEqual(score, 85)
+
+ def test_calculate_onpage_score_missing_h1(self):
+ """Test on-page score with missing H1."""
+ onpage = {
+ 'meta_tags': {
+ 'title': 'Good Title',
+ 'title_length': 10,
+ 'description': 'Good description',
+ 'description_length': 50,
+ },
+ 'headings': {'h1_count': 0, 'has_proper_hierarchy': False},
+ 'images': {'total_images': 0, 'images_without_alt': 0},
+ 'structured_data': {'has_structured_data': False},
+ 'open_graph': {},
+ }
+
+ score = self.auditor._calculate_onpage_score(onpage)
+
+ # Should have deduction for missing H1 (-10) and no structured data (-5)
+ self.assertLessEqual(score, 85)
+
+ def test_calculate_technical_score_perfect(self):
+ """Test technical score with perfect setup."""
+ technical = {
+ 'robots_txt': {
+ 'exists': True,
+ 'blocks_googlebot': False,
+ },
+ 'sitemap': {
+ 'exists': True,
+ 'is_valid_xml': True,
+ },
+ 'redirect_chain': {
+ 'chain_length': 0,
+ 'has_redirect_loop': False,
+ },
+ 'indexability': {
+ 'is_indexable': True,
+ },
+ 'canonical': {
+ 'has_canonical': True,
+ 'points_to_different_domain': False,
+ },
+ }
+
+ score = self.auditor._calculate_technical_score(technical)
+
+ self.assertEqual(score, 100)
+
+ def test_calculate_technical_score_no_robots(self):
+ """Test technical score without robots.txt."""
+ technical = {
+ 'robots_txt': {'exists': False},
+ 'sitemap': {'exists': True, 'is_valid_xml': True},
+ 'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False},
+ 'indexability': {'is_indexable': True},
+ 'canonical': {'has_canonical': True, 'points_to_different_domain': False},
+ }
+
+ score = self.auditor._calculate_technical_score(technical)
+
+ # -10 for missing robots.txt
+ self.assertEqual(score, 90)
+
+ def test_calculate_technical_score_blocks_googlebot(self):
+ """Test technical score when blocking Googlebot."""
+ technical = {
+ 'robots_txt': {'exists': True, 'blocks_googlebot': True},
+ 'sitemap': {'exists': True, 'is_valid_xml': True},
+ 'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False},
+ 'indexability': {'is_indexable': True},
+ 'canonical': {'has_canonical': True, 'points_to_different_domain': False},
+ }
+
+ score = self.auditor._calculate_technical_score(technical)
+
+ # -20 for blocking Googlebot
+ self.assertEqual(score, 80)
+
+
+class TestSEOAuditResultCategorization(unittest.TestCase):
+ """Tests for result categorization logic."""
+
+ def setUp(self):
+ """Set up with mocked auditor."""
+ with patch('seo_audit.create_engine'), \
+ patch('seo_audit.sessionmaker'):
+ from seo_audit import SEOAuditor
+ self.auditor = SEOAuditor.__new__(SEOAuditor)
+
+ def test_categorize_success(self):
+ """Test categorizing successful audit."""
+ result = {
+ 'errors': [],
+ 'http_status': 200,
+ 'onpage': {'meta_tags': {}},
+ 'technical': {},
+ }
+
+ category = self.auditor._categorize_result(result)
+ self.assertEqual(category, 'success')
+
+ def test_categorize_no_website(self):
+ """Test categorizing company with no website."""
+ result = {
+ 'errors': ['No website URL configured'],
+ }
+
+ category = self.auditor._categorize_result(result)
+ self.assertEqual(category, 'no_website')
+
+ def test_categorize_timeout(self):
+ """Test categorizing timeout error."""
+ result = {
+ 'errors': ['Timeout after 30s'],
+ }
+
+ category = self.auditor._categorize_result(result)
+ self.assertEqual(category, 'timeout')
+
+ def test_categorize_connection_error(self):
+ """Test categorizing connection error."""
+ result = {
+ 'errors': ['Connection error: Failed to establish connection'],
+ }
+
+ category = self.auditor._categorize_result(result)
+ self.assertEqual(category, 'connection_error')
+
+ def test_categorize_ssl_error(self):
+ """Test categorizing SSL error."""
+ result = {
+ 'errors': ['SSL Error: Certificate verify failed'],
+ }
+
+ category = self.auditor._categorize_result(result)
+ self.assertEqual(category, 'ssl_error')
+
+ def test_categorize_http_error(self):
+ """Test categorizing HTTP error (4xx/5xx)."""
+ result = {
+ 'errors': ['HTTP 404'],
+ 'http_status': 404,
+ }
+
+ category = self.auditor._categorize_result(result)
+ self.assertEqual(category, 'unavailable')
+
+
+class TestParseBatchArgument(unittest.TestCase):
+ """Tests for batch argument parsing."""
+
+ def test_parse_valid_batch(self):
+ """Test parsing valid batch argument."""
+ from seo_audit import parse_batch_argument
+
+ start, end = parse_batch_argument('1-10')
+ self.assertEqual(start, 1)
+ self.assertEqual(end, 10)
+
+ def test_parse_batch_with_spaces(self):
+ """Test parsing batch with spaces."""
+ from seo_audit import parse_batch_argument
+
+ start, end = parse_batch_argument(' 5 - 20 ')
+ self.assertEqual(start, 5)
+ self.assertEqual(end, 20)
+
+ def test_parse_invalid_format_no_dash(self):
+ """Test parsing batch without dash fails."""
+ from seo_audit import parse_batch_argument
+
+ with self.assertRaises(ValueError) as ctx:
+ parse_batch_argument('10')
+ self.assertIn('Invalid batch format', str(ctx.exception))
+
+ def test_parse_invalid_format_multiple_dashes(self):
+ """Test parsing batch with multiple dashes fails."""
+ from seo_audit import parse_batch_argument
+
+ with self.assertRaises(ValueError) as ctx:
+ parse_batch_argument('1-5-10')
+ self.assertIn('Invalid batch format', str(ctx.exception))
+
+ def test_parse_invalid_values_not_numbers(self):
+ """Test parsing batch with non-numeric values fails."""
+ from seo_audit import parse_batch_argument
+
+ with self.assertRaises(ValueError) as ctx:
+ parse_batch_argument('a-b')
+ self.assertIn('Invalid batch values', str(ctx.exception))
+
+ def test_parse_invalid_start_less_than_one(self):
+ """Test parsing batch with start < 1 fails."""
+ from seo_audit import parse_batch_argument
+
+ with self.assertRaises(ValueError) as ctx:
+ parse_batch_argument('0-10')
+ self.assertIn('Must be >= 1', str(ctx.exception))
+
+ def test_parse_invalid_end_less_than_start(self):
+ """Test parsing batch with end < start fails."""
+ from seo_audit import parse_batch_argument
+
+ with self.assertRaises(ValueError) as ctx:
+ parse_batch_argument('10-5')
+ self.assertIn('END must be >= START', str(ctx.exception))
+
+
+# ============================================================================
+# Helper Function Tests
+# ============================================================================
+
+class TestConvenienceFunctions(unittest.TestCase):
+ """Tests for convenience functions."""
+
+ def test_analyze_html_function(self):
+ """Test analyze_html convenience function."""
+ from seo_analyzer import analyze_html
+
+        html = '<html><head><title>Test</title></head><body><h1>Hello</h1></body></html>'
+ result = analyze_html(html, base_url='https://example.com')
+
+ self.assertIsInstance(result, dict)
+ self.assertEqual(result['meta_tags']['title'], 'Test')
+ self.assertEqual(result['headings']['h1_count'], 1)
+
+
+# ============================================================================
+# Run Tests
+# ============================================================================
+
+if __name__ == '__main__':
+ # Run with verbose output
+ unittest.main(verbosity=2)