From 92055bbe606b219b2c158f1b039c50b5fd7872ce Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Thu, 8 Jan 2026 09:18:15 +0100 Subject: [PATCH] auto-claude: 8.1 - Create tests/test_seo_audit.py with tests for PageSpeed client, on-page analyzer, technical checker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implemented comprehensive unit test suite for SEO audit functionality: - TestPageSpeedScore: Dataclass creation and serialization - TestCoreWebVitals: Core Web Vitals dataclass tests - TestRateLimiter: Rate limiting and quota tracking (7 tests) - TestGooglePageSpeedClient: API client with mocked responses (6 tests) - TestOnPageSEOAnalyzer: HTML analysis including meta tags, headings, images, links, structured data, Open Graph, Twitter Cards (12 tests) - TestTechnicalSEOChecker: robots.txt, sitemap.xml, redirects, canonical, indexability (13 tests) - TestSEOAuditScoreCalculation: On-page and technical score calculation - TestSEOAuditResultCategorization: Result categorization logic - TestParseBatchArgument: CLI batch argument parsing - TestConvenienceFunctions: Helper functions Total: 64 tests, all passing 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- tests/test_seo_audit.py | 1111 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 1111 insertions(+) create mode 100644 tests/test_seo_audit.py diff --git a/tests/test_seo_audit.py b/tests/test_seo_audit.py new file mode 100644 index 0000000..dcff784 --- /dev/null +++ b/tests/test_seo_audit.py @@ -0,0 +1,1111 @@ +#!/usr/bin/env python3 +""" +Unit Tests for SEO Audit Functionality +====================================== + +Tests for: +- PageSpeed API client (scripts/pagespeed_client.py) +- On-Page SEO Analyzer (scripts/seo_analyzer.py) +- Technical SEO Checker (scripts/seo_analyzer.py) +- SEO Audit database operations (scripts/seo_audit.py) + +Run tests: + cd tests + python -m pytest test_seo_audit.py -v + +Author: Claude Code +Date: 2026-01-08 +""" + +import json +import sys +import unittest +from datetime import datetime, date +from pathlib import Path +from unittest.mock import Mock, MagicMock, patch, PropertyMock + +# Add scripts directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / 'scripts')) + +# Import modules to test +from pagespeed_client import ( + GooglePageSpeedClient, + PageSpeedResult, + PageSpeedScore, + CoreWebVitals, + RateLimiter, + PageSpeedAPIError, + QuotaExceededError, + RateLimitError, + Strategy, + Category, +) +from seo_analyzer import ( + OnPageSEOAnalyzer, + OnPageSEOResult, + MetaTags, + OpenGraphData, + TwitterCardData, + HeadingStructure, + ImageAnalysis, + LinkAnalysis, + StructuredData, + TechnicalSEOChecker, + TechnicalSEOResult, + RobotsTxtResult, + SitemapResult, + RedirectChainResult, + RedirectInfo, + CanonicalResult, + IndexabilityResult, +) + + +# ============================================================================ +# PageSpeed Client Tests +# ============================================================================ + +class TestPageSpeedScore(unittest.TestCase): + """Tests for PageSpeedScore dataclass.""" + + def test_create_score(self): + """Test creating PageSpeedScore with all values.""" + score = PageSpeedScore( + performance=95, + accessibility=88, + best_practices=92, + seo=100 + ) + self.assertEqual(score.performance, 95) + self.assertEqual(score.accessibility, 88) + self.assertEqual(score.best_practices, 92) + self.assertEqual(score.seo, 100) + + def test_score_to_dict(self): + """Test converting score to dictionary.""" + score = PageSpeedScore(performance=95, seo=100) + result = score.to_dict() + self.assertIsInstance(result, dict) + self.assertEqual(result['performance'], 95) + self.assertEqual(result['seo'], 100) + self.assertIsNone(result['accessibility']) + self.assertIsNone(result['best_practices']) + + +class TestCoreWebVitals(unittest.TestCase): + """Tests for CoreWebVitals dataclass.""" + + def test_create_vitals(self): + """Test creating CoreWebVitals.""" + vitals = CoreWebVitals( + lcp_ms=1500, + fid_ms=50, + cls=0.05, + fcp_ms=1200, + ttfb_ms=200 + ) + self.assertEqual(vitals.lcp_ms, 1500) + self.assertEqual(vitals.fid_ms, 50) + self.assertEqual(vitals.cls, 0.05) + + def test_vitals_to_dict(self): + """Test converting vitals to dictionary.""" + vitals = CoreWebVitals(lcp_ms=1500, cls=0.1) + result = vitals.to_dict() + self.assertEqual(result['lcp_ms'], 1500) + self.assertEqual(result['cls'], 0.1) + + +class TestRateLimiter(unittest.TestCase): + """Tests for RateLimiter class.""" + + def setUp(self): + """Set up test with a temporary quota file.""" + import tempfile + self.temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) + self.temp_file.close() + self.limiter = RateLimiter( + daily_limit=100, + min_interval=0.1, + quota_file=self.temp_file.name + ) + + def tearDown(self): + """Clean up temp file.""" + import os + try: + os.unlink(self.temp_file.name) + except FileNotFoundError: + pass + + def test_initial_state(self): + """Test initial state of rate limiter.""" + self.assertEqual(self.limiter.daily_limit, 100) + self.assertEqual(self.limiter.requests_today, 0) + self.assertEqual(self.limiter.get_remaining_quota(), 100) + + def test_can_make_request_when_under_quota(self): + """Test can_make_request returns True when under quota.""" + self.assertTrue(self.limiter.can_make_request()) + + def test_can_make_request_when_quota_exceeded(self): + """Test can_make_request returns False when quota exceeded.""" + self.limiter.requests_today = 100 + self.assertFalse(self.limiter.can_make_request()) + + def test_record_request(self): + """Test recording a request updates counter.""" + initial = self.limiter.requests_today + self.limiter.record_request() + self.assertEqual(self.limiter.requests_today, initial + 1) + + def test_get_remaining_quota(self): + """Test remaining quota calculation.""" + self.limiter.requests_today = 30 + self.assertEqual(self.limiter.get_remaining_quota(), 70) + + def test_get_usage_stats(self): + """Test usage stats returns correct structure.""" + self.limiter.requests_today = 25 + stats = self.limiter.get_usage_stats() + self.assertEqual(stats['requests_today'], 25) + self.assertEqual(stats['daily_limit'], 100) + self.assertEqual(stats['remaining'], 75) + self.assertEqual(stats['usage_percent'], 25.0) + + +class TestGooglePageSpeedClient(unittest.TestCase): + """Tests for GooglePageSpeedClient class.""" + + def setUp(self): + """Set up test with mocked dependencies.""" + import tempfile + self.temp_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) + self.temp_file.close() + + self.rate_limiter = RateLimiter( + daily_limit=100, + min_interval=0, + quota_file=self.temp_file.name + ) + self.client = GooglePageSpeedClient( + api_key='test_api_key', + rate_limiter=self.rate_limiter + ) + + def tearDown(self): + """Clean up.""" + import os + try: + os.unlink(self.temp_file.name) + except FileNotFoundError: + pass + + def test_client_initialization(self): + """Test client initializes correctly.""" + self.assertEqual(self.client.api_key, 'test_api_key') + self.assertIsNotNone(self.client.rate_limiter) + + def test_client_without_api_key(self): + """Test client works without API key (with warning).""" + client = GooglePageSpeedClient(api_key='') + self.assertEqual(client.api_key, '') + + @patch.object(GooglePageSpeedClient, '_make_request_with_retry') + def test_analyze_url_success(self, mock_request): + """Test successful URL analysis.""" + mock_response = { + 'lighthouseResult': { + 'finalUrl': 'https://example.com', + 'categories': { + 'performance': {'score': 0.95}, + 'accessibility': {'score': 0.88}, + 'best-practices': {'score': 0.92}, + 'seo': {'score': 1.0}, + }, + 'audits': { + 'largest-contentful-paint': {'numericValue': 1500}, + 'cumulative-layout-shift': {'numericValue': 0.05}, + }, + 'lighthouseVersion': '11.0.0', + 'timing': {'total': 5000}, + } + } + mock_request.return_value = mock_response + + result = self.client.analyze_url('https://example.com') + + self.assertIsInstance(result, PageSpeedResult) + self.assertEqual(result.url, 'https://example.com') + self.assertEqual(result.scores.performance, 95) + self.assertEqual(result.scores.seo, 100) + self.assertEqual(result.core_web_vitals.lcp_ms, 1500) + + def test_analyze_url_quota_exceeded(self): + """Test QuotaExceededError when quota is 0.""" + self.rate_limiter.requests_today = 100 + + with self.assertRaises(QuotaExceededError): + self.client.analyze_url('https://example.com') + + @patch.object(GooglePageSpeedClient, '_make_request_with_retry') + def test_extract_score(self, mock_request): + """Test score extraction converts 0-1 to 0-100.""" + mock_response = { + 'lighthouseResult': { + 'finalUrl': 'https://example.com', + 'categories': { + 'seo': {'score': 0.75}, + }, + 'audits': {}, + } + } + mock_request.return_value = mock_response + + result = self.client.analyze_url('https://example.com') + self.assertEqual(result.scores.seo, 75) + + def test_get_remaining_quota(self): + """Test getting remaining quota.""" + self.rate_limiter.requests_today = 20 + self.assertEqual(self.client.get_remaining_quota(), 80) + + +# ============================================================================ +# On-Page SEO Analyzer Tests +# ============================================================================ + +class TestOnPageSEOAnalyzer(unittest.TestCase): + """Tests for OnPageSEOAnalyzer class.""" + + def setUp(self): + """Set up analyzer.""" + self.analyzer = OnPageSEOAnalyzer() + + def test_analyze_empty_html(self): + """Test analysis of empty HTML.""" + result = self.analyzer.analyze_html('', base_url='https://example.com') + self.assertIsInstance(result, OnPageSEOResult) + self.assertEqual(result.base_url, 'https://example.com') + + def test_analyze_basic_html(self): + """Test analysis of basic HTML page.""" + html = ''' + + + + + Test Page Title + + + + + +

Main Heading

+

Some content here.

+

Section 1

+

Section 2

+ + + ''' + result = self.analyzer.analyze_html(html, base_url='https://example.com') + + # Check meta tags + self.assertEqual(result.meta_tags.title, 'Test Page Title') + self.assertEqual(result.meta_tags.description, 'This is a test page description') + self.assertEqual(result.meta_tags.viewport, 'width=device-width, initial-scale=1') + self.assertEqual(result.meta_tags.canonical_url, 'https://example.com/page') + self.assertTrue(result.has_doctype) + self.assertTrue(result.has_lang_attribute) + self.assertEqual(result.lang_attribute, 'pl') + + def test_analyze_headings(self): + """Test heading structure analysis.""" + html = ''' + + Test + +

Main Heading

+

Section 1

+

Section 2

+

Subsection

+ + + ''' + result = self.analyzer.analyze_html(html) + + self.assertEqual(result.headings.h1_count, 1) + self.assertEqual(result.headings.h2_count, 2) + self.assertEqual(result.headings.h3_count, 1) + self.assertTrue(result.headings.has_single_h1) + self.assertTrue(result.headings.has_proper_hierarchy) + self.assertEqual(result.headings.h1_texts, ['Main Heading']) + + def test_analyze_multiple_h1s(self): + """Test detection of multiple H1 headings (bad practice).""" + html = ''' + + Test + +

First H1

+

Second H1

+ + + ''' + result = self.analyzer.analyze_html(html) + + self.assertEqual(result.headings.h1_count, 2) + self.assertFalse(result.headings.has_single_h1) + self.assertFalse(result.headings.has_proper_hierarchy) + self.assertIn('Multiple H1 headings (2)', result.headings.hierarchy_issues) + + def test_analyze_missing_h1(self): + """Test detection of missing H1 heading.""" + html = ''' + + Test + +

Section without H1

+ + + ''' + result = self.analyzer.analyze_html(html) + + self.assertEqual(result.headings.h1_count, 0) + self.assertFalse(result.headings.has_proper_hierarchy) + self.assertIn('Missing H1 heading', result.headings.hierarchy_issues) + + def test_analyze_images(self): + """Test image analysis.""" + html = ''' + + Test + + Good alt text + + + image + + + ''' + result = self.analyzer.analyze_html(html) + + self.assertEqual(result.images.total_images, 4) + self.assertEqual(result.images.images_with_alt, 3) # includes empty alt + self.assertEqual(result.images.images_without_alt, 1) + self.assertEqual(result.images.images_with_empty_alt, 1) + self.assertEqual(len(result.images.alt_text_quality_issues), 1) # "image" is placeholder + + def test_analyze_links_internal_external(self): + """Test link analysis distinguishing internal/external.""" + html = ''' + + Test + + Internal 1 + Internal 2 + Internal 3 + External + Social + Broken + + + ''' + result = self.analyzer.analyze_html(html, base_url='https://example.com') + + self.assertEqual(result.links.total_links, 6) + self.assertEqual(result.links.internal_links, 3) + self.assertEqual(result.links.external_links, 2) + self.assertEqual(result.links.nofollow_links, 1) + self.assertEqual(result.links.broken_anchor_links, 1) + + def test_analyze_open_graph(self): + """Test Open Graph metadata extraction.""" + html = ''' + + + Test + + + + + + + + ''' + result = self.analyzer.analyze_html(html) + + self.assertEqual(result.open_graph.og_title, 'OG Title') + self.assertEqual(result.open_graph.og_description, 'OG Description') + self.assertEqual(result.open_graph.og_image, 'https://example.com/image.jpg') + self.assertEqual(result.open_graph.og_type, 'website') + + def test_analyze_twitter_card(self): + """Test Twitter Card metadata extraction.""" + html = ''' + + + Test + + + + + + + ''' + result = self.analyzer.analyze_html(html) + + self.assertEqual(result.twitter_card.card_type, 'summary_large_image') + self.assertEqual(result.twitter_card.title, 'Twitter Title') + self.assertEqual(result.twitter_card.description, 'Twitter Description') + + def test_analyze_structured_data_json_ld(self): + """Test JSON-LD structured data detection.""" + html = ''' + + + Test + + + + + ''' + result = self.analyzer.analyze_html(html) + + self.assertTrue(result.structured_data.has_structured_data) + self.assertEqual(result.structured_data.json_ld_count, 1) + self.assertIn('LocalBusiness', result.structured_data.json_ld_types) + self.assertIn('LocalBusiness', result.structured_data.all_types) + + def test_analyze_structured_data_microdata(self): + """Test Microdata structured data detection.""" + html = ''' + + Test + +
+ Test Org +
+ + + ''' + result = self.analyzer.analyze_html(html) + + self.assertTrue(result.structured_data.has_structured_data) + self.assertEqual(result.structured_data.microdata_count, 1) + self.assertIn('Organization', result.structured_data.microdata_types) + + def test_analyze_word_count(self): + """Test word count calculation.""" + html = ''' + + Test + +

This is a sentence with seven words here.

+ + + + + ''' + result = self.analyzer.analyze_html(html) + + # Should count visible text only + self.assertGreater(result.word_count, 5) + self.assertLess(result.word_count, 20) + + def test_result_to_dict(self): + """Test converting result to dictionary.""" + html = 'Test

Hello

' + result = self.analyzer.analyze_html(html) + + result_dict = result.to_dict() + + self.assertIsInstance(result_dict, dict) + self.assertIn('meta_tags', result_dict) + self.assertIn('headings', result_dict) + self.assertIn('images', result_dict) + self.assertIn('links', result_dict) + self.assertIn('structured_data', result_dict) + + +# ============================================================================ +# Technical SEO Checker Tests +# ============================================================================ + +class TestTechnicalSEOChecker(unittest.TestCase): + """Tests for TechnicalSEOChecker class.""" + + def setUp(self): + """Set up checker.""" + self.checker = TechnicalSEOChecker(timeout=5) + + @patch('requests.Session.get') + def test_check_robots_txt_exists(self, mock_get): + """Test robots.txt detection when it exists.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.text = ''' +User-agent: * +Disallow: /admin/ +Sitemap: https://example.com/sitemap.xml + ''' + mock_get.return_value = mock_response + + result = self.checker.check_robots_txt('https://example.com') + + self.assertTrue(result.exists) + self.assertEqual(result.status_code, 200) + self.assertIn('/admin/', result.disallow_rules) + self.assertIn('https://example.com/sitemap.xml', result.sitemap_urls) + + @patch('requests.Session.get') + def test_check_robots_txt_not_found(self, mock_get): + """Test robots.txt detection when it doesn't exist.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_get.return_value = mock_response + + result = self.checker.check_robots_txt('https://example.com') + + self.assertFalse(result.exists) + self.assertEqual(result.status_code, 404) + + @patch('requests.Session.get') + def test_check_robots_txt_blocks_googlebot(self, mock_get): + """Test detection of Googlebot blocking in robots.txt.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.text = ''' +User-agent: Googlebot +Disallow: / + ''' + mock_get.return_value = mock_response + + result = self.checker.check_robots_txt('https://example.com') + + self.assertTrue(result.exists) + self.assertTrue(result.blocks_googlebot) + + @patch('requests.Session.get') + def test_check_sitemap_valid_xml(self, mock_get): + """Test valid sitemap.xml detection.""" + mock_response = Mock() + mock_response.status_code = 200 + # Use simpler XML without namespace for reliable parsing + mock_response.content = b''' + + https://example.com/ + https://example.com/page1 + + ''' + mock_response.headers = {'Last-Modified': 'Tue, 07 Jan 2026 10:00:00 GMT'} + mock_get.return_value = mock_response + + result = self.checker.check_sitemap('https://example.com/sitemap.xml') + + self.assertTrue(result.exists) + self.assertTrue(result.is_valid_xml) + self.assertFalse(result.is_sitemap_index) + self.assertEqual(result.url_count, 2) + self.assertIn('https://example.com/', result.sample_urls) + + @patch('requests.Session.get') + def test_check_sitemap_index(self, mock_get): + """Test sitemap index detection.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.content = b''' + + https://example.com/sitemap1.xml + https://example.com/sitemap2.xml + + ''' + mock_response.headers = {} + mock_get.return_value = mock_response + + result = self.checker.check_sitemap('https://example.com/sitemap.xml') + + self.assertTrue(result.exists) + self.assertTrue(result.is_valid_xml) + self.assertTrue(result.is_sitemap_index) + self.assertEqual(result.sitemap_count, 2) + + @patch('requests.Session.get') + def test_check_sitemap_not_found(self, mock_get): + """Test sitemap.xml detection when not found.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_get.return_value = mock_response + + result = self.checker.check_sitemap('https://example.com/sitemap.xml') + + self.assertFalse(result.exists) + + @patch('requests.Session.get') + def test_check_redirect_chain_no_redirects(self, mock_get): + """Test URL with no redirects.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_get.return_value = mock_response + + result = self.checker.check_redirect_chain('https://example.com') + + self.assertEqual(result.chain_length, 0) + self.assertEqual(result.final_url, 'https://example.com') + self.assertFalse(result.has_redirect_loop) + self.assertEqual(len(result.redirects), 0) + + @patch('requests.Session.get') + def test_check_redirect_chain_with_redirect(self, mock_get): + """Test URL with single redirect.""" + # First call returns redirect + redirect_response = Mock() + redirect_response.status_code = 301 + redirect_response.headers = {'Location': 'https://www.example.com/'} + + # Second call returns final page + final_response = Mock() + final_response.status_code = 200 + + mock_get.side_effect = [redirect_response, final_response] + + result = self.checker.check_redirect_chain('https://example.com') + + self.assertEqual(result.chain_length, 1) + self.assertEqual(result.final_url, 'https://www.example.com/') + self.assertEqual(len(result.redirects), 1) + self.assertEqual(result.redirects[0].status_code, 301) + + @patch('requests.Session.get') + def test_check_redirect_https_upgrade(self, mock_get): + """Test detection of HTTP to HTTPS upgrade redirect.""" + redirect_response = Mock() + redirect_response.status_code = 301 + redirect_response.headers = {'Location': 'https://example.com/'} + + final_response = Mock() + final_response.status_code = 200 + + mock_get.side_effect = [redirect_response, final_response] + + result = self.checker.check_redirect_chain('http://example.com') + + self.assertEqual(result.chain_length, 1) + self.assertTrue(result.redirects[0].is_https_upgrade) + + def test_check_canonical_self_referencing(self): + """Test detection of self-referencing canonical URL.""" + html = ''' + + + + + + + ''' + result = self.checker._check_canonical(html, 'https://example.com/page') + + self.assertTrue(result.has_canonical) + self.assertEqual(result.canonical_url, 'https://example.com/page') + self.assertTrue(result.is_self_referencing) + self.assertFalse(result.points_to_different_domain) + + def test_check_canonical_different_domain(self): + """Test detection of canonical pointing to different domain.""" + html = ''' + + + + + + + ''' + result = self.checker._check_canonical(html, 'https://example.com/page') + + self.assertTrue(result.has_canonical) + self.assertTrue(result.points_to_different_domain) + + def test_check_indexability_noindex_meta(self): + """Test detection of noindex meta tag.""" + mock_response = Mock() + mock_response.text = ''' + + + + + + + ''' + mock_response.headers = {} + + result = self.checker._check_indexability(mock_response) + + self.assertFalse(result.is_indexable) + self.assertTrue(result.has_noindex_meta) + self.assertEqual(result.noindex_source, 'meta') + + def test_check_indexability_noindex_header(self): + """Test detection of X-Robots-Tag noindex header.""" + mock_response = Mock() + mock_response.text = '' + mock_response.headers = {'X-Robots-Tag': 'noindex'} + + result = self.checker._check_indexability(mock_response) + + self.assertFalse(result.is_indexable) + self.assertTrue(result.has_noindex_header) + self.assertEqual(result.noindex_source, 'header') + + def test_check_indexability_indexable(self): + """Test page that is indexable.""" + mock_response = Mock() + mock_response.text = ''' + + + + + + + ''' + mock_response.headers = {} + + result = self.checker._check_indexability(mock_response) + + self.assertTrue(result.is_indexable) + self.assertFalse(result.has_noindex_meta) + self.assertFalse(result.has_noindex_header) + + def test_result_to_dict(self): + """Test converting technical result to dictionary.""" + result = TechnicalSEOResult( + url='https://example.com', + checked_at='2026-01-08T10:00:00', + robots_txt=RobotsTxtResult(exists=True), + sitemap=SitemapResult(exists=True), + redirect_chain=RedirectChainResult(original_url='https://example.com', final_url='https://example.com'), + canonical=CanonicalResult(has_canonical=True), + indexability=IndexabilityResult(is_indexable=True), + ) + + result_dict = result.to_dict() + + self.assertIsInstance(result_dict, dict) + self.assertEqual(result_dict['url'], 'https://example.com') + self.assertIn('robots_txt', result_dict) + self.assertIn('sitemap', result_dict) + self.assertIn('redirect_chain', result_dict) + self.assertIn('canonical', result_dict) + self.assertIn('indexability', result_dict) + + +# ============================================================================ +# SEO Audit Integration Tests +# ============================================================================ + +class TestSEOAuditScoreCalculation(unittest.TestCase): + """Tests for SEO score calculation logic.""" + + def setUp(self): + """Set up with mocked auditor.""" + # Import SEOAuditor here since it may need database + with patch('seo_audit.create_engine'), \ + patch('seo_audit.sessionmaker'): + from seo_audit import SEOAuditor + self.auditor = SEOAuditor.__new__(SEOAuditor) + self.auditor.engine = Mock() + self.auditor.Session = Mock() + self.auditor.pagespeed_client = Mock() + self.auditor.onpage_analyzer = OnPageSEOAnalyzer() + self.auditor.technical_checker = Mock() + self.auditor.session = Mock() + + def test_calculate_onpage_score_perfect(self): + """Test on-page score calculation with perfect page.""" + onpage = { + 'meta_tags': { + 'title': 'Perfect Title for SEO Optimization', + 'title_length': 38, + 'description': 'This is a perfect meta description that is between 120 and 160 characters long for optimal SEO results.', + 'description_length': 105, + 'canonical_url': 'https://example.com/page', + }, + 'headings': { + 'h1_count': 1, + 'has_proper_hierarchy': True, + }, + 'images': { + 'total_images': 10, + 'images_without_alt': 0, + }, + 'structured_data': { + 'has_structured_data': True, + }, + 'open_graph': { + 'og_title': 'OG Title', + }, + } + + score = self.auditor._calculate_onpage_score(onpage) + + # Should be high score with minor deductions + self.assertGreaterEqual(score, 90) + + def test_calculate_onpage_score_missing_title(self): + """Test on-page score with missing title.""" + onpage = { + 'meta_tags': { + 'title': None, + 'description': 'Some description', + 'description_length': 50, + }, + 'headings': {'h1_count': 1, 'has_proper_hierarchy': True}, + 'images': {'total_images': 0, 'images_without_alt': 0}, + 'structured_data': {'has_structured_data': False}, + 'open_graph': {}, + } + + score = self.auditor._calculate_onpage_score(onpage) + + # Should have significant deduction for missing title (-15) + self.assertLessEqual(score, 85) + + def test_calculate_onpage_score_missing_h1(self): + """Test on-page score with missing H1.""" + onpage = { + 'meta_tags': { + 'title': 'Good Title', + 'title_length': 10, + 'description': 'Good description', + 'description_length': 50, + }, + 'headings': {'h1_count': 0, 'has_proper_hierarchy': False}, + 'images': {'total_images': 0, 'images_without_alt': 0}, + 'structured_data': {'has_structured_data': False}, + 'open_graph': {}, + } + + score = self.auditor._calculate_onpage_score(onpage) + + # Should have deduction for missing H1 (-10) and no structured data (-5) + self.assertLessEqual(score, 85) + + def test_calculate_technical_score_perfect(self): + """Test technical score with perfect setup.""" + technical = { + 'robots_txt': { + 'exists': True, + 'blocks_googlebot': False, + }, + 'sitemap': { + 'exists': True, + 'is_valid_xml': True, + }, + 'redirect_chain': { + 'chain_length': 0, + 'has_redirect_loop': False, + }, + 'indexability': { + 'is_indexable': True, + }, + 'canonical': { + 'has_canonical': True, + 'points_to_different_domain': False, + }, + } + + score = self.auditor._calculate_technical_score(technical) + + self.assertEqual(score, 100) + + def test_calculate_technical_score_no_robots(self): + """Test technical score without robots.txt.""" + technical = { + 'robots_txt': {'exists': False}, + 'sitemap': {'exists': True, 'is_valid_xml': True}, + 'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False}, + 'indexability': {'is_indexable': True}, + 'canonical': {'has_canonical': True, 'points_to_different_domain': False}, + } + + score = self.auditor._calculate_technical_score(technical) + + # -10 for missing robots.txt + self.assertEqual(score, 90) + + def test_calculate_technical_score_blocks_googlebot(self): + """Test technical score when blocking Googlebot.""" + technical = { + 'robots_txt': {'exists': True, 'blocks_googlebot': True}, + 'sitemap': {'exists': True, 'is_valid_xml': True}, + 'redirect_chain': {'chain_length': 0, 'has_redirect_loop': False}, + 'indexability': {'is_indexable': True}, + 'canonical': {'has_canonical': True, 'points_to_different_domain': False}, + } + + score = self.auditor._calculate_technical_score(technical) + + # -20 for blocking Googlebot + self.assertEqual(score, 80) + + +class TestSEOAuditResultCategorization(unittest.TestCase): + """Tests for result categorization logic.""" + + def setUp(self): + """Set up with mocked auditor.""" + with patch('seo_audit.create_engine'), \ + patch('seo_audit.sessionmaker'): + from seo_audit import SEOAuditor + self.auditor = SEOAuditor.__new__(SEOAuditor) + + def test_categorize_success(self): + """Test categorizing successful audit.""" + result = { + 'errors': [], + 'http_status': 200, + 'onpage': {'meta_tags': {}}, + 'technical': {}, + } + + category = self.auditor._categorize_result(result) + self.assertEqual(category, 'success') + + def test_categorize_no_website(self): + """Test categorizing company with no website.""" + result = { + 'errors': ['No website URL configured'], + } + + category = self.auditor._categorize_result(result) + self.assertEqual(category, 'no_website') + + def test_categorize_timeout(self): + """Test categorizing timeout error.""" + result = { + 'errors': ['Timeout after 30s'], + } + + category = self.auditor._categorize_result(result) + self.assertEqual(category, 'timeout') + + def test_categorize_connection_error(self): + """Test categorizing connection error.""" + result = { + 'errors': ['Connection error: Failed to establish connection'], + } + + category = self.auditor._categorize_result(result) + self.assertEqual(category, 'connection_error') + + def test_categorize_ssl_error(self): + """Test categorizing SSL error.""" + result = { + 'errors': ['SSL Error: Certificate verify failed'], + } + + category = self.auditor._categorize_result(result) + self.assertEqual(category, 'ssl_error') + + def test_categorize_http_error(self): + """Test categorizing HTTP error (4xx/5xx).""" + result = { + 'errors': ['HTTP 404'], + 'http_status': 404, + } + + category = self.auditor._categorize_result(result) + self.assertEqual(category, 'unavailable') + + +class TestParseBatchArgument(unittest.TestCase): + """Tests for batch argument parsing.""" + + def test_parse_valid_batch(self): + """Test parsing valid batch argument.""" + from seo_audit import parse_batch_argument + + start, end = parse_batch_argument('1-10') + self.assertEqual(start, 1) + self.assertEqual(end, 10) + + def test_parse_batch_with_spaces(self): + """Test parsing batch with spaces.""" + from seo_audit import parse_batch_argument + + start, end = parse_batch_argument(' 5 - 20 ') + self.assertEqual(start, 5) + self.assertEqual(end, 20) + + def test_parse_invalid_format_no_dash(self): + """Test parsing batch without dash fails.""" + from seo_audit import parse_batch_argument + + with self.assertRaises(ValueError) as ctx: + parse_batch_argument('10') + self.assertIn('Invalid batch format', str(ctx.exception)) + + def test_parse_invalid_format_multiple_dashes(self): + """Test parsing batch with multiple dashes fails.""" + from seo_audit import parse_batch_argument + + with self.assertRaises(ValueError) as ctx: + parse_batch_argument('1-5-10') + self.assertIn('Invalid batch format', str(ctx.exception)) + + def test_parse_invalid_values_not_numbers(self): + """Test parsing batch with non-numeric values fails.""" + from seo_audit import parse_batch_argument + + with self.assertRaises(ValueError) as ctx: + parse_batch_argument('a-b') + self.assertIn('Invalid batch values', str(ctx.exception)) + + def test_parse_invalid_start_less_than_one(self): + """Test parsing batch with start < 1 fails.""" + from seo_audit import parse_batch_argument + + with self.assertRaises(ValueError) as ctx: + parse_batch_argument('0-10') + self.assertIn('Must be >= 1', str(ctx.exception)) + + def test_parse_invalid_end_less_than_start(self): + """Test parsing batch with end < start fails.""" + from seo_audit import parse_batch_argument + + with self.assertRaises(ValueError) as ctx: + parse_batch_argument('10-5') + self.assertIn('END must be >= START', str(ctx.exception)) + + +# ============================================================================ +# Helper Function Tests +# ============================================================================ + +class TestConvenienceFunctions(unittest.TestCase): + """Tests for convenience functions.""" + + def test_analyze_html_function(self): + """Test analyze_html convenience function.""" + from seo_analyzer import analyze_html + + html = 'Test

Hello

' + result = analyze_html(html, base_url='https://example.com') + + self.assertIsInstance(result, dict) + self.assertEqual(result['meta_tags']['title'], 'Test') + self.assertEqual(result['headings']['h1_count'], 1) + + +# ============================================================================ +# Run Tests +# ============================================================================ + +if __name__ == '__main__': + # Run with verbose output + unittest.main(verbosity=2)