"""
Unit Tests — Link Preview
==========================

Tests for blueprints/messages/link_preview.py:
- OGParser with og: meta tags
- OGParser fallback to <title> and meta description
- OGParser with no meta tags
- fetch_link_preview with no URL
- fetch_link_preview skips internal URLs
- fetch_link_preview success (mocked HTTP)
- fetch_link_preview timeout handling
- fetch_link_preview non-HTML content-type
- URL extraction from HTML anchor tags
"""

import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))

import pytest
from unittest.mock import patch, MagicMock
from requests.exceptions import Timeout

from blueprints.messages.link_preview import fetch_link_preview, OGParser


# Dotted path of the HTTP call that every network-facing test replaces.
_REQUESTS_GET = 'blueprints.messages.link_preview.requests.get'


def _make_response(text, content_type='text/html'):
    """Build a stand-in for the object returned by ``requests.get``.

    Args:
        text: Body exposed through the ``text`` attribute.
        content_type: Value stored under the ``content-type`` header.

    Returns:
        A ``MagicMock`` carrying ``headers``, ``text`` and a no-op
        ``raise_for_status``.
    """
    response = MagicMock()
    response.headers = {'content-type': content_type}
    response.text = text
    response.raise_for_status = MagicMock()
    return response


# ============================================================
# OGParser Tests
# ============================================================

class TestOGParser:
    """Test OGParser HTML parsing."""

    def test_parses_og_title_description_image(self):
        html = """
        <html><head>
        <meta property="og:title" content="Test Title">
        <meta property="og:description" content="Test Description">
        <meta property="og:image" content="https://example.com/image.jpg">
        </head></html>
        """
        parser = OGParser()
        parser.feed(html)
        assert parser.og['title'] == 'Test Title'
        assert parser.og['description'] == 'Test Description'
        assert parser.og['image'] == 'https://example.com/image.jpg'

    def test_fallback_to_title_tag_and_meta_description(self):
        html = """
        <html><head>
        <title>Fallback Title</title>
        <meta name="description" content="Fallback Description">
        </head></html>
        """
        parser = OGParser()
        parser.feed(html)
        assert parser.title == 'Fallback Title'
        assert parser.og.get('description') == 'Fallback Description'
        assert 'title' not in parser.og  # og:title not set

    def test_empty_html_returns_title_from_title_tag(self):
        html = "<html><head><title>Only Title</title></head></html>"
        parser = OGParser()
        parser.feed(html)
        assert parser.title == 'Only Title'
        assert parser.og.get('description') is None
        assert parser.og.get('image') is None

    def test_no_meta_tags_empty_og(self):
        html = "<html><body><p>No meta here</p></body></html>"
        parser = OGParser()
        parser.feed(html)
        assert parser.og == {}
        assert parser.title is None

    def test_og_description_takes_precedence_over_meta_description(self):
        # NOTE(review): the plain meta description is placed first so the
        # og: value has to win over an already-seen fallback — confirm this
        # ordering matches the intent of the parser's precedence rule.
        html = """
        <html><head>
        <meta name="description" content="Meta Desc">
        <meta property="og:description" content="OG Desc">
        </head></html>
        """
        parser = OGParser()
        parser.feed(html)
        assert parser.og['description'] == 'OG Desc'


# ============================================================
# fetch_link_preview Tests
# ============================================================

class TestFetchLinkPreview:
    """Test fetch_link_preview function."""

    def test_returns_none_for_none_text(self):
        result = fetch_link_preview(None)
        assert result is None

    def test_returns_none_for_empty_text(self):
        result = fetch_link_preview('')
        assert result is None

    def test_returns_none_when_no_url_in_text(self):
        result = fetch_link_preview('Cześć, jak się masz?')
        assert result is None

    def test_returns_none_for_internal_nordabiznes_url(self):
        result = fetch_link_preview('Sprawdź https://nordabiznes.pl/company/test')
        assert result is None

    def test_returns_none_for_staging_internal_url(self):
        result = fetch_link_preview('Link: https://staging.nordabiznes.pl/company/foo')
        assert result is None

    def test_returns_none_for_localhost_url(self):
        result = fetch_link_preview('Dev: http://localhost:5000/test')
        assert result is None

    def test_success_returns_dict_with_og_data(self):
        html = """
        <html><head>
        <meta property="og:title" content="Example Title">
        <meta property="og:description" content="Example Description">
        <meta property="og:image" content="https://example.com/img.jpg">
        </head></html>
        """
        mock_resp = _make_response(html, 'text/html; charset=utf-8')
        with patch(_REQUESTS_GET, return_value=mock_resp):
            result = fetch_link_preview('Check out https://example.com')
        assert result is not None
        assert result['url'] == 'https://example.com'
        assert result['title'] == 'Example Title'
        assert result['description'] == 'Example Description'
        assert result['image'] == 'https://example.com/img.jpg'

    def test_success_uses_title_tag_fallback(self):
        html = "<html><head><title>Page Title</title></head></html>"
        mock_resp = _make_response(html)
        with patch(_REQUESTS_GET, return_value=mock_resp):
            result = fetch_link_preview('See https://example.com for details')
        assert result is not None
        assert result['title'] == 'Page Title'

    def test_returns_none_on_timeout(self):
        with patch(_REQUESTS_GET, side_effect=Timeout):
            result = fetch_link_preview('Visit https://slow-site.example.com')
        assert result is None

    def test_returns_none_for_non_html_content_type(self):
        mock_resp = _make_response('%PDF-1.4 binary content', 'application/pdf')
        with patch(_REQUESTS_GET, return_value=mock_resp):
            result = fetch_link_preview('Download https://example.com/doc.pdf')
        assert result is None

    def test_returns_none_when_page_has_no_title(self):
        # A well-formed page that carries neither <title> nor og:title.
        html = "<html><head></head><body></body></html>"
        mock_resp = _make_response(html)
        with patch(_REQUESTS_GET, return_value=mock_resp):
            result = fetch_link_preview('Visit https://example.com')
        assert result is None

    def test_title_truncated_to_200_chars(self):
        long_title = 'A' * 300
        html = f"<html><head><title>{long_title}</title></head></html>"
        mock_resp = _make_response(html)
        with patch(_REQUESTS_GET, return_value=mock_resp):
            result = fetch_link_preview('https://example.com')
        assert result is not None
        assert len(result['title']) <= 200

    def test_description_truncated_to_300_chars(self):
        long_desc = 'B' * 400
        # NOTE(review): og:description is used as the description source
        # here; a plain meta description would exercise the same limit.
        html = f"""
        <html><head>
        <title>Title</title>
        <meta property="og:description" content="{long_desc}">
        </head></html>
        """
        mock_resp = _make_response(html)
        with patch(_REQUESTS_GET, return_value=mock_resp):
            result = fetch_link_preview('https://example.com')
        assert result is not None
        assert len(result['description']) <= 300


# ============================================================
# URL Extraction from HTML Content Tests
# ============================================================

class TestURLExtractionFromHTML:
    """Test how URLs are located in message text that contains HTML."""

    def test_extracts_url_from_anchor_tag(self):
        """A URL present only in an <a href> attribute yields no preview."""
        text = '<a href="https://example.com/page">Visit site</a>'
        # The function strips HTML tags before extracting URLs,
        # so href URL is not extracted — only bare URLs in text are.
        # This test verifies the stripping behavior: no URL in visible text → None.
        result = fetch_link_preview(text)
        # After stripping tags, text is "Visit site" — no URL → None
        assert result is None

    def test_extracts_bare_url_from_mixed_html(self):
        """Bare URL in text alongside HTML is extracted correctly."""
        text = '<p>Check out https://example.com/news for more</p>'
        mock_resp = _make_response('<html><head><title>News</title></head></html>')
        with patch(_REQUESTS_GET, return_value=mock_resp):
            result = fetch_link_preview(text)
        assert result is not None
        assert result['url'] == 'https://example.com/news'

    def test_first_url_is_used_when_multiple_urls_present(self):
        """When text contains multiple URLs, the first one is used."""
        text = 'First: https://first.example.com and second: https://second.example.com'
        mock_resp = _make_response('<html><head><title>First</title></head></html>')
        with patch(_REQUESTS_GET, return_value=mock_resp):
            result = fetch_link_preview(text)
        assert result is not None
        assert result['url'] == 'https://first.example.com'