nordabiz/tests/unit/test_link_preview.py
Maciej Pienczyn bca1decf97
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
test(messages): add unit tests for conversation models and link preview
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 13:31:44 +01:00

263 lines
9.8 KiB
Python

"""
Unit Tests — Link Preview
==========================
Tests for blueprints/messages/link_preview.py:
- OGParser with og: meta tags
- OGParser fallback to <title> and meta description
- OGParser with no meta tags
- fetch_link_preview with no URL
- fetch_link_preview skips internal URLs
- fetch_link_preview success (mocked HTTP)
- fetch_link_preview timeout handling
- fetch_link_preview non-HTML content-type
- URL extraction from text containing HTML (bare URLs only; href values are not used)
"""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
import pytest
from unittest.mock import patch, MagicMock
from requests.exceptions import Timeout
from blueprints.messages.link_preview import fetch_link_preview, OGParser
# ============================================================
# OGParser Tests
# ============================================================
class TestOGParser:
    """Exercise OGParser against small hand-written HTML documents."""

    @staticmethod
    def _parsed(markup):
        """Feed *markup* to a fresh OGParser and return the parser."""
        og_parser = OGParser()
        og_parser.feed(markup)
        return og_parser

    def test_parses_og_title_description_image(self):
        """og:title, og:description and og:image are collected in ``og``."""
        parser = self._parsed(
            '\n<html><head>\n'
            '<meta property="og:title" content="Test Title">\n'
            '<meta property="og:description" content="Test Description">\n'
            '<meta property="og:image" content="https://example.com/image.jpg">\n'
            '</head></html>\n'
        )
        assert parser.og['title'] == 'Test Title'
        assert parser.og['description'] == 'Test Description'
        assert parser.og['image'] == 'https://example.com/image.jpg'

    def test_fallback_to_title_tag_and_meta_description(self):
        """Without og: tags, <title> and meta description are still captured."""
        parser = self._parsed(
            '\n<html><head>\n'
            '<title>Fallback Title</title>\n'
            '<meta name="description" content="Fallback Description">\n'
            '</head></html>\n'
        )
        assert parser.title == 'Fallback Title'
        assert parser.og.get('description') == 'Fallback Description'
        assert 'title' not in parser.og  # og:title not set

    def test_empty_html_returns_title_from_title_tag(self):
        """A page with only a <title> exposes it and leaves the rest unset."""
        parser = self._parsed(
            "<html><head><title>Only Title</title></head></html>"
        )
        assert parser.title == 'Only Title'
        assert parser.og.get('description') is None
        assert parser.og.get('image') is None

    def test_no_meta_tags_empty_og(self):
        """A page with neither meta tags nor <title> yields nothing."""
        parser = self._parsed(
            "<html><head></head><body>No meta here</body></html>"
        )
        assert parser.og == {}
        assert parser.title is None

    def test_og_description_takes_precedence_over_meta_description(self):
        """og:description wins when a plain meta description follows it."""
        parser = self._parsed(
            '\n<html><head>\n'
            '<meta property="og:description" content="OG Desc">\n'
            '<meta name="description" content="Meta Desc">\n'
            '</head></html>\n'
        )
        assert parser.og['description'] == 'OG Desc'
# ============================================================
# fetch_link_preview Tests
# ============================================================
class TestFetchLinkPreview:
    """Test fetch_link_preview with the HTTP layer mocked out.

    No test in this class is allowed to reach the network: every call runs
    with ``requests.get`` patched in the module under test.
    """

    # Patch target for the HTTP call made by the module under test.
    _REQUESTS_GET = 'blueprints.messages.link_preview.requests.get'

    @staticmethod
    def _response(body, content_type='text/html'):
        """Build a stand-in for the object returned by ``requests.get``.

        Args:
            body: Text exposed as the response's ``text`` attribute.
            content_type: Value of the ``content-type`` response header.
        """
        mock_resp = MagicMock()
        mock_resp.headers = {'content-type': content_type}
        mock_resp.text = body
        mock_resp.raise_for_status = MagicMock()
        return mock_resp

    def _preview(self, text, response):
        """Call fetch_link_preview on *text* with ``requests.get`` mocked.

        The patched ``requests.get`` returns *response* for any URL.
        """
        with patch(self._REQUESTS_GET, return_value=response):
            return fetch_link_preview(text)

    def _assert_no_preview_without_request(self, text):
        """Assert *text* yields ``None`` and that no HTTP request was made.

        Checking only for ``None`` is not enough: a failed request also
        produces ``None`` (see the timeout test), so an unmocked call could
        pass even if the URL was wrongly fetched. Asserting that
        ``requests.get`` was never called pins the actual skip behaviour and
        keeps the test off the network.
        """
        with patch(self._REQUESTS_GET) as mock_get:
            result = fetch_link_preview(text)
        assert result is None
        mock_get.assert_not_called()

    def test_returns_none_for_none_text(self):
        """``None`` input gives no preview and triggers no request."""
        self._assert_no_preview_without_request(None)

    def test_returns_none_for_empty_text(self):
        """Empty text gives no preview and triggers no request."""
        self._assert_no_preview_without_request('')

    def test_returns_none_when_no_url_in_text(self):
        """Text without any URL gives no preview and triggers no request."""
        self._assert_no_preview_without_request('Cześć, jak się masz?')

    def test_returns_none_for_internal_nordabiznes_url(self):
        """Links to nordabiznes.pl are skipped without being fetched."""
        self._assert_no_preview_without_request(
            'Sprawdź https://nordabiznes.pl/company/test'
        )

    def test_returns_none_for_staging_internal_url(self):
        """Links to staging.nordabiznes.pl are skipped without being fetched."""
        self._assert_no_preview_without_request(
            'Link: https://staging.nordabiznes.pl/company/foo'
        )

    def test_returns_none_for_localhost_url(self):
        """Links to localhost are skipped without being fetched."""
        self._assert_no_preview_without_request(
            'Dev: http://localhost:5000/test'
        )

    def test_success_returns_dict_with_og_data(self):
        """A page with og: tags produces url/title/description/image."""
        html = (
            '<html><head>\n'
            '<meta property="og:title" content="Example Title">\n'
            '<meta property="og:description" content="Example Description">\n'
            '<meta property="og:image" content="https://example.com/img.jpg">\n'
            '</head></html>'
        )
        response = self._response(html, 'text/html; charset=utf-8')
        result = self._preview('Check out https://example.com', response)
        assert result is not None
        assert result['url'] == 'https://example.com'
        assert result['title'] == 'Example Title'
        assert result['description'] == 'Example Description'
        assert result['image'] == 'https://example.com/img.jpg'

    def test_success_uses_title_tag_fallback(self):
        """Without og:title the preview title comes from the <title> tag."""
        html = "<html><head><title>Page Title</title></head></html>"
        result = self._preview(
            'See https://example.com for details', self._response(html)
        )
        assert result is not None
        assert result['title'] == 'Page Title'

    def test_returns_none_on_timeout(self):
        """A request that raises ``Timeout`` results in no preview."""
        with patch(self._REQUESTS_GET, side_effect=Timeout):
            result = fetch_link_preview('Visit https://slow-site.example.com')
        assert result is None

    def test_returns_none_for_non_html_content_type(self):
        """A non-HTML content-type (here a PDF) results in no preview."""
        response = self._response('%PDF-1.4 binary content', 'application/pdf')
        result = self._preview('Download https://example.com/doc.pdf', response)
        assert result is None

    def test_returns_none_when_page_has_no_title(self):
        """An HTML page with no title of any kind results in no preview."""
        html = "<html><head><meta name='robots' content='noindex'></head></html>"
        result = self._preview('Visit https://example.com', self._response(html))
        assert result is None

    def test_title_truncated_to_200_chars(self):
        """A 300-character title comes back no longer than 200 characters."""
        long_title = 'A' * 300
        html = f"<html><head><title>{long_title}</title></head></html>"
        result = self._preview('https://example.com', self._response(html))
        assert result is not None
        assert len(result['title']) <= 200

    def test_description_truncated_to_300_chars(self):
        """A 400-character description comes back no longer than 300."""
        long_desc = 'B' * 400
        html = (
            '<html><head>\n'
            '<title>Title</title>\n'
            f'<meta name="description" content="{long_desc}">\n'
            '</head></html>'
        )
        result = self._preview('https://example.com', self._response(html))
        assert result is not None
        assert len(result['description']) <= 300
# ============================================================
# URL Extraction from HTML Content Tests
# ============================================================
class TestURLExtractionFromHTML:
    """Test which URL is picked up from message text that contains HTML."""

    # Patch target for the HTTP call made by the module under test.
    _REQUESTS_GET = 'blueprints.messages.link_preview.requests.get'

    @staticmethod
    def _html_response(body):
        """Build a stand-in ``text/html`` response whose text is *body*."""
        mock_resp = MagicMock()
        mock_resp.headers = {'content-type': 'text/html'}
        mock_resp.text = body
        mock_resp.raise_for_status = MagicMock()
        return mock_resp

    def test_extracts_url_from_anchor_tag(self):
        """A URL that appears only inside ``href`` produces no preview.

        Despite the test name (kept so existing test IDs stay stable), the
        expected behaviour is that HTML tags are stripped before URL
        detection, so only bare URLs in the visible text count.
        """
        text = '<a href="https://external-site.com/page">Visit site</a>'
        # Patch the HTTP call so the test can neither hit the network nor
        # pass merely because a real request happened to fail.
        with patch(self._REQUESTS_GET) as mock_get:
            result = fetch_link_preview(text)
        # After stripping tags the text is "Visit site": no URL, so no preview
        assert result is None
        mock_get.assert_not_called()

    def test_extracts_bare_url_from_mixed_html(self):
        """Bare URL in text alongside HTML is extracted correctly."""
        text = '<p>Check out https://example.com/news for more</p>'
        response = self._html_response(
            '<html><head><title>News</title></head></html>'
        )
        with patch(self._REQUESTS_GET, return_value=response):
            result = fetch_link_preview(text)
        assert result is not None
        assert result['url'] == 'https://example.com/news'

    def test_first_url_is_used_when_multiple_urls_present(self):
        """When text contains multiple URLs, the first one is used."""
        text = 'First: https://first.example.com and second: https://second.example.com'
        response = self._html_response(
            '<html><head><title>First</title></head></html>'
        )
        with patch(self._REQUESTS_GET, return_value=response):
            result = fetch_link_preview(text)
        assert result is not None
        assert result['url'] == 'https://first.example.com'