nordabiz/utils/helpers.py
Maciej Pienczyn b8f18c94e5
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
feat(messages): auto-linkify URLs in message content
URLs in messages are now automatically converted to clickable links
opening in a new tab. Works for both old plain-text and new Quill
HTML messages. Uses linkify Jinja2 filter that only processes text
nodes outside existing <a>/<img> tags.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-19 12:25:43 +01:00

177 lines
4.4 KiB
Python

"""
Helper Functions
================
Common utility functions used across blueprints.
"""
import re
import logging
import bleach
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Whitelist for rich-text content (announcements, events, proceedings):
# the only tags, and the only attributes per tag, that survive sanitizing.
_ALLOWED_TAGS = [
    'p', 'br', 'strong', 'em', 'b', 'i', 'a',
    'ul', 'ol', 'li', 'h3', 'h4', 'blockquote', 'img',
]
_ALLOWED_ATTRS = {
    'a': ['href', 'target', 'rel'],
    'img': ['src', 'alt'],
}
def sanitize_html(content):
    """
    Reduce rich-text HTML to the module's tag/attribute whitelist.

    Guards against stored XSS by passing the markup through ``bleach.clean``
    with ``_ALLOWED_TAGS`` and ``_ALLOWED_ATTRS``; ``strip=True`` is passed so
    that disallowed tags are removed instead of being escaped.

    Args:
        content: HTML string to sanitize

    Returns:
        ``content`` itself when it is falsy (``None``, ``""``), otherwise the
        sanitized HTML string.
    """
    if content:
        return bleach.clean(
            content,
            tags=_ALLOWED_TAGS,
            attributes=_ALLOWED_ATTRS,
            strip=True,
        )
    # Nothing to clean: hand back the falsy value as-is.
    return content
# URL candidate inside a text node. Besides whitespace and the raw characters
# < > " ', the URL also ends at the entity-escaped forms of those characters
# (&lt; &gt; &quot; &apos; &#34; &#39; &#x22; &#x27;), because escaped HTML text
# delimits URLs with entities rather than raw characters. "&amp;" stays part
# of the URL: it is how "&" in a query string appears in HTML text.
_LINKIFY_URL_RE = re.compile(
    r'https?://(?:(?!&(?:lt|gt|quot|apos|#34|#39|#x22|#x27);)[^\s<>"\'])+'
)
# Opening or closing tag: group 1 is "/" for closing tags, group 2 the tag name.
_LINKIFY_TAG_RE = re.compile(r'<(/?)(\w+)([^>]*)>')
# A character/entity reference sitting at the very end of a string.
_LINKIFY_ENTITY_TAIL_RE = re.compile(r'&#?\w+;$')
# Sentence punctuation that should not be swallowed into a link.
_LINKIFY_TRAILING_PUNCT = '.,;:!?'
_LINKIFY_ANCHOR = '<a href="{0}" target="_blank" style="color:var(--primary);word-break:break-all;">{0}</a>'


def _linkify_render(match):
    """Turn one URL match into an anchor, leaving trailing punctuation outside it."""
    candidate = match.group(0)
    end = len(candidate)
    while end:
        last = candidate[end - 1]
        if last == ')':
            # Keep a ")" that closes a "(" inside the URL (e.g. wiki links);
            # drop one that belongs to the surrounding sentence.
            if candidate.count(')', 0, end) <= candidate.count('(', 0, end):
                break
        elif last == ';':
            # A ";" that terminates an entity such as "&amp;" is part of the URL.
            if _LINKIFY_ENTITY_TAIL_RE.search(candidate[:end]):
                break
        elif last not in _LINKIFY_TRAILING_PUNCT:
            break
        end -= 1
    url, trailing = candidate[:end], candidate[end:]
    if not url.partition('://')[2]:
        # Only the bare scheme is left (e.g. "https://..."): not worth linking.
        return candidate
    return _LINKIFY_ANCHOR.format(url) + trailing


def linkify_urls(html):
    """
    Auto-link http(s) URLs found in the text nodes of an HTML fragment.

    Text inside an existing <a>...</a> element is left untouched, and tags
    themselves (including their attributes, e.g. <img src="...">) are copied
    verbatim, so URLs that are already links or attribute values are never
    wrapped a second time. Every generated link opens in a new tab.

    Trailing sentence punctuation (".", ",", ";", ":", "!", "?", an unbalanced
    ")") and entity-escaped delimiters ("&gt;", "&quot;", ...) following a URL
    stay outside the generated link.

    NOTE(review): the previous docstring said links to nordabiznes.pl are
    treated as trusted internal links; no host-specific handling exists in
    this function - confirm whether that was ever intended.

    Security: the result is wrapped in ``markupsafe.Markup``, which templates
    render without escaping. The input must therefore already be escaped or
    sanitized HTML; raw user text must not be passed in directly.

    Args:
        html: HTML string (or falsy value)

    Returns:
        ``html`` unchanged when it is falsy, otherwise a ``Markup`` string with
        bare URLs wrapped in <a> elements.
    """
    if not html:
        return html

    from markupsafe import Markup

    parts = []
    pos = 0
    in_anchor = False
    for tag in _LINKIFY_TAG_RE.finditer(html):
        # Text node preceding this tag.
        text = html[pos:tag.start()]
        if text:
            parts.append(text if in_anchor else _LINKIFY_URL_RE.sub(_linkify_render, text))
        parts.append(tag.group(0))
        pos = tag.end()
        # Only <a> has text content that must be skipped. <img> is a void
        # element with no closing tag, so tracking it would leave the
        # "inside a tag" flag stuck on for the rest of the document.
        if tag.group(2).lower() == 'a':
            in_anchor = tag.group(1) != '/'

    # Text after the last tag (or the whole input when it contains no tags).
    remainder = html[pos:]
    if remainder:
        parts.append(remainder if in_anchor else _LINKIFY_URL_RE.sub(_linkify_render, remainder))

    return Markup(''.join(parts))
def sanitize_input(text, max_length=1000):
    """
    Normalize a piece of user-supplied text.

    Three steps are applied, in this order: NUL bytes are removed, the result
    is cut to ``max_length`` characters, and surrounding whitespace is
    stripped. Because truncation happens before stripping, leading whitespace
    counts toward the length limit.

    Args:
        text: Input string to sanitize
        max_length: Maximum allowed length (default 1000)

    Returns:
        The normalized string; ``""`` when ``text`` is falsy.
    """
    if text:
        without_nulls = text.replace('\x00', '')
        return without_nulls[:max_length].strip()
    return ""
# Pragmatic address shape: local part, "@", dotted domain, alphabetic TLD of at
# least two letters. This is a simplified check, not the full RFC 5322 grammar.
_EMAIL_PATTERN = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')


def validate_email(email):
    """
    Validate email format.

    Args:
        email: Email address to validate

    Returns:
        bool: True if ``email`` is a string of at most 255 characters that
        matches the simplified address pattern in full, False otherwise
        (including for ``None``, empty and non-string values).
    """
    if not isinstance(email, str) or not email or len(email) > 255:
        return False
    # fullmatch instead of match + "$": "$" also matches just before a trailing
    # newline, which would let "user@example.com\n" through.
    return _EMAIL_PATTERN.fullmatch(email) is not None
def validate_password(password):
    """
    Validate password strength.

    Requirements:
    - Minimum 8 characters
    - At least one uppercase letter
    - At least one lowercase letter
    - At least one digit

    Letter case is checked with ``str.isupper`` / ``str.islower`` so that
    non-ASCII letters (e.g. Polish "Ł", "ż") count, matching the digit check,
    which is already Unicode-aware.

    Args:
        password: Password to validate

    Returns:
        tuple: (is_valid: bool, message: str) - the message is "OK" on
        success, otherwise the user-facing description of the first
        requirement that failed.
    """
    if not password or len(password) < 8:
        return False, "Hasło musi mieć minimum 8 znaków"
    if not any(ch.isupper() for ch in password):
        return False, "Hasło musi zawierać przynajmniej jedną wielką literę"
    if not any(ch.islower() for ch in password):
        return False, "Hasło musi zawierać przynajmniej jedną małą literę"
    if not re.search(r'\d', password):
        return False, "Hasło musi zawierać przynajmniej jedną cyfrę"
    return True, "OK"
def ensure_url(url):
    """
    Ensure URL has http:// or https:// scheme.

    Surrounding whitespace is removed first. The scheme check ignores case,
    so "HTTP://..." is recognised and not prefixed a second time. A
    protocol-relative URL ("//host/path") gets "https:" put in front of it
    instead of a second pair of slashes.

    Args:
        url: URL string

    Returns:
        The falsy input unchanged (``None``, ``""``); ``""`` for a
        whitespace-only string; otherwise the stripped URL, with an
        ``https://`` prefix added when no http/https scheme is present.
    """
    if not url:
        return url
    cleaned = url.strip()
    if not cleaned:
        # Whitespace only: there is no URL to attach a scheme to.
        return cleaned
    if cleaned.lower().startswith(('http://', 'https://')):
        return cleaned
    if cleaned.startswith('//'):
        return f'https:{cleaned}'
    return f'https://{cleaned}'