"""
Helper Functions
================
Common utility functions used across blueprints.
"""
import re
import logging
import bleach
logger = logging.getLogger(__name__)

# Whitelist used when sanitising rich-text content
# (announcements, events, proceedings).
_ALLOWED_TAGS = [
    'p', 'br', 'strong', 'em', 'b', 'i', 'a',
    'ul', 'ol', 'li', 'h3', 'h4', 'blockquote', 'img',
]

# Per-tag attribute whitelist; tags not listed here keep no attributes.
_ALLOWED_ATTRS = {
    'a': ['href', 'target', 'rel'],
    'img': ['src', 'alt', 'width', 'height', 'style'],
}

# "data" is accepted so that base64-encoded inline images are kept.
# NOTE(review): this single list is what sanitize_html passes as `protocols`,
# so data: URIs are presumably accepted in <a href> as well as <img src> —
# confirm that is intended.
_ALLOWED_PROTOCOLS = ['http', 'https', 'data']
def sanitize_html(content):
    """
    Clean user-supplied HTML so it can be stored without risking stored XSS.

    Only the tags, attributes and URL protocols listed in the module-level
    whitelists are kept; markup for any other tag is stripped
    (``strip=True``) rather than escaped.

    Args:
        content: HTML string to sanitize

    Returns:
        Sanitized HTML string; falsy input (``None``, ``""``) is returned
        unchanged.
    """
    if content:
        clean_options = {
            'tags': _ALLOWED_TAGS,
            'attributes': _ALLOWED_ATTRS,
            'protocols': _ALLOWED_PROTOCOLS,
            'strip': True,
        }
        return bleach.clean(content, **clean_options)

    # Nothing to clean - hand back exactly what was passed in.
    return content
# Bare http(s) URL in text: runs until whitespace, an angle bracket or a quote.
_URL_RE = re.compile(r'(https?://[^\s<>"\']+)')
# Any opening or closing HTML tag: group 1 is "/" for closing tags, group 2 the name.
_TAG_RE = re.compile(r'<(/?)(\w+)([^>]*)>')
# Sentence punctuation that should stay outside a generated link.
# ";" is deliberately absent so entities such as "&amp;" are never cut in half.
_TRAILING_PUNCT = '.,:!?'


def _anchor_for(match):
    """Return anchor markup for one URL match, keeping trailing punctuation outside the link."""
    url = match.group(0)
    trailing = ''
    while url and (
        url[-1] in _TRAILING_PUNCT
        # A ")" only belongs to the URL when it closes a "(" inside the URL.
        or (url[-1] == ')' and url.count(')') > url.count('('))
    ):
        trailing = url[-1] + trailing
        url = url[:-1]

    if not url.partition('://')[2]:
        # Nothing but the scheme is left - not worth linking.
        return match.group(0)

    # The URL pattern excludes quotes, angle brackets and whitespace, so the
    # value can be placed inside a double-quoted attribute as-is.
    return '<a href="{0}" target="_blank" rel="noopener noreferrer">{0}</a>{1}'.format(
        url, trailing
    )


def linkify_urls(html):
    """
    Auto-link bare URLs in HTML content.

    Text that is already inside an ``<a>`` element is left alone, and URLs
    that appear inside tag markup (for example ``<img src="...">``) are never
    touched because tags are copied through verbatim. Every generated link,
    including links to nordabiznes.pl, opens in a new tab.

    NOTE: the result is wrapped in ``Markup`` without escaping, so the input
    is expected to be HTML that has already been sanitized.

    Args:
        html: HTML string to process

    Returns:
        ``Markup`` with bare URLs turned into anchors; falsy input is
        returned unchanged.
    """
    if not html:
        return html

    from markupsafe import Markup

    pieces = []
    pos = 0
    inside_anchor = False

    def convert(text):
        # Text inside an existing <a> must not get a nested anchor.
        return text if inside_anchor else _URL_RE.sub(_anchor_for, text)

    for tag in _TAG_RE.finditer(html):
        if tag.start() > pos:
            pieces.append(convert(html[pos:tag.start()]))
        pieces.append(tag.group(0))
        pos = tag.end()
        # Only <a> has text content to protect. <img> is a void element that
        # is never closed, so tracking it would disable linking for the rest
        # of the document.
        if tag.group(2).lower() == 'a':
            inside_anchor = tag.group(1) != '/'

    if pos < len(html):
        pieces.append(convert(html[pos:]))

    return Markup(''.join(pieces))
def sanitize_input(text, max_length=1000):
    """
    Normalize a piece of user input.

    Null bytes are removed, surrounding whitespace is stripped and the
    result is limited to ``max_length`` characters. No other characters are
    filtered or escaped.

    Args:
        text: Input string to sanitize
        max_length: Maximum allowed length of the result (default 1000)

    Returns:
        Sanitized string; ``""`` for falsy input.
    """
    if not text:
        return ""

    cleaned = text.replace('\x00', '')

    # Strip before truncating so leading whitespace does not use up the
    # length budget, then strip again in case the cut landed on whitespace.
    return cleaned.strip()[:max_length].strip()
# Pragmatic approximation of an e-mail address (not a full RFC 5322 grammar):
# local part, "@", domain labels and an alphabetic TLD of at least two letters.
_EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')


def validate_email(email):
    """
    Validate email format.

    Args:
        email: Email address to validate

    Returns:
        bool: True if the address is non-empty, at most 255 characters long
        and matches the simplified address pattern; False otherwise.
    """
    if not email or len(email) > 255:
        return False

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let "user@example.com\n" through.
    return _EMAIL_RE.fullmatch(email) is not None
def validate_password(password):
    """
    Validate password strength.

    Requirements, checked in this order (the first failure is reported):
    - Minimum 8 characters
    - At least one uppercase letter
    - At least one lowercase letter
    - At least one digit

    Letter case is determined with Unicode-aware string methods, so
    non-ASCII letters such as Polish "Ż" or "ł" satisfy the letter rules.

    Args:
        password: Password to validate

    Returns:
        tuple: (is_valid: bool, message: str) - the message is "OK" on
        success, otherwise a user-facing description of the failed rule.
    """
    if not password or len(password) < 8:
        return False, "Hasło musi mieć minimum 8 znaków"

    # (character predicate, message returned when no character satisfies it)
    character_rules = (
        (str.isupper, "Hasło musi zawierać przynajmniej jedną wielką literę"),
        (str.islower, "Hasło musi zawierać przynajmniej jedną małą literę"),
        (str.isdecimal, "Hasło musi zawierać przynajmniej jedną cyfrę"),
    )
    for is_required_kind, failure_message in character_rules:
        if not any(is_required_kind(ch) for ch in password):
            return False, failure_message

    return True, "OK"
def ensure_url(url):
    """
    Ensure URL has http:// or https:// scheme.

    Surrounding whitespace is removed first. The scheme check is
    case-insensitive, and protocol-relative URLs ("//host/path") are
    completed with "https:" instead of receiving a second pair of slashes.

    Args:
        url: URL string

    Returns:
        The URL unchanged (apart from whitespace stripping) when it already
        starts with http:// or https://, otherwise the URL with an https://
        prefix. Falsy input (``None``, ``""``) is returned as-is.
    """
    if not url:
        return url

    candidate = url.strip()
    if not candidate:
        # Whitespace only - there is nothing to prefix.
        return candidate

    if candidate.lower().startswith(('http://', 'https://')):
        return candidate

    if candidate.startswith('//'):
        # Protocol-relative URL: only the scheme itself is missing.
        return f'https:{candidate}'

    return f'https://{candidate}'