"""
Helper Functions
================

Common utility functions used across blueprints.
"""

import logging
import re

import bleach

logger = logging.getLogger(__name__)

# Allowed HTML tags and attributes for rich-text content
# (announcements, events, proceedings).
_ALLOWED_TAGS = ['p', 'br', 'strong', 'em', 'b', 'i', 'a', 'ul', 'ol', 'li',
                 'h3', 'h4', 'blockquote', 'img']
_ALLOWED_ATTRS = {
    'a': ['href', 'target', 'rel'],
    'img': ['src', 'alt', 'width', 'height', 'style'],
}
# data: is allowed for base64 inline images.
# NOTE(review): the protocol list is passed to bleach for the whole document,
# so data: URIs are presumably accepted in <a href> as well -- confirm that
# this is acceptable or restrict it per attribute.
_ALLOWED_PROTOCOLS = ['http', 'https', 'data']

# Bare URL inside a text node; stops at whitespace, angle brackets and quotes.
_URL_PATTERN = re.compile(r'(https?://[^\s<>"\']+)')
# Any opening or closing tag: group 1 is '/' for closing tags, group 2 the name.
_TAG_PATTERN = re.compile(r'<(/?)(\w+)([^>]*)>')
# Simplified e-mail address shape (not a full RFC 5322 grammar).
_EMAIL_PATTERN = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

# Markup emitted for every auto-linked URL. Uses only attributes listed for
# 'a' in _ALLOWED_ATTRS.
# NOTE(review): the attribute set (new tab + noopener) is an assumption based
# on the linkify_urls docstring -- confirm against the templates in use.
_LINK_TEMPLATE = '<a href="{0}" target="_blank" rel="noopener noreferrer">{0}</a>'


def sanitize_html(content):
    """
    Sanitize HTML content to prevent stored XSS.

    Tags outside the whitelist are stripped (not escaped); attributes and URL
    protocols are limited to the module-level whitelists.

    Args:
        content: HTML string to sanitize

    Returns:
        Sanitized HTML string; falsy input (None, '') is returned unchanged
    """
    if not content:
        return content
    return bleach.clean(
        content,
        tags=_ALLOWED_TAGS,
        attributes=_ALLOWED_ATTRS,
        protocols=_ALLOWED_PROTOCOLS,
        strip=True,
    )


def _anchor_for(match):
    """Return the anchor markup for a URL regex match."""
    return _LINK_TEMPLATE.format(match.group(0))


def linkify_urls(html):
    """
    Auto-link bare URLs in HTML content.

    Only text nodes are processed: tag attributes (e.g. ``<img src=...>``) are
    copied verbatim, and text already inside an ``<a>`` element is left alone
    so existing links are never nested.

    Args:
        html: HTML string to process

    Returns:
        markupsafe.Markup with bare URLs wrapped in anchors; falsy input
        (None, '') is returned unchanged
    """
    if not html:
        return html

    from markupsafe import Markup

    parts = []
    pos = 0
    inside_anchor = False

    for tag in _TAG_PATTERN.finditer(html):
        # Text between the previous tag and this one.
        text_chunk = html[pos:tag.start()]
        if text_chunk:
            if inside_anchor:
                parts.append(text_chunk)
            else:
                parts.append(_URL_PATTERN.sub(_anchor_for, text_chunk))

        # The tag itself, attributes included, is never rewritten.
        parts.append(tag.group(0))
        pos = tag.end()

        # Only <a> opens a region to skip. <img> is a void element with no
        # closing tag, so letting it set the flag would disable linkifying
        # for the whole rest of the document.
        if tag.group(2).lower() == 'a':
            inside_anchor = tag.group(1) != '/'

    # Text after the last tag (or the whole input when there are no tags).
    tail = html[pos:]
    if tail:
        if inside_anchor:
            parts.append(tail)
        else:
            parts.append(_URL_PATTERN.sub(_anchor_for, tail))

    return Markup(''.join(parts))


def sanitize_input(text, max_length=1000):
    """
    Sanitize user input - remove potentially dangerous characters.

    Null bytes are removed, the result is cut to ``max_length`` characters and
    then surrounding whitespace is stripped (in that order).

    Args:
        text: Input string to sanitize
        max_length: Maximum allowed length (default 1000)

    Returns:
        Sanitized string; "" for falsy input
    """
    if not text:
        return ""

    without_nulls = text.replace('\x00', '')
    return without_nulls[:max_length].strip()


def validate_email(email):
    """
    Validate email format.

    Args:
        email: Email address to validate

    Returns:
        bool: True if valid, False otherwise
    """
    if not email or len(email) > 255:
        return False

    # fullmatch instead of match + '$': '$' also matches just before a
    # trailing newline, which would let "user@example.com\n" through.
    return _EMAIL_PATTERN.fullmatch(email) is not None


def validate_password(password):
    """
    Validate password strength.

    Requirements:
    - Minimum 8 characters
    - At least one uppercase letter
    - At least one lowercase letter
    - At least one digit

    Args:
        password: Password to validate

    Returns:
        tuple: (is_valid: bool, message: str)
    """
    if not password or len(password) < 8:
        return False, "Hasło musi mieć minimum 8 znaków"

    # str.isupper / str.islower are Unicode-aware, so letters such as "Ł" or
    # "ż" count; every password accepted by an ASCII-only check still passes.
    if not any(ch.isupper() for ch in password):
        return False, "Hasło musi zawierać przynajmniej jedną wielką literę"

    if not any(ch.islower() for ch in password):
        return False, "Hasło musi zawierać przynajmniej jedną małą literę"

    if not re.search(r'\d', password):
        return False, "Hasło musi zawierać przynajmniej jedną cyfrę"

    return True, "OK"


def ensure_url(url):
    """
    Ensure URL has http:// or https:// scheme.

    Args:
        url: URL string

    Returns:
        URL with https:// prefix if no scheme present; falsy input and URLs
        that already carry a scheme are returned unchanged
    """
    # Scheme names are case-insensitive, so "HTTP://example.com" must not be
    # turned into "https://HTTP://example.com".
    if url and not url.lower().startswith(('http://', 'https://')):
        return f'https://{url}'
    return url