# nordabiz/utils/markdown.py

"""
Simple Markdown Parser for Forum
================================

Converts basic markdown to safe HTML.
Supports: bold, italic, code, links, auto-links, lists, quotes, @mentions
"""

import re
from markupsafe import Markup, escape


# Line prefixes that identify block-level HTML; no <br> is emitted next to them.
_BLOCK_PREFIXES = (
    '<ul', '</ul>', '<li', '</li>',
    '<blockquote', '</blockquote>', '<pre', '</pre>',
)

# Attributes shared by every generated anchor.
_LINK_ATTRS = 'target="_blank" rel="noopener noreferrer" class="forum-link"'

# Placeholder that stands in for finished HTML while the remaining passes run.
# NUL bytes are removed from the input first, so users cannot forge one.
_TOKEN_RE = re.compile(r'\x00(\d+)\x00')

_FENCED_CODE_RE = re.compile(r'```(.*?)```', re.DOTALL)
_INLINE_CODE_RE = re.compile(r'`([^`\n]+)`')
_LINK_OR_URL_RE = re.compile(
    # [label](url): label stays on one line, url has no whitespace/placeholder
    r'\[([^\]\n]+)\]\(([^)\s\x00]+)\)'
    # bare URL: stops before whitespace, ')', a placeholder, or an escaped
    # <, >, " or ' (but still accepts &amp; inside query strings)
    r'|(?<!["\'>=/(])https?://(?:(?!&(?:lt|gt|#34|#39);)[^\s<)\x00])+'
)
# Not preceded by a word character (skips e-mail addresses) and must end on a
# word character (keeps sentence punctuation out of the highlight).
_MENTION_RE = re.compile(r'(?<!\w)@([\w.\-]*\w)')
_BOLD_STAR_RE = re.compile(r'\*\*([^*]+)\*\*')
_BOLD_UNDERSCORE_RE = re.compile(r'__([^_]+)__')
_ITALIC_STAR_RE = re.compile(r'(?<!\*)\*([^*]+)\*(?!\*)')
_ITALIC_UNDERSCORE_RE = re.compile(r'(?<!_)_([^_]+)_(?!_)')


def _stash(store, html):
    """Park finished HTML in *store* and return the placeholder standing in for it."""
    store.append(html)
    return f'\x00{len(store) - 1}\x00'


def _restore(text, store):
    """Replace every placeholder in *text* with the HTML it stands for."""
    def expand(match):
        # A stashed fragment may itself contain placeholders created earlier
        # (always with a smaller index), so expansion recurses and terminates.
        return _TOKEN_RE.sub(expand, store[int(match.group(1))])

    return _TOKEN_RE.sub(expand, text)


def _render_blocks(lines):
    """Wrap ``- item`` and ``> quote`` lines (already HTML-escaped) in list/quote markup."""
    rendered = []
    in_list = False
    in_quote = False

    for raw_line in lines:
        line = raw_line.strip()
        # '>' has been escaped to '&gt;'; a bare '>' is an empty quoted line.
        is_quote = line == '&gt;' or line.startswith('&gt; ')
        is_item = line.startswith('- ')

        # Close whichever block this line does not continue. Doing this up
        # front also closes a list when a quote starts right after it.
        if in_list and not is_item:
            rendered.append('</ul>')
            in_list = False
        if in_quote and not is_quote:
            rendered.append('</blockquote>')
            in_quote = False

        if is_quote:
            if not in_quote:
                rendered.append('<blockquote class="forum-quote">')
                in_quote = True
            rendered.append(line[5:])  # drop the '&gt; ' prefix
        elif is_item:
            if not in_list:
                rendered.append('<ul class="forum-list">')
                in_list = True
            rendered.append(f'<li>{line[2:]}</li>')
        else:
            # Plain text, or '' for a blank line (kept for paragraph spacing).
            rendered.append(line)

    if in_list:
        rendered.append('</ul>')
    if in_quote:
        rendered.append('</blockquote>')
    return rendered


def _render_link(match, store):
    """Turn one ``[label](url)`` or bare-URL match into placeholder-protected anchor markup."""
    label, url = match.group(1), match.group(2)

    if url is None:
        # Bare URL: keep trailing sentence punctuation outside the link.
        raw = match.group(0)
        url = raw.rstrip('.,:!?')
        if url.endswith('://'):
            return raw  # nothing but punctuation after the scheme
        anchor = f'<a href="{url}" {_LINK_ATTRS}>{url}</a>'
        return _stash(store, anchor) + raw[len(url):]

    # Only http(s) and site-relative targets become links; anything else
    # (javascript:, data:, ...) is left as the literal text the user typed.
    if not url.startswith(('http://', 'https://', '/')):
        return match.group(0)
    # Only the tags are stashed so the label still receives inline formatting.
    return (
        _stash(store, f'<a href="{url}" {_LINK_ATTRS}>')
        + label
        + _stash(store, '</a>')
    )


def _is_block_line(line, store):
    """Return True when *line* starts with block-level HTML (directly or via a placeholder)."""
    line = line.strip()
    token = _TOKEN_RE.match(line)
    if token:
        line = store[int(token.group(1))]
    return line.startswith(_BLOCK_PREFIXES)


def _insert_line_breaks(text, store):
    """Add ``<br>`` between consecutive lines unless either one is block-level HTML."""
    lines = text.split('\n')
    blockish = [_is_block_line(line, store) for line in lines]
    output = []
    for index, line in enumerate(lines):
        output.append(line)
        if index + 1 < len(lines) and not blockish[index] and not blockish[index + 1]:
            output.append('<br>')
    return '\n'.join(output)


def parse_forum_markdown(text):
    """
    Convert markdown text to safe HTML.

    Supported syntax:
    - **bold** or __bold__
    - *italic* or _italic_
    - `inline code` and ``` fenced code blocks ```
    - [link text](url) for http://, https:// and site-relative (/...) targets
    - bare https://... URLs (auto-linked)
    - - list items
    - > quotes
    - @mentions (highlighted)

    The input is HTML-escaped before any markup is generated. Code, link
    targets and mentions are swapped for placeholders as soon as they are
    rendered, so later passes (emphasis, mentions, line breaks) cannot
    rewrite their contents or corrupt an ``href`` attribute.

    Args:
        text: Raw markdown text

    Returns:
        Markup object with safe HTML (empty Markup for falsy input)
    """
    if not text:
        return Markup('')

    # Normalize line endings (Windows \r\n -> \n) and drop NUL bytes, which
    # are reserved for the internal placeholders.
    text = text.replace('\r\n', '\n').replace('\r', '\n').replace('\x00', '')

    # Escape HTML first for security
    text = str(escape(text))

    store = []  # finished HTML fragments, indexed by placeholder number

    # Fenced code goes first so its lines are neither stripped nor read as
    # list/quote markers, and never receive <br> or inline formatting.
    text = _FENCED_CODE_RE.sub(
        lambda m: _stash(
            store,
            '<pre class="forum-code-block"><code>'
            + m.group(1).strip('\n')
            + '</code></pre>',
        ),
        text,
    )

    # Block elements (lists, quotes), line by line
    text = '\n'.join(_render_blocks(text.split('\n')))

    # Inline formatting (order matters!): everything that must stay verbatim
    # is stashed before emphasis runs.
    text = _INLINE_CODE_RE.sub(
        lambda m: _stash(store, f'<code class="forum-code">{m.group(1)}</code>'),
        text,
    )
    # Explicit links and bare URLs share one pass so a URL that is part of a
    # [label](url) construct is never auto-linked a second time.
    text = _LINK_OR_URL_RE.sub(lambda m: _render_link(m, store), text)
    # Mentions are stashed before emphasis so '_' in a username survives.
    text = _MENTION_RE.sub(
        lambda m: _stash(store, f'<span class="forum-mention">@{m.group(1)}</span>'),
        text,
    )

    # Bold before italic so '**' / '__' are not consumed as single markers
    text = _BOLD_STAR_RE.sub(r'<strong>\1</strong>', text)
    text = _BOLD_UNDERSCORE_RE.sub(r'<strong>\1</strong>', text)
    text = _ITALIC_STAR_RE.sub(r'<em>\1</em>', text)
    text = _ITALIC_UNDERSCORE_RE.sub(r'<em>\1</em>', text)

    # Line breaks are added while code blocks are still single placeholders,
    # so no <br> ends up inside <pre>.
    text = _insert_line_breaks(text, store)

    return Markup(_restore(text, store))


def register_markdown_filter(app):
    """Install ``parse_forum_markdown`` as the ``forum_markdown`` template filter.

    Args:
        app: Flask app whose ``jinja_env.filters`` mapping receives the filter
    """
    template_filters = app.jinja_env.filters
    template_filters['forum_markdown'] = parse_forum_markdown