fix: rewrite forum markdown - autolink before block wrapping, merge paragraph lines

Bare URLs are now auto-linked before lines are wrapped in <li>/<blockquote>, so links inside list items and quotes become clickable. Consecutive text lines are joined with a space into a single paragraph instead of each getting its own <br> tag.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 08:57:16 +01:00 · 2026-02-23 08:57:16 +01:00 · d56f1fdae1
commit d56f1fdae1
parent 7cbd3bb1e7
1 changed files with 76 additions and 73 deletions
--- a/utils/markdown.py
+++ b/utils/markdown.py
@ -10,6 +10,15 @@ import re
 from markupsafe import Markup, escape


+def _autolink(text):
+    """Convert bare URLs to clickable links. Works on escaped text before HTML wrapping."""
+    return re.sub(
+        r'https?://[^\s<]+',
+        lambda m: f'<a href="{m.group(0)}" target="_blank" rel="noopener noreferrer" class="forum-link">{m.group(0)}</a>',
+        text
+    )
+
+
 def parse_forum_markdown(text):
    """
    Convert markdown text to safe HTML.
@ -23,12 +32,6 @@ def parse_forum_markdown(text):
    - - list items
    - > quotes
    - @mentions (highlighted)
-
-    Args:
-        text: Raw markdown text
-
-    Returns:
-        Markup object with safe HTML
    """
    if not text:
        return Markup('')
@ -39,59 +42,8 @@ def parse_forum_markdown(text):
    # Escape HTML first for security
    text = str(escape(text))

-    # Process line by line for block elements
-    lines = text.split('\n')
-    result_lines = []
-    in_list = False
-    in_quote = False
-
-    for line in lines:
-        stripped = line.strip()
-
-        # Skip empty lines but preserve paragraph spacing
-        if not stripped:
-            if in_list:
-                result_lines.append('</ul>')
-                in_list = False
-            if in_quote:
-                result_lines.append('</blockquote>')
-                in_quote = False
-            result_lines.append('')
-            continue
-
-        # Quote blocks (> text)
-        if stripped.startswith('&gt; '):  # Escaped >
-            if not in_quote:
-                result_lines.append('<blockquote class="forum-quote">')
-                in_quote = True
-            result_lines.append(stripped[5:])  # Remove &gt; prefix
-            continue
-        elif in_quote:
-            result_lines.append('</blockquote>')
-            in_quote = False
-
-        # List items (- text)
-        if stripped.startswith('- '):
-            if not in_list:
-                result_lines.append('<ul class="forum-list">')
-                in_list = True
-            result_lines.append(f'<li>{stripped[2:]}</li>')
-            continue
-        elif in_list:
-            result_lines.append('</ul>')
-            in_list = False
-
-        result_lines.append(stripped)
-
-    # Close open blocks
-    if in_list:
-        result_lines.append('</ul>')
-    if in_quote:
-        result_lines.append('</blockquote>')
-
-    text = '\n'.join(result_lines)
-
-    # Inline formatting (order matters!)
+    # Apply inline formatting BEFORE block structure
+    # This ensures URLs inside list items get linked

    # Code blocks (``` ... ```)
    text = re.sub(
@ -118,13 +70,13 @@ def parse_forum_markdown(text):
        url = match.group(2)
        if url.startswith(('http://', 'https://', '/')):
            return f'<a href="{url}" target="_blank" rel="noopener noreferrer" class="forum-link">{link_text}</a>'
-        return match.group(0)  # Return original if not safe
+        return match.group(0)

    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', safe_link, text)

-    # Auto-link bare URLs (must come after [text](url) so already-linked URLs aren't doubled)
+    # Auto-link bare URLs (after [text](url) to avoid doubling)
    text = re.sub(
-        r'(?<!["\'>=/])(?<!\()https?://[^\s<\)]+',
+        r'(?<!href=")(?<!">)https?://[^\s<]+',
        lambda m: f'<a href="{m.group(0)}" target="_blank" rel="noopener noreferrer" class="forum-link">{m.group(0)}</a>',
        text
    )
@ -136,19 +88,70 @@ def parse_forum_markdown(text):
        text
    )

-    # Convert newlines to <br> but skip lines that are HTML block elements
+    # Now process block structure (lists, quotes, paragraphs)
    lines = text.split('\n')
+    result_lines = []
+    in_list = False
+    in_quote = False
+
+    for line in lines:
+        stripped = line.strip()
+
+        # Empty line = paragraph break
+        if not stripped:
+            if in_list:
+                result_lines.append('</ul>')
+                in_list = False
+            if in_quote:
+                result_lines.append('</blockquote>')
+                in_quote = False
+            result_lines.append('<br>')
+            continue
+
+        # Quote blocks (> text) — &gt; because already escaped
+        if stripped.startswith('&gt; '):
+            if not in_quote:
+                result_lines.append('<blockquote class="forum-quote">')
+                in_quote = True
+            result_lines.append(stripped[5:])
+            continue
+        elif in_quote:
+            result_lines.append('</blockquote>')
+            in_quote = False
+
+        # List items (- text)
+        if stripped.startswith('- '):
+            if not in_list:
+                result_lines.append('<ul class="forum-list">')
+                in_list = True
+            result_lines.append(f'<li>{stripped[2:]}</li>')
+            continue
+        elif in_list:
+            result_lines.append('</ul>')
+            in_list = False
+
+        result_lines.append(stripped)
+
+    # Close open blocks
+    if in_list:
+        result_lines.append('</ul>')
+    if in_quote:
+        result_lines.append('</blockquote>')
+
+    # Join with spaces — no extra <br> between lines within same paragraph
+    # Consecutive non-block lines are part of the same paragraph
    output = []
-    for i, line in enumerate(lines):
-        output.append(line)
-        # Don't add <br> after block elements or before them
-        if i < len(lines) - 1:
-            stripped = line.strip()
-            next_stripped = lines[i + 1].strip() if i + 1 < len(lines) else ''
-            is_block = any(stripped.startswith(t) for t in ['<ul', '</ul>', '<li', '</li>', '<blockquote', '</blockquote>', '<pre', '</pre>'])
-            next_is_block = any(next_stripped.startswith(t) for t in ['<ul', '</ul>', '<li', '</li>', '<blockquote', '</blockquote>', '<pre', '</pre>'])
-            if not is_block and not next_is_block:
-                output.append('<br>')
+    for i, line in enumerate(result_lines):
+        s = line.strip()
+        # Block elements get their own line, no extra spacing
+        if any(s.startswith(t) for t in ['<ul', '</ul>', '<li', '</li>', '<blockquote', '</blockquote>', '<pre', '</pre>', '<br>']):
+            output.append(line)
+        else:
+            # Regular text — join with previous regular text using space
+            if output and output[-1] and not any(output[-1].strip().startswith(t) for t in ['<ul', '</ul>', '<li', '</li>', '<blockquote', '</blockquote>', '<pre', '</pre>', '<br>']):
+                output[-1] = output[-1] + ' ' + line
+            else:
+                output.append(line)

    return Markup('\n'.join(output))