""" Founding History Formatter ========================== Converts raw founding_history text (with emoji section markers) into structured HTML cards. Handles three formats: 1. Emoji-sectioned text (from KRS/AI enrichment) 2. Plain text with newlines 3. HTML (from Quill editor) - passed through unchanged """ import re from markupsafe import Markup, escape # Section markers: emoji → (css_class, icon_color_gradient) SECTION_MAP = { '🏢': ('section-board', '#1e3050', '#2E4872'), '👥': ('section-shareholders', '#7c3aed', '#6d28d9'), '📋': ('section-registry', '#0369a1', '#0284c7'), '📊': ('section-finance', '#059669', '#10b981'), '📝': ('section-profile', '#d97706', '#f59e0b'), } EMOJI_PATTERN = re.compile(r'^(' + '|'.join(re.escape(e) for e in SECTION_MAP) + r')\s*(.+)$') def format_founding_history(text): """Convert founding_history to structured HTML.""" if not text: return '' text = text.strip() # Already HTML (from Quill editor) — pass through if '

' in text or '

' in text or '
' in text: return Markup(text) # Check if it has emoji section markers has_sections = any(emoji in text for emoji in SECTION_MAP) if not has_sections: # Plain text — just convert newlines to
and bullet points return Markup(_format_plain_text(text)) # Parse emoji-sectioned text return Markup(_format_sectioned_text(text)) def _format_plain_text(text): """Format plain text with newlines and bullet points.""" escaped = escape(text) # Convert bullet points result = str(escaped).replace('• ', '
  • ') if '') in_list = False continue if '') in_list = True formatted.append(line + '
  • ') else: if in_list: formatted.append('') in_list = False formatted.append(f'

    {line}

    ') if in_list: formatted.append('') return '\n'.join(formatted) return str(escaped).replace('\n', '
    ') def _format_sectioned_text(text): """Parse emoji-sectioned text into card-based HTML.""" sections = [] current_emoji = None current_title = None current_lines = [] for line in text.split('\n'): line = line.strip() if not line: continue match = EMOJI_PATTERN.match(line) if match: # Save previous section if current_emoji: sections.append((current_emoji, current_title, current_lines)) current_emoji = match.group(1) # Clean title: remove trailing colon, normalize case title = match.group(2).rstrip(':') current_title = title current_lines = [] else: current_lines.append(line) # Save last section if current_emoji: sections.append((current_emoji, current_title, current_lines)) if not sections: return _format_plain_text(text) html_parts = ['
    '] for emoji, title, lines in sections: css_class = SECTION_MAP.get(emoji, ('section-default', '#6b7280', '#9ca3af'))[0] color1 = SECTION_MAP.get(emoji, ('', '#6b7280', '#9ca3af'))[1] color2 = SECTION_MAP.get(emoji, ('', '#6b7280', '#9ca3af'))[2] html_parts.append(f'
    ') html_parts.append( f'
    ' f'{emoji}' f'{escape(title)}' f'
    ' ) if lines: # Check if lines are bullet points bullet_lines = [l for l in lines if l.startswith('• ')] non_bullet = [l for l in lines if not l.startswith('• ')] if bullet_lines: html_parts.append('
      ') for bl in bullet_lines: content = escape(bl[2:]) # Remove "• " # Highlight key-value pairs (e.g., "KRS: 123") content = _highlight_kv(str(content)) html_parts.append(f'
    • {content}
    • ') html_parts.append('
    ') for nl in non_bullet: content = escape(nl) html_parts.append(f'

    {content}

    ') html_parts.append('
    ') html_parts.append('
    ') return '\n'.join(html_parts) def _highlight_kv(text): """Highlight key-value pairs like 'KRS: 0000328525' with bold keys.""" # Match patterns like "Key: value" but only for known keys known_keys = [ 'KRS', 'NIP', 'REGON', 'EBITDA', 'EBIT', 'Data rejestracji', 'Kapitał zakładowy', 'Siedziba', 'Reprezentacja', 'Wiarygodność płatnicza', 'Działalność' ] for key in known_keys: pattern = re.compile(rf'({re.escape(key)}:\s*)') text = pattern.sub(rf'\1', text) return text def register_history_filter(app): """Register the Jinja2 filter.""" app.jinja_env.filters['format_history'] = format_founding_history