diff --git a/app.py b/app.py index e7ea739..bd33e7f 100644 --- a/app.py +++ b/app.py @@ -166,6 +166,8 @@ from database import ( SystemRole ) +from utils.decorators import role_required + # Import services import gemini_service from nordabiz_chat import NordaBizChatEngine @@ -938,6 +940,8 @@ def test_error_500(): @app.route('/health/full') +@login_required +@role_required(SystemRole.ADMIN) def health_full(): """ Extended health check - verifies all critical endpoints. diff --git a/blueprints/admin/routes.py b/blueprints/admin/routes.py index c5c9143..52c1689 100644 --- a/blueprints/admin/routes.py +++ b/blueprints/admin/routes.py @@ -27,6 +27,7 @@ from database import ( SystemRole ) from utils.decorators import role_required +from utils.helpers import sanitize_html import gemini_service # Logger @@ -847,7 +848,7 @@ def admin_calendar_new(): event = NordaEvent( title=request.form.get('title', '').strip(), - description=request.form.get('description', '').strip(), + description=sanitize_html(request.form.get('description', '').strip()), event_date=datetime.strptime(request.form.get('event_date'), '%Y-%m-%d').date(), start_time=request.form.get('start_time') or None, end_time=request.form.get('end_time') or None, diff --git a/blueprints/admin/routes_announcements.py b/blueprints/admin/routes_announcements.py index 6edd1f6..ebe8370 100644 --- a/blueprints/admin/routes_announcements.py +++ b/blueprints/admin/routes_announcements.py @@ -15,6 +15,7 @@ from flask_login import login_required, current_user from . import bp from database import SessionLocal, Announcement, SystemRole from utils.decorators import role_required +from utils.helpers import sanitize_html logger = logging.getLogger(__name__) @@ -98,7 +99,7 @@ def admin_announcements_new(): try: title = request.form.get('title', '').strip() excerpt = request.form.get('excerpt', '').strip() - content = request.form.get('content', '').strip() + content = sanitize_html(request.form.get('content', '').strip()) categories = request.form.getlist('categories') if not categories: categories = ['internal'] # Default category @@ -182,7 +183,7 @@ def admin_announcements_edit(id): if request.method == 'POST': announcement.title = request.form.get('title', '').strip() announcement.excerpt = request.form.get('excerpt', '').strip() or None - announcement.content = request.form.get('content', '').strip() + announcement.content = sanitize_html(request.form.get('content', '').strip()) categories = request.form.getlist('categories') if not categories: categories = ['internal'] # Default category diff --git a/blueprints/admin/routes_zopk_knowledge.py b/blueprints/admin/routes_zopk_knowledge.py index ac7620f..e6c19c4 100644 --- a/blueprints/admin/routes_zopk_knowledge.py +++ b/blueprints/admin/routes_zopk_knowledge.py @@ -352,6 +352,7 @@ def admin_zopk_embeddings_stream(): @bp.route('/zopk-api/knowledge/search', methods=['POST']) @login_required +@role_required(SystemRole.ADMIN) def api_zopk_knowledge_search(): """ Semantic search in ZOPK knowledge base. diff --git a/blueprints/board/routes.py b/blueprints/board/routes.py index fd322e0..eb81830 100644 --- a/blueprints/board/routes.py +++ b/blueprints/board/routes.py @@ -27,6 +27,7 @@ from sqlalchemy import desc from . import bp from database import SessionLocal, BoardMeeting, SystemRole, User from utils.decorators import rada_member_required, office_manager_required +from utils.helpers import sanitize_html from datetime import date, time try: @@ -452,6 +453,13 @@ def _handle_meeting_form(db, meeting=None): except json.JSONDecodeError: proceedings = [] + # Sanitize text fields in proceedings to prevent stored XSS + for proc in proceedings: + if isinstance(proc, dict): + for field in ('discussion', 'discussed', 'title'): + if field in proc and isinstance(proc[field], str): + proc[field] = sanitize_html(proc[field]) + # Validate errors = [] if not meeting_number: diff --git a/blueprints/chat/routes.py b/blueprints/chat/routes.py index 34dcf25..951b177 100644 --- a/blueprints/chat/routes.py +++ b/blueprints/chat/routes.py @@ -18,7 +18,6 @@ from database import ( SystemRole ) from nordabiz_chat import NordaBizChatEngine -from extensions import csrf from utils.decorators import member_required # Logger @@ -40,7 +39,6 @@ def chat(): @bp.route('/api/chat/settings', methods=['GET', 'POST']) -@csrf.exempt @login_required @member_required def chat_settings(): @@ -96,7 +94,6 @@ def chat_settings(): @bp.route('/api/chat/start', methods=['POST']) -@csrf.exempt @login_required @member_required def chat_start(): @@ -123,7 +120,6 @@ def chat_start(): @bp.route('/api/chat//message', methods=['POST']) -@csrf.exempt @login_required @member_required def chat_send_message(conversation_id): diff --git a/gemini_service.py b/gemini_service.py index 36e99a7..2222d17 100644 --- a/gemini_service.py +++ b/gemini_service.py @@ -104,7 +104,7 @@ class GeminiService: # Debug: Log API key (masked) if self.api_key: - logger.info(f"API key loaded: {self.api_key[:10]}...{self.api_key[-4:]}") + logger.info("Gemini API key loaded successfully") else: logger.error("API key is None or empty!") diff --git a/requirements.txt b/requirements.txt index 9b970ec..1d3e333 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ Werkzeug==3.1.5 Flask-WTF==1.2.2 Flask-Limiter==4.1.1 cryptography==46.0.4 +bleach==6.3.0 # Database SQLAlchemy==2.0.46 diff --git a/search_service.py b/search_service.py index 5ff38f9..2a544ab 100644 --- a/search_service.py +++ b/search_service.py @@ -263,8 +263,12 @@ class SearchService: """ keywords = self._expand_keywords(query) - # Build tsquery from keywords - tsquery_parts = [f"{kw}:*" for kw in keywords if kw] + # Sanitize keywords for tsquery - keep only word characters (alphanumeric + polish chars) + sanitized_keywords = [re.sub(r'[^\w]', '', kw, flags=re.UNICODE) for kw in keywords] + sanitized_keywords = [kw for kw in sanitized_keywords if kw] + + # Build tsquery from sanitized keywords + tsquery_parts = [f"{kw}:*" for kw in sanitized_keywords] tsquery = ' | '.join(tsquery_parts) # Check if pg_trgm is available @@ -276,7 +280,8 @@ class SearchService: has_trgm = False # Build ILIKE patterns for each keyword (for multi-word searches) - like_patterns = [f'%{kw}%' for kw in keywords if len(kw) > 2] + # Escape LIKE wildcards in user input before wrapping with % + like_patterns = [f'%{kw.replace("%", r"\\%").replace("_", r"\\_")}%' for kw in sanitized_keywords if len(kw) > 2] # Build SQL query with scoring for founding_history matches (owners/founders) if has_trgm: diff --git a/templates/admin/benefits_form.html b/templates/admin/benefits_form.html index 0a5c5db..a156da2 100644 --- a/templates/admin/benefits_form.html +++ b/templates/admin/benefits_form.html @@ -177,6 +177,7 @@
+
Podstawowe informacje
diff --git a/templates/admin/benefits_list.html b/templates/admin/benefits_list.html index d12eaa5..41db3c5 100644 --- a/templates/admin/benefits_list.html +++ b/templates/admin/benefits_list.html @@ -307,6 +307,7 @@
Edytuj + diff --git a/templates/chat.html b/templates/chat.html index ac030b0..1bdc0d8 100755 --- a/templates/chat.html +++ b/templates/chat.html @@ -1534,6 +1534,7 @@ {% block extra_js %} // NordaGPT Chat - State +const csrfToken = document.querySelector('meta[name="csrf-token"]')?.content || ''; let currentConversationId = null; let conversations = []; let currentModel = 'flash'; // Default model (flash = ekonomiczny) @@ -1578,7 +1579,7 @@ async function saveModelPreference(model) { try { await fetch('/api/chat/settings', { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken }, body: JSON.stringify({ model: model }) }); } catch (error) { @@ -1832,7 +1833,8 @@ async function deleteConversation(conversationId) { try { const response = await fetch(`/api/chat/${conversationId}/delete`, { - method: 'DELETE' + method: 'DELETE', + headers: { 'X-CSRFToken': csrfToken } }); const data = await response.json(); @@ -1879,7 +1881,7 @@ async function sendMessage() { if (!currentConversationId) { const startResponse = await fetch('/api/chat/start', { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken }, body: JSON.stringify({ title: message.substring(0, 50) + (message.length > 50 ? '...' : '') }) @@ -1895,7 +1897,7 @@ async function sendMessage() { // Send message with model selection const response = await fetch(`/api/chat/${currentConversationId}/message`, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { 'Content-Type': 'application/json', 'X-CSRFToken': csrfToken }, body: JSON.stringify({ message: message, model: currentModel diff --git a/templates/membership/data_request.html b/templates/membership/data_request.html index 5a4266a..c209dde 100644 --- a/templates/membership/data_request.html +++ b/templates/membership/data_request.html @@ -249,6 +249,7 @@

Wprowadź NIP firmy

+
diff --git a/utils/helpers.py b/utils/helpers.py index 0f14bfb..57f39a1 100644 --- a/utils/helpers.py +++ b/utils/helpers.py @@ -8,8 +8,30 @@ Common utility functions used across blueprints. import re import logging +import bleach + logger = logging.getLogger(__name__) +# Allowed HTML tags and attributes for rich-text content (announcements, events, proceedings) +_ALLOWED_TAGS = ['p', 'br', 'strong', 'em', 'b', 'i', 'a', 'ul', 'ol', 'li', 'h3', 'h4', 'blockquote'] +_ALLOWED_ATTRS = {'a': ['href', 'target', 'rel']} + + +def sanitize_html(content): + """ + Sanitize HTML content to prevent stored XSS. + Strips all tags except a safe whitelist. + + Args: + content: HTML string to sanitize + + Returns: + Sanitized HTML string + """ + if not content: + return content + return bleach.clean(content, tags=_ALLOWED_TAGS, attributes=_ALLOWED_ATTRS, strip=True) + def sanitize_input(text, max_length=1000): """ diff --git a/zopk_knowledge_service.py b/zopk_knowledge_service.py index 4853f8e..7755f60 100644 --- a/zopk_knowledge_service.py +++ b/zopk_knowledge_service.py @@ -1691,9 +1691,8 @@ def find_duplicate_entities( from sqlalchemy import text # Build query with pg_trgm similarity - type_filter = f"AND e1.entity_type = '{entity_type}'" if entity_type else "" - - query = text(f""" + # Use conditional SQL with COALESCE to avoid f-string interpolation + query = text(""" SELECT e1.id as id1, e1.name as name1, e1.entity_type as type1, e1.mentions_count as mentions1, e1.is_verified as verified1, @@ -1715,7 +1714,7 @@ def find_duplicate_entities( OR LOWER(e1.name) LIKE '%' || LOWER(e2.name) || '%' OR LOWER(e2.name) LIKE '%' || LOWER(e1.name) || '%' ) - {type_filter} + AND (:entity_type IS NULL OR e1.entity_type = :entity_type) ORDER BY sim DESC, e1.entity_type, @@ -1723,7 +1722,8 @@ def find_duplicate_entities( LIMIT :limit """) - result = db_session.execute(query, {'min_sim': min_similarity, 'limit': limit}) + params = {'min_sim': min_similarity, 'limit': limit, 'entity_type': entity_type} + result = db_session.execute(query, params) duplicates = [] for row in result: @@ -1970,12 +1970,10 @@ def find_duplicate_facts( """ from sqlalchemy import text - type_filter = f"AND f1.fact_type = '{fact_type}'" if fact_type else "" - # Set similarity threshold and use % operator (uses GiST index) db_session.execute(text("SET pg_trgm.similarity_threshold = :threshold"), {'threshold': min_similarity}) - query = text(f""" + query = text(""" SELECT f1.id as id1, f1.full_text as text1, f1.fact_type as type1, f1.is_verified as verified1, f1.confidence_score as score1, @@ -1985,12 +1983,13 @@ def find_duplicate_facts( FROM zopk_knowledge_facts f1 JOIN zopk_knowledge_facts f2 ON f1.id < f2.id WHERE f1.full_text % f2.full_text - {type_filter} + AND (:fact_type IS NULL OR f1.fact_type = :fact_type) ORDER BY sim DESC, COALESCE(GREATEST(f1.confidence_score, f2.confidence_score), 0) DESC LIMIT :limit """) - result = db_session.execute(query, {'limit': limit}) + params = {'limit': limit, 'fact_type': fact_type} + result = db_session.execute(query, params) duplicates = [] for row in result: @@ -2320,9 +2319,7 @@ def get_timeline_suggestions( # Get milestone facts not yet in timeline # Prioritize: verified, high confidence, has numeric value (dates/amounts) - verified_filter = "AND f.is_verified = TRUE" if only_verified else "" - - suggestions_query = text(f""" + suggestions_query = text(""" SELECT DISTINCT ON (f.id) f.id as fact_id, f.full_text, @@ -2341,7 +2338,7 @@ def get_timeline_suggestions( FROM zopk_knowledge_facts f LEFT JOIN zopk_news n ON n.id = f.source_news_id WHERE f.fact_type = 'milestone' - {verified_filter} + AND (:only_verified = FALSE OR f.is_verified = TRUE) AND NOT EXISTS ( SELECT 1 FROM zopk_milestones m WHERE m.source_news_id = f.source_news_id @@ -2351,7 +2348,8 @@ def get_timeline_suggestions( LIMIT :limit """) - results = db_session.execute(suggestions_query, {'limit': limit}).fetchall() + params = {'limit': limit, 'only_verified': bool(only_verified)} + results = db_session.execute(suggestions_query, params).fetchall() suggestions = [] for row in results: