feat(insights): Baza wiedzy i insights dla rozwoju portalu

System zbierania feedbacku z interakcji użytkowników:
1. Źródła wiedzy:
   - Forum (tematy, odpowiedzi)
   - Chat AI (tylko odpowiedzi asystenta - fakty publiczne)
   - Analiza wzorców pytań (zanonimizowana)

2. Kategorie insights:
   - feature_request (propozycje funkcji)
   - bug_report (zgłoszenia błędów)
   - company_search (wyszukiwania firm)
   - question (częste pytania)

3. Panel admina: /admin/insights
   - Przeglądanie insights
   - Zmiana statusów (new → planned → implemented)
   - Synchronizacja z forum/chat

PRIVACY: Pytania użytkowników są analizowane wzorcowo (anonimizacja),
treść pytań NIE trafia do bazy wiedzy.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-28 22:09:05 +01:00
parent ca03cb0b3b
commit b19dfa7227
4 changed files with 1334 additions and 0 deletions

131
app.py
View File

@ -7155,6 +7155,137 @@ def test_sanitization():
return jsonify({'success': False, 'error': str(e)}), 500
# ============================================================
# DEVELOPMENT INSIGHTS (Roadmap from user feedback)
# ============================================================
@app.route('/admin/insights')
@login_required
def admin_insights():
    """Render the admin-only dashboard for development insights.

    Non-admins are redirected to their dashboard with an error flash.
    """
    if current_user.is_admin:
        return render_template('admin/insights.html')
    flash('Brak uprawnień do tej strony.', 'error')
    return redirect(url_for('dashboard'))
@app.route('/api/admin/insights', methods=['GET'])
@login_required
def api_get_insights():
    """Return development insights (roadmap items) as JSON.

    Query params:
        status: optional filter (new / reviewed / planned / implemented / rejected).
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Admin access required'}), 403
    try:
        from norda_knowledge_service import get_knowledge_service
        status_filter = request.args.get('status')
        items = get_knowledge_service().get_development_insights(status=status_filter)
        return jsonify({
            'success': True,
            'insights': items,
            'count': len(items)
        })
    except ImportError:
        return jsonify({
            'success': False,
            'error': 'Knowledge service not available'
        }), 500
    except Exception as e:
        logger.error(f"Error getting insights: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/admin/insights/<int:insight_id>/status', methods=['PUT'])
@login_required
def api_update_insight_status(insight_id):
    """Update an insight's status (for roadmap planning).

    JSON body: {"status": "<new status>", "note": "<optional admin note>"}.
    Returns 400 when no status is supplied, 403 for non-admins.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Admin access required'}), 403
    try:
        from norda_knowledge_service import get_knowledge_service
        service = get_knowledge_service()
        # BUGFIX: get_json() returns None for a missing/non-JSON body, which
        # previously crashed on data.get() and surfaced as a generic 500.
        data = request.get_json(silent=True) or {}
        status = data.get('status')
        note = data.get('note')
        if not status:
            return jsonify({'success': False, 'error': 'Status is required'}), 400
        success = service.update_insight_status(insight_id, status, note)
        return jsonify({'success': success})
    except ImportError:
        # Consistent with the other /api/admin/insights endpoints.
        return jsonify({
            'success': False,
            'error': 'Knowledge service not available'
        }), 500
    except Exception as e:
        logger.error(f"Error updating insight status: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/admin/insights/sync', methods=['POST'])
@login_required
def api_sync_insights():
    """Manually trigger knowledge sync from forum and chat.

    JSON body (optional): {"days_back": int} -- how far back to sync (default 30).
    Returns 400 on an invalid days_back, 403 for non-admins.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Admin access required'}), 403
    try:
        from norda_knowledge_service import get_knowledge_service
        service = get_knowledge_service()
        # silent=True: a missing or non-JSON body simply falls back to defaults.
        data = request.get_json(silent=True) or {}
        # BUGFIX: days_back comes from the client; validate instead of letting
        # an arbitrary value propagate into the service and surface as a 500.
        try:
            days_back = int(data.get('days_back', 30))
        except (TypeError, ValueError):
            return jsonify({'success': False, 'error': 'days_back must be an integer'}), 400
        if days_back < 1:
            return jsonify({'success': False, 'error': 'days_back must be a positive integer'}), 400
        results = {
            'forum': service.sync_forum_knowledge(days_back),
            'chat': service.sync_chat_knowledge(days_back),
            'questions': service.analyze_user_questions(days_back)
        }
        return jsonify({
            'success': True,
            'results': results
        })
    except ImportError:
        return jsonify({
            'success': False,
            'error': 'Knowledge service not available'
        }), 500
    except Exception as e:
        logger.error(f"Error syncing insights: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/admin/insights/stats', methods=['GET'])
@login_required
def api_insights_stats():
    """Return knowledge-base statistics as JSON (admin only)."""
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Admin access required'}), 403
    try:
        from norda_knowledge_service import get_knowledge_service
        stats = get_knowledge_service().get_knowledge_stats()
    except ImportError:
        return jsonify({
            'success': False,
            'error': 'Knowledge service not available'
        }), 500
    except Exception as e:
        logger.error(f"Error getting stats: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    return jsonify({
        'success': True,
        'stats': stats
    })
@app.route('/admin/analytics')
@login_required
def admin_analytics():

View File

@ -0,0 +1,66 @@
-- Migration: create the norda_knowledge_chunks table
-- Date: 2026-01-28
-- Purpose: knowledge base for Norda GPT, fed from the forum and AI chat.

CREATE TABLE IF NOT EXISTS norda_knowledge_chunks (
    id SERIAL PRIMARY KEY,

    -- Content
    content TEXT NOT NULL,
    summary VARCHAR(500),

    -- Source tracking
    source_type VARCHAR(50) NOT NULL,   -- forum_topic, forum_reply, chat_response, manual
    source_id INTEGER,                  -- ID in source table
    source_url VARCHAR(500),

    -- Metadata
    category VARCHAR(100),
    keywords TEXT,                      -- Comma-separated keywords

    -- Development insights (for roadmap)
    insight_category VARCHAR(50),       -- feature_request, bug_report, improvement, question, pain_point, etc.
    insight_priority INTEGER DEFAULT 0, -- Higher = more important (based on frequency)
    insight_status VARCHAR(50) DEFAULT 'new', -- new, reviewed, planned, implemented, rejected

    -- Quality indicators
    is_verified BOOLEAN DEFAULT FALSE,
    confidence_score INTEGER DEFAULT 50, -- 0-100

    -- Companies mentioned (comma-separated IDs)
    mentioned_company_ids TEXT,

    -- Timestamps
    created_at TIMESTAMP DEFAULT NOW(),
    updated_at TIMESTAMP DEFAULT NOW(),
    source_created_at TIMESTAMP,

    -- Status
    is_active BOOLEAN DEFAULT TRUE
);

-- Lookup indexes
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_source_type ON norda_knowledge_chunks(source_type);
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_source_id ON norda_knowledge_chunks(source_type, source_id);
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_active ON norda_knowledge_chunks(is_active);
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_category ON norda_knowledge_chunks(category);
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_created ON norda_knowledge_chunks(created_at DESC);

-- Full-text search index (PostgreSQL).
-- NOTE(review): the 'polish' text search configuration is not shipped with stock
-- PostgreSQL; confirm it is installed on the target server or this CREATE INDEX fails.
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_content_fts
    ON norda_knowledge_chunks USING gin(to_tsvector('polish', content));

-- Development-insight indexes
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_insight_cat ON norda_knowledge_chunks(insight_category);
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_insight_status ON norda_knowledge_chunks(insight_status);
CREATE INDEX IF NOT EXISTS idx_norda_knowledge_insight_priority ON norda_knowledge_chunks(insight_priority DESC);

-- Documentation comments
COMMENT ON TABLE norda_knowledge_chunks IS 'Knowledge base for Norda GPT - facts from forum and chat';
COMMENT ON COLUMN norda_knowledge_chunks.source_type IS 'Source: forum_topic, forum_reply, chat_response, manual';
COMMENT ON COLUMN norda_knowledge_chunks.confidence_score IS 'Quality score 0-100, higher = more reliable';

-- Application-role permissions
GRANT SELECT, INSERT, UPDATE, DELETE ON norda_knowledge_chunks TO nordabiz_app;
GRANT USAGE, SELECT ON SEQUENCE norda_knowledge_chunks_id_seq TO nordabiz_app;

722
norda_knowledge_service.py Normal file
View File

@ -0,0 +1,722 @@
#!/usr/bin/env python3
"""
Norda Knowledge Base Service
============================
Builds and maintains a knowledge base for Norda GPT from:
1. Forum topics and replies (public discussions)
2. AI chat responses (assistant messages with positive feedback)
PRIVACY: User questions (role='user') are NEVER added to the knowledge base.
Only AI responses (role='assistant') containing public company facts are included.
Author: Norda Biznes Development Team
Created: 2026-01-28
"""
import logging
from datetime import datetime, timedelta
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from enum import Enum
from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, ForeignKey, Enum as SQLEnum
from sqlalchemy.orm import relationship
from database import (
Base,
SessionLocal,
ForumTopic,
ForumReply,
AIChatMessage,
AIChatConversation,
Company
)
logger = logging.getLogger(__name__)
class KnowledgeSourceType(Enum):
    """Where a knowledge chunk originally came from."""

    FORUM_TOPIC = "forum_topic"      # Public forum topic body
    FORUM_REPLY = "forum_reply"      # Reply inside a forum topic
    CHAT_RESPONSE = "chat_response"  # AI assistant answer (public facts only)
    CHAT_QUESTION = "chat_question"  # Anonymized user-question pattern (trend analysis)
    MANUAL = "manual"                # Entered manually by an administrator
class InsightCategory(Enum):
    """Categories used to tag development insights for roadmap planning."""

    FEATURE_REQUEST = "feature_request"      # User wants a new feature
    BUG_REPORT = "bug_report"                # Something does not work
    IMPROVEMENT = "improvement"              # Enhancement to an existing feature
    QUESTION = "question"                    # Common question (may indicate missing docs/feature)
    PAIN_POINT = "pain_point"                # User frustration
    POSITIVE_FEEDBACK = "positive_feedback"  # What works well
    COMPANY_SEARCH = "company_search"        # What companies users look for
    OTHER = "other"                          # Anything that fits nowhere else
class NordaKnowledgeChunk(Base):
    """
    Knowledge chunks extracted from various sources.

    Used for:
    1. Norda GPT knowledge base
    2. Development insights for roadmap
    3. User feedback analysis

    Mirrors the ``norda_knowledge_chunks`` table created by the SQL
    migration; keep the two definitions in sync.
    """
    __tablename__ = 'norda_knowledge_chunks'
    id = Column(Integer, primary_key=True)
    # Content
    content = Column(Text, nullable=False)
    summary = Column(String(500))  # Short summary for quick reference
    # Source tracking
    source_type = Column(String(50), nullable=False)  # forum_topic, forum_reply, chat_response, chat_question, manual
    source_id = Column(Integer)  # ID in source table (NULL for anonymized insights)
    source_url = Column(String(500))  # URL to original source, when publicly reachable
    # Metadata
    category = Column(String(100))  # Topic category or detected theme
    keywords = Column(Text)  # Comma-separated keywords for search
    # Development insights (for roadmap)
    insight_category = Column(String(50))  # feature_request, bug_report, improvement, question, pain_point, etc.
    insight_priority = Column(Integer, default=0)  # Higher = more important (based on frequency)
    insight_status = Column(String(50), default='new')  # new, reviewed, planned, implemented, rejected
    # Quality indicators
    is_verified = Column(Boolean, default=False)  # Admin verified
    confidence_score = Column(Integer, default=50)  # 0-100, higher = more reliable
    # Companies mentioned
    mentioned_company_ids = Column(Text)  # Comma-separated company IDs
    # Timestamps (naive datetime.now; assumes server-local time -- TODO confirm)
    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)
    source_created_at = Column(DateTime)  # When the source was created
    # Status (soft-delete flag; queries filter on is_active == True)
    is_active = Column(Boolean, default=True)

    def __repr__(self):
        return f"<NordaKnowledgeChunk {self.id}: {self.source_type}>"
@dataclass
class KnowledgeChunkResult:
    """A single hit returned by a knowledge-base search."""

    chunk_id: int                # Primary key of the matched chunk
    content: str                 # Full chunk text
    summary: Optional[str]       # Short summary, if one was stored
    source_type: str             # A KnowledgeSourceType value
    source_url: Optional[str]    # Link back to the original source, if any
    relevance_score: float       # Higher = better match for the query
    created_at: datetime         # Timestamp associated with the chunk
class NordaKnowledgeService:
    """
    Service for building and querying the Norda knowledge base.

    Sources:
      * Forum topics and replies (public discussions).
      * AI chat responses (assistant messages only; user questions are never stored).
      * Anonymized patterns mined from user questions (development insights only).
    """

    def __init__(self):
        self.min_content_length = 50    # Minimum characters for a chunk
        self.max_content_length = 2000  # Maximum characters per chunk

    def sync_forum_knowledge(self, days_back: int = 30) -> Dict[str, int]:
        """
        Sync knowledge from forum topics and replies.

        Args:
            days_back: How many days back to sync

        Returns:
            Dict with counts of added/updated/skipped items
        """
        db = SessionLocal()
        stats = {'topics_added': 0, 'replies_added': 0, 'skipped': 0, 'errors': 0}
        try:
            cutoff_date = datetime.now() - timedelta(days=days_back)
            # Get forum topics (exclude test category)
            topics = db.query(ForumTopic).filter(
                ForumTopic.created_at >= cutoff_date,
                ForumTopic.category != 'test'
            ).all()
            for topic in topics:
                try:
                    # Skip topics already imported (sync is idempotent)
                    existing = db.query(NordaKnowledgeChunk).filter_by(
                        source_type=KnowledgeSourceType.FORUM_TOPIC.value,
                        source_id=topic.id
                    ).first()
                    if existing:
                        stats['skipped'] += 1
                        continue
                    # Create knowledge chunk from topic
                    if len(topic.content or '') >= self.min_content_length:
                        chunk = NordaKnowledgeChunk(
                            content=self._truncate_content(topic.content),
                            summary=topic.title[:500] if topic.title else None,
                            source_type=KnowledgeSourceType.FORUM_TOPIC.value,
                            source_id=topic.id,
                            source_url=f"/forum/topic/{topic.id}",
                            category=topic.category,
                            # BUGFIX: topic.title may be None; the previous
                            # `topic.title + ' ' + topic.content` raised TypeError.
                            keywords=self._extract_keywords(
                                f"{topic.title or ''} {topic.content}"
                            ),
                            confidence_score=70,  # Forum topics are moderately reliable
                            source_created_at=topic.created_at,
                            is_active=True
                        )
                        db.add(chunk)
                        stats['topics_added'] += 1
                except Exception as e:
                    logger.error(f"Error processing topic {topic.id}: {e}")
                    stats['errors'] += 1
            # Get forum replies
            replies = db.query(ForumReply).filter(
                ForumReply.created_at >= cutoff_date
            ).all()
            for reply in replies:
                try:
                    # Skip if topic is test category
                    if reply.topic and reply.topic.category == 'test':
                        continue
                    # Check if already exists
                    existing = db.query(NordaKnowledgeChunk).filter_by(
                        source_type=KnowledgeSourceType.FORUM_REPLY.value,
                        source_id=reply.id
                    ).first()
                    if existing:
                        stats['skipped'] += 1
                        continue
                    # Create knowledge chunk from reply
                    if len(reply.content or '') >= self.min_content_length:
                        chunk = NordaKnowledgeChunk(
                            content=self._truncate_content(reply.content),
                            # BUGFIX: guard against a topic without a title
                            # (slicing None raised TypeError before).
                            summary=(
                                f"Odpowiedź w: {reply.topic.title[:200]}"
                                if reply.topic and reply.topic.title else None
                            ),
                            source_type=KnowledgeSourceType.FORUM_REPLY.value,
                            source_id=reply.id,
                            source_url=f"/forum/topic/{reply.topic_id}#reply-{reply.id}" if reply.topic_id else None,
                            category=reply.topic.category if reply.topic else None,
                            keywords=self._extract_keywords(reply.content),
                            confidence_score=60,  # Replies are slightly less reliable
                            source_created_at=reply.created_at,
                            is_active=True
                        )
                        db.add(chunk)
                        stats['replies_added'] += 1
                except Exception as e:
                    logger.error(f"Error processing reply {reply.id}: {e}")
                    stats['errors'] += 1
            db.commit()
            logger.info(f"Forum knowledge sync complete: {stats}")
        except Exception as e:
            logger.error(f"Error syncing forum knowledge: {e}")
            db.rollback()
            raise
        finally:
            db.close()
        return stats

    def sync_chat_knowledge(self, days_back: int = 30) -> Dict[str, int]:
        """
        Sync knowledge from AI chat responses.

        PRIVACY: Only syncs assistant messages (role='assistant').
        User messages (role='user') are NEVER added.
        Prioritizes responses with positive feedback.

        Args:
            days_back: How many days back to sync

        Returns:
            Dict with counts of added/skipped items
        """
        db = SessionLocal()
        stats = {'responses_added': 0, 'skipped': 0, 'errors': 0}
        try:
            cutoff_date = datetime.now() - timedelta(days=days_back)
            # PRIVACY: Only get assistant messages (NEVER user messages).
            # feedback_rating == 2 is treated as "positive" here -- TODO confirm
            # against the feedback model's rating scale.
            messages = db.query(AIChatMessage).filter(
                AIChatMessage.role == 'assistant',  # CRITICAL: Only assistant messages
                AIChatMessage.created_at >= cutoff_date,
                # Only include messages with positive or no feedback (exclude negative)
                (AIChatMessage.feedback_rating == 2) | (AIChatMessage.feedback_rating.is_(None))
            ).all()
            for msg in messages:
                try:
                    # Skip messages already imported (sync is idempotent)
                    existing = db.query(NordaKnowledgeChunk).filter_by(
                        source_type=KnowledgeSourceType.CHAT_RESPONSE.value,
                        source_id=msg.id
                    ).first()
                    if existing:
                        stats['skipped'] += 1
                        continue
                    # Skip short responses or greetings
                    if len(msg.content or '') < self.min_content_length:
                        stats['skipped'] += 1
                        continue
                    # Skip generic responses without company information
                    if not self._contains_company_info(msg.content):
                        stats['skipped'] += 1
                        continue
                    # Calculate confidence based on feedback
                    confidence = 50
                    if msg.feedback_rating == 2:  # Positive feedback
                        confidence = 85
                    if msg.companies_mentioned and msg.companies_mentioned > 0:
                        confidence += 10
                    # Create knowledge chunk
                    chunk = NordaKnowledgeChunk(
                        content=self._truncate_content(msg.content),
                        summary=self._generate_summary(msg.content),
                        source_type=KnowledgeSourceType.CHAT_RESPONSE.value,
                        source_id=msg.id,
                        source_url=None,  # Chat responses don't have public URLs
                        category='chat_knowledge',
                        keywords=self._extract_keywords(msg.content),
                        confidence_score=min(confidence, 100),
                        source_created_at=msg.created_at,
                        is_active=True
                    )
                    db.add(chunk)
                    stats['responses_added'] += 1
                except Exception as e:
                    logger.error(f"Error processing chat message {msg.id}: {e}")
                    stats['errors'] += 1
            db.commit()
            logger.info(f"Chat knowledge sync complete: {stats}")
        except Exception as e:
            logger.error(f"Error syncing chat knowledge: {e}")
            db.rollback()
            raise
        finally:
            db.close()
        return stats

    def analyze_user_questions(self, days_back: int = 30) -> Dict[str, int]:
        """
        Analyze user questions for development insights.

        PRIVACY: Content is anonymized - we only extract PATTERNS/TOPICS,
        not actual questions.

        This helps identify:
        - Missing features (what users ask for that doesn't exist)
        - Common questions (need better docs/UI)
        - Popular company searches (demand analysis)

        Args:
            days_back: How many days back to analyze

        Returns:
            Dict with counts of insights by category
        """
        db = SessionLocal()
        stats = {'insights_added': 0, 'patterns_found': 0}
        try:
            cutoff_date = datetime.now() - timedelta(days=days_back)
            # Get user messages (for pattern analysis only; never stored verbatim)
            messages = db.query(AIChatMessage).filter(
                AIChatMessage.role == 'user',
                AIChatMessage.created_at >= cutoff_date
            ).all()
            # Count occurrences of each (pattern, category) pair
            pattern_counts = {}
            for msg in messages:
                patterns = self._extract_insight_patterns(msg.content)
                for pattern, category in patterns:
                    key = (pattern, category)
                    pattern_counts[key] = pattern_counts.get(key, 0) + 1
            # Create insight chunks for frequently occurring patterns
            for (pattern, category), count in pattern_counts.items():
                if count >= 3:  # Only if pattern appears 3+ times
                    stats['patterns_found'] += 1
                    # Check if insight already exists (keyed by summary == pattern)
                    existing = db.query(NordaKnowledgeChunk).filter(
                        NordaKnowledgeChunk.source_type == KnowledgeSourceType.CHAT_QUESTION.value,
                        NordaKnowledgeChunk.summary == pattern
                    ).first()
                    if existing:
                        # Update priority based on frequency.
                        # BUGFIX: insight_priority may be NULL in the DB;
                        # max(None, count) raised TypeError.
                        existing.insight_priority = max(existing.insight_priority or 0, count)
                        existing.updated_at = datetime.now()
                    else:
                        # Create new insight
                        chunk = NordaKnowledgeChunk(
                            content=f"Użytkownicy często pytają o: {pattern} ({count} razy w ostatnich {days_back} dniach)",
                            summary=pattern,
                            source_type=KnowledgeSourceType.CHAT_QUESTION.value,
                            source_id=None,  # Anonymized, no specific source
                            insight_category=category,
                            insight_priority=count,
                            insight_status='new',
                            confidence_score=min(50 + count * 5, 95),
                            is_active=True
                        )
                        db.add(chunk)
                        stats['insights_added'] += 1
            db.commit()
            logger.info(f"User question analysis complete: {stats}")
        except Exception as e:
            logger.error(f"Error analyzing user questions: {e}")
            db.rollback()
            raise
        finally:
            db.close()
        return stats

    def _extract_insight_patterns(self, question: str) -> List[tuple]:
        """
        Extract anonymized patterns from user questions.

        Returns list of (pattern, category) tuples; the original text is
        never included in the result.
        """
        patterns = []
        q_lower = question.lower()
        # Feature requests
        feature_keywords = ['czy można', 'czy da się', 'chciałbym', 'przydałoby się', 'brakuje', 'potrzebuję']
        for kw in feature_keywords:
            if kw in q_lower:
                patterns.append(('Prośba o nową funkcję', InsightCategory.FEATURE_REQUEST.value))
                break
        # Bug reports
        bug_keywords = ['nie działa', 'błąd', 'problem', 'nie mogę', 'nie wyświetla', 'crash']
        for kw in bug_keywords:
            if kw in q_lower:
                patterns.append(('Zgłoszenie problemu', InsightCategory.BUG_REPORT.value))
                break
        # Company searches
        company_keywords = ['firma', 'firmy', 'szukam', 'potrzebuję', 'kto robi', 'kto oferuje']
        for kw in company_keywords:
            if kw in q_lower:
                patterns.append(('Wyszukiwanie firm', InsightCategory.COMPANY_SEARCH.value))
                break
        # Specific service searches (extract service type)
        service_patterns = [
            ('budownictwo', 'Szukanie: usługi budowlane'),
            ('transport', 'Szukanie: transport/logistyka'),
            ('it', 'Szukanie: usługi IT'),
            ('marketing', 'Szukanie: marketing/reklama'),
            ('księgowość', 'Szukanie: księgowość'),
            ('prawo', 'Szukanie: usługi prawne'),
        ]
        for keyword, pattern in service_patterns:
            if keyword in q_lower:
                patterns.append((pattern, InsightCategory.COMPANY_SEARCH.value))
        # Questions about portal
        portal_keywords = ['jak', 'gdzie', 'co to', 'po co', 'dlaczego']
        for kw in portal_keywords:
            if kw in q_lower and ('portal' in q_lower or 'strona' in q_lower or 'norda' in q_lower):
                patterns.append(('Pytanie o portal', InsightCategory.QUESTION.value))
                break
        return patterns

    def get_development_insights(self, status: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Get development insights for roadmap planning.

        Args:
            status: Filter by status (new, reviewed, planned, implemented, rejected)

        Returns:
            List of insight dicts ordered by priority (top 100)
        """
        db = SessionLocal()
        try:
            query = db.query(NordaKnowledgeChunk).filter(
                NordaKnowledgeChunk.is_active == True,
                NordaKnowledgeChunk.insight_category.isnot(None)
            )
            if status:
                query = query.filter(NordaKnowledgeChunk.insight_status == status)
            insights = query.order_by(
                NordaKnowledgeChunk.insight_priority.desc(),
                NordaKnowledgeChunk.created_at.desc()
            ).limit(100).all()
            return [
                {
                    'id': i.id,
                    'summary': i.summary,
                    'content': i.content,
                    'category': i.insight_category,
                    'priority': i.insight_priority,
                    'status': i.insight_status,
                    'source_type': i.source_type,
                    'created_at': i.created_at.isoformat() if i.created_at else None
                }
                for i in insights
            ]
        finally:
            db.close()

    def update_insight_status(self, insight_id: int, status: str, note: Optional[str] = None) -> bool:
        """
        Update the status of a development insight.

        Returns True on success, False when the insight does not exist or
        the update fails (the error is logged, not raised).
        """
        db = SessionLocal()
        try:
            insight = db.query(NordaKnowledgeChunk).filter_by(id=insight_id).first()
            if not insight:
                return False
            insight.insight_status = status
            insight.updated_at = datetime.now()
            if note:
                # Admin notes are appended to (not replacing) the content.
                insight.content = f"{insight.content}\n\n[Admin note: {note}]"
            db.commit()
            return True
        except Exception as e:
            logger.error(f"Error updating insight status: {e}")
            db.rollback()
            return False
        finally:
            db.close()

    def search_knowledge(
        self,
        query: str,
        limit: int = 5,
        source_types: Optional[List[str]] = None
    ) -> List[KnowledgeChunkResult]:
        """
        Search knowledge base for relevant chunks.

        Args:
            query: Search query
            limit: Maximum results to return
            source_types: Filter by source types (None = all)

        Returns:
            List of KnowledgeChunkResult ordered by relevance
        """
        db = SessionLocal()
        results = []
        try:
            # Build base query
            base_query = db.query(NordaKnowledgeChunk).filter(
                NordaKnowledgeChunk.is_active == True
            )
            # Filter by source types
            if source_types:
                base_query = base_query.filter(
                    NordaKnowledgeChunk.source_type.in_(source_types)
                )
            # Simple keyword search (can be enhanced with FTS later).
            # NOTE: this loads all active chunks into memory and scores them in
            # Python -- acceptable for small tables, revisit if the table grows.
            query_words = query.lower().split()
            chunks = base_query.all()
            # Score each chunk
            scored_chunks = []
            for chunk in chunks:
                score = self._calculate_relevance(query_words, chunk)
                if score > 0:
                    scored_chunks.append((chunk, score))
            # Sort by score and limit
            scored_chunks.sort(key=lambda x: x[1], reverse=True)
            top_chunks = scored_chunks[:limit]
            # Convert to results
            for chunk, score in top_chunks:
                results.append(KnowledgeChunkResult(
                    chunk_id=chunk.id,
                    content=chunk.content,
                    summary=chunk.summary,
                    source_type=chunk.source_type,
                    source_url=chunk.source_url,
                    relevance_score=score,
                    created_at=chunk.source_created_at or chunk.created_at
                ))
        finally:
            db.close()
        return results

    def get_knowledge_stats(self) -> Dict[str, Any]:
        """Get statistics about the knowledge base."""
        db = SessionLocal()
        try:
            total = db.query(NordaKnowledgeChunk).filter(
                NordaKnowledgeChunk.is_active == True
            ).count()
            by_source = {}
            for source_type in KnowledgeSourceType:
                count = db.query(NordaKnowledgeChunk).filter(
                    NordaKnowledgeChunk.is_active == True,
                    NordaKnowledgeChunk.source_type == source_type.value
                ).count()
                by_source[source_type.value] = count
            verified = db.query(NordaKnowledgeChunk).filter(
                NordaKnowledgeChunk.is_active == True,
                NordaKnowledgeChunk.is_verified == True
            ).count()
            return {
                'total_chunks': total,
                'by_source': by_source,
                'verified_chunks': verified,
                'last_sync': datetime.now().isoformat()
            }
        finally:
            db.close()

    def _truncate_content(self, content: str) -> str:
        """Truncate content to max length, appending an ellipsis when cut."""
        if len(content) <= self.max_content_length:
            return content
        return content[:self.max_content_length - 3] + "..."

    def _extract_keywords(self, text: str) -> str:
        """Extract keywords from text (simple implementation).

        Returns a comma-separated string of up to 20 unique words longer
        than 3 characters that are not in the stopword list.
        """
        # Remove common words and extract significant terms
        stopwords = {
            'i', 'a', 'the', 'to', 'w', 'z', 'na', 'do', 'jest', '', 'być',
            'że', 'o', 'nie', 'się', 'jak', 'co', 'dla', 'po', 'od', 'za',
            'ale', 'lub', 'oraz', 'czy', 'tak', 'już', 'tylko', 'też', 'jeszcze'
        }
        words = text.lower().split()
        keywords = [w for w in words if len(w) > 3 and w not in stopwords]
        # Return unique keywords (insertion-ordered), limited
        unique_keywords = list(dict.fromkeys(keywords))[:20]
        return ','.join(unique_keywords)

    def _generate_summary(self, content: str) -> Optional[str]:
        """Generate a short summary (first sentence or 100 chars)."""
        if not content:
            return None
        # Try to get first sentence
        sentences = content.split('.')
        if sentences:
            first = sentences[0].strip()
            if len(first) > 10:
                return first[:200] + ('...' if len(first) > 200 else '')
        return content[:100] + '...'

    def _contains_company_info(self, content: str) -> bool:
        """Check if content contains company-related information (keyword heuristic)."""
        company_keywords = [
            'firma', 'firmy', 'firmę', 'spółka', 'przedsiębiorstwo',
            'usługi', 'produkty', 'kontakt', 'telefon', 'email',
            'norda', 'członek', 'biznes', 'współpraca'
        ]
        content_lower = content.lower()
        return any(kw in content_lower for kw in company_keywords)

    def _calculate_relevance(self, query_words: List[str], chunk: NordaKnowledgeChunk) -> float:
        """Calculate a relevance score for a chunk.

        Weights: keyword match 2.0, summary match 1.5, content match 1.0;
        verified chunks get a 1.2x boost and the total is scaled by
        confidence_score/100.
        """
        score = 0.0
        content_lower = (chunk.content or '').lower()
        keywords = (chunk.keywords or '').lower()
        summary = (chunk.summary or '').lower()
        for word in query_words:
            # Content matches
            if word in content_lower:
                score += 1.0
            # Keyword matches (higher weight)
            if word in keywords:
                score += 2.0
            # Summary matches
            if word in summary:
                score += 1.5
        # Boost verified chunks
        if chunk.is_verified:
            score *= 1.2
        # Boost by confidence
        score *= (chunk.confidence_score or 50) / 100
        return score
# Lazily-created process-wide service instance
_service_instance: Optional[NordaKnowledgeService] = None


def get_knowledge_service() -> NordaKnowledgeService:
    """Return the shared NordaKnowledgeService, creating it on first use."""
    global _service_instance
    if _service_instance is not None:
        return _service_instance
    _service_instance = NordaKnowledgeService()
    return _service_instance
# Convenience functions
def sync_all_knowledge(days_back: int = 30) -> Dict[str, Any]:
    """Run every sync source and return per-source stats keyed by source name."""
    svc = get_knowledge_service()
    return {
        'forum': svc.sync_forum_knowledge(days_back),
        'chat': svc.sync_chat_knowledge(days_back)
    }
def search_knowledge(query: str, limit: int = 5) -> List[KnowledgeChunkResult]:
    """Module-level shortcut for NordaKnowledgeService.search_knowledge."""
    service = get_knowledge_service()
    return service.search_knowledge(query, limit)

View File

@ -0,0 +1,415 @@
{% extends "base.html" %}

{% block title %}Insights - Rozwój portalu - Norda Biznes Partner{% endblock %}

{% block extra_css %}
<style>
/* ---- Page header ---- */
.insights-header {
    margin-bottom: var(--spacing-xl);
}
.insights-header h1 {
    font-size: var(--font-size-3xl);
    color: var(--text-primary);
    display: flex;
    align-items: center;
    gap: var(--spacing-md);
}
/* ---- Stats cards (totals by status) ---- */
.stats-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
    gap: var(--spacing-lg);
    margin-bottom: var(--spacing-2xl);
}
.stat-card {
    background: var(--surface);
    padding: var(--spacing-lg);
    border-radius: var(--radius-lg);
    box-shadow: var(--shadow);
    text-align: center;
}
.stat-value {
    font-size: var(--font-size-3xl);
    font-weight: 700;
    color: var(--primary);
}
.stat-label {
    color: var(--text-secondary);
    font-size: var(--font-size-sm);
    margin-top: var(--spacing-xs);
}
/* ---- Status filter bar ---- */
.filters-bar {
    display: flex;
    gap: var(--spacing-md);
    margin-bottom: var(--spacing-xl);
    flex-wrap: wrap;
    align-items: center;
}
.filter-btn {
    padding: var(--spacing-sm) var(--spacing-md);
    border: 1px solid var(--border-color);
    border-radius: var(--radius-md);
    background: var(--surface);
    cursor: pointer;
    transition: all 0.2s;
}
.filter-btn:hover {
    background: var(--bg-secondary);
}
.filter-btn.active {
    background: var(--primary);
    color: white;
    border-color: var(--primary);
}
/* ---- Insight cards; left border color encodes the insight category ---- */
.insights-list {
    display: flex;
    flex-direction: column;
    gap: var(--spacing-md);
}
.insight-card {
    background: var(--surface);
    padding: var(--spacing-lg);
    border-radius: var(--radius-lg);
    box-shadow: var(--shadow);
    border-left: 4px solid var(--border-color);
}
/* Class names match InsightCategory values emitted by the API */
.insight-card.feature_request { border-left-color: var(--primary); }
.insight-card.bug_report { border-left-color: var(--error); }
.insight-card.improvement { border-left-color: var(--warning); }
.insight-card.question { border-left-color: var(--info); }
.insight-card.company_search { border-left-color: var(--success); }
.insight-header {
    display: flex;
    justify-content: space-between;
    align-items: flex-start;
    margin-bottom: var(--spacing-md);
}
.insight-title {
    font-weight: 600;
    font-size: var(--font-size-lg);
    color: var(--text-primary);
}
.insight-badges {
    display: flex;
    gap: var(--spacing-sm);
}
/* ---- Badges: category / priority / status ---- */
.badge {
    padding: 2px 8px;
    border-radius: var(--radius);
    font-size: var(--font-size-xs);
    font-weight: 500;
}
.badge-category {
    background: var(--bg-secondary);
    color: var(--text-secondary);
}
.badge-priority {
    background: var(--primary-light);
    color: var(--primary);
}
.badge-status {
    background: var(--success-light);
    color: var(--success);
}
/* Status-specific badge colors (classes match insight_status values) */
.badge-status.reviewed { background: var(--info-light); color: var(--info); }
.badge-status.planned { background: var(--warning-light); color: var(--warning); }
.badge-status.implemented { background: var(--success-light); color: var(--success); }
.badge-status.rejected { background: var(--error-light); color: var(--error); }
.insight-content {
    color: var(--text-secondary);
    font-size: var(--font-size-sm);
    margin-bottom: var(--spacing-md);
}
.insight-actions {
    display: flex;
    gap: var(--spacing-sm);
}
.insight-actions select {
    padding: var(--spacing-xs) var(--spacing-sm);
    border: 1px solid var(--border-color);
    border-radius: var(--radius);
    font-size: var(--font-size-sm);
}
.sync-btn {
    margin-left: auto;
}
/* ---- Loading / empty placeholders ---- */
.loading {
    text-align: center;
    padding: var(--spacing-2xl);
    color: var(--text-secondary);
}
.empty-state {
    text-align: center;
    padding: var(--spacing-2xl);
    color: var(--text-secondary);
}
.category-icon {
    margin-right: var(--spacing-xs);
}
</style>
{% endblock %}
{% block content %}
<!-- Page heading -->
<div class="insights-header">
    <h1>
        <svg width="32" height="32" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24" style="color: var(--primary);">
            <path d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z"/>
        </svg>
        Insights - Rozwój portalu
    </h1>
    <p style="color: var(--text-secondary);">Zbieraj pomysły i feedback z interakcji użytkowników</p>
</div>
<!-- Stats: values are filled in client-side from /api/admin/insights/stats -->
<div class="stats-grid" id="statsGrid">
    <div class="stat-card">
        <div class="stat-value" id="statTotal">-</div>
        <div class="stat-label">Wszystkich insights</div>
    </div>
    <div class="stat-card">
        <div class="stat-value" id="statNew">-</div>
        <div class="stat-label">Nowych</div>
    </div>
    <div class="stat-card">
        <div class="stat-value" id="statPlanned">-</div>
        <div class="stat-label">Zaplanowanych</div>
    </div>
    <div class="stat-card">
        <div class="stat-value" id="statImplemented">-</div>
        <div class="stat-label">Zrealizowanych</div>
    </div>
</div>
<!-- Filters: data-status values match insight_status values in the API -->
<div class="filters-bar">
    <button class="filter-btn active" data-status="">Wszystkie</button>
    <button class="filter-btn" data-status="new">Nowe</button>
    <button class="filter-btn" data-status="reviewed">Przejrzane</button>
    <button class="filter-btn" data-status="planned">Zaplanowane</button>
    <button class="filter-btn" data-status="implemented">Zrealizowane</button>
    <button class="filter-btn" data-status="rejected">Odrzucone</button>
    <!-- Triggers POST /api/admin/insights/sync -->
    <button class="btn btn-primary sync-btn" onclick="syncInsights()">
        <svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24" style="margin-right: 6px;">
            <path d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15"/>
        </svg>
        Synchronizuj
    </button>
</div>
<!-- Insights list, rendered client-side by loadInsights() -->
<div class="insights-list" id="insightsList">
    <div class="loading">Ładowanie insights...</div>
</div>
{% endblock %}
{% block extra_js %}
// Currently selected status filter ('' = all statuses).
// Mutated by the filter-button click handlers; read by loadInsights().
let currentStatus = '';
// Polish display labels for the insight categories returned by
// /api/admin/insights (keys must match the backend category values).
const categoryLabels = {
    'feature_request': 'Propozycja funkcji',
    'bug_report': 'Zgłoszenie błędu',
    'improvement': 'Ulepszenie',
    'question': 'Pytanie',
    'pain_point': 'Problem',
    'company_search': 'Wyszukiwanie firm',
    'positive_feedback': 'Pozytywny feedback',
    'other': 'Inne'
};
// Emoji shown next to each insight title, keyed by the same categories.
const categoryIcons = {
    'feature_request': '💡',
    'bug_report': '🐛',
    'improvement': '⚡',
    'question': '❓',
    'pain_point': '😤',
    'company_search': '🔍',
    'positive_feedback': '👍',
    'other': '📝'
};
// Polish display labels for the insight workflow statuses
// (new → reviewed → planned → implemented / rejected).
const statusLabels = {
    'new': 'Nowy',
    'reviewed': 'Przejrzany',
    'planned': 'Zaplanowany',
    'implemented': 'Zrealizowany',
    'rejected': 'Odrzucony'
};
async function loadInsights() {
    // Fetch and render the insights list for the current status filter.
    // Insight summaries/contents originate from user-generated forum and
    // chat data, so every server-provided value is HTML-escaped before
    // being interpolated into innerHTML (stored-XSS hardening).
    const container = document.getElementById('insightsList');
    container.innerHTML = '<div class="loading">Ładowanie insights...</div>';
    // Minimal HTML entity encoder for untrusted text/attribute values.
    const esc = (v) => String(v ?? '')
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
    try {
        // encodeURIComponent keeps the query string valid for any status value.
        const url = currentStatus
            ? `/api/admin/insights?status=${encodeURIComponent(currentStatus)}`
            : '/api/admin/insights';
        const response = await fetch(url);
        const data = await response.json();
        if (!data.success) {
            container.innerHTML = `<div class="empty-state">Błąd: ${esc(data.error)}</div>`;
            return;
        }
        if (data.insights.length === 0) {
            // statusLabels values are local constants, safe to interpolate.
            container.innerHTML = `
                <div class="empty-state">
                    <p>Brak insights${currentStatus ? ' o statusie "' + statusLabels[currentStatus] + '"' : ''}.</p>
                    <p style="margin-top: var(--spacing-md);">Kliknij "Synchronizuj" aby pobrać dane z forum i czata.</p>
                </div>
            `;
            return;
        }
        container.innerHTML = data.insights.map(insight => `
            <div class="insight-card ${esc(insight.category || '')}">
                <div class="insight-header">
                    <div class="insight-title">
                        <span class="category-icon">${categoryIcons[insight.category] || '📝'}</span>
                        ${esc(insight.summary || 'Bez tytułu')}
                    </div>
                    <div class="insight-badges">
                        <span class="badge badge-category">${categoryLabels[insight.category] || esc(insight.category)}</span>
                        <span class="badge badge-priority">Priorytet: ${esc(insight.priority || 0)}</span>
                        <span class="badge badge-status ${esc(insight.status)}">${statusLabels[insight.status] || esc(insight.status)}</span>
                    </div>
                </div>
                <div class="insight-content">${esc(insight.content || '')}</div>
                <div class="insight-actions">
                    <select onchange="updateInsightStatus(${esc(insight.id)}, this.value)">
                        <option value="">Zmień status...</option>
                        <option value="reviewed">Przejrzany</option>
                        <option value="planned">Zaplanowany</option>
                        <option value="implemented">Zrealizowany</option>
                        <option value="rejected">Odrzucony</option>
                    </select>
                    <span style="font-size: var(--font-size-xs); color: var(--text-muted);">
                        ${esc(insight.source_type)} | ${insight.created_at ? new Date(insight.created_at).toLocaleDateString('pl-PL') : ''}
                    </span>
                </div>
            </div>
        `).join('');
    } catch (error) {
        console.error('Error loading insights:', error);
        container.innerHTML = '<div class="empty-state">Błąd ładowania danych</div>';
    }
}
async function loadStats() {
    // Refresh the headline counter; failures are logged but non-fatal.
    try {
        const res = await fetch('/api/admin/insights/stats');
        const payload = await res.json();
        if (!payload.success || !payload.stats) {
            return;
        }
        // Note: detailed stats by status would need additional backend support
        document.getElementById('statTotal').textContent = payload.stats.total_chunks || 0;
    } catch (error) {
        console.error('Error loading stats:', error);
    }
}
async function updateInsightStatus(insightId, status) {
    // Persist a status change for one insight, then re-render the list.
    // Called from the inline <select> onchange; an empty value is a no-op.
    if (!status) {
        return;
    }
    try {
        const res = await fetch(`/api/admin/insights/${insightId}/status`, {
            method: 'PUT',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ status })
        });
        const payload = await res.json();
        if (payload.success) {
            loadInsights();
            return;
        }
        alert('Błąd: ' + (payload.error || 'Nieznany błąd'));
    } catch (error) {
        console.error('Error updating status:', error);
        alert('Błąd połączenia');
    }
}
async function syncInsights() {
    // Trigger a backend sync of forum/chat sources (last 30 days), then
    // refresh both the list and the stats. The button is disabled and
    // shows a spinner for the duration, and is restored in `finally`.
    const btn = document.querySelector('.sync-btn');
    btn.disabled = true;
    btn.innerHTML = '<span class="spinner"></span> Synchronizuję...';
    try {
        const res = await fetch('/api/admin/insights/sync', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ days_back: 30 })
        });
        const payload = await res.json();
        if (!payload.success) {
            alert('Błąd: ' + (payload.error || 'Nieznany błąd'));
        } else {
            const r = payload.results;
            alert(`Synchronizacja zakończona!\n\n` +
                `Forum: ${r.forum?.topics_added || 0} tematów, ${r.forum?.replies_added || 0} odpowiedzi\n` +
                `Chat: ${r.chat?.responses_added || 0} odpowiedzi AI\n` +
                `Insights: ${r.questions?.insights_added || 0} nowych wzorców`
            );
            loadInsights();
            loadStats();
        }
    } catch (error) {
        console.error('Error syncing:', error);
        alert('Błąd połączenia');
    } finally {
        btn.disabled = false;
        btn.innerHTML = `
            <svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24" style="margin-right: 6px;">
                <path d="M4 4v5h.582m15.356 2A8.001 8.001 0 004.582 9m0 0H9m11 11v-5h-.581m0 0a8.003 8.003 0 01-15.357-2m15.357 2H15"/>
            </svg>
            Synchronizuj
        `;
    }
}
// Wire up the status filter buttons: clicking one makes it the sole
// "active" button and reloads the list filtered by its data-status.
for (const button of document.querySelectorAll('.filter-btn')) {
    button.addEventListener('click', () => {
        document.querySelectorAll('.filter-btn').forEach((other) => other.classList.remove('active'));
        button.classList.add('active');
        currentStatus = button.dataset.status;
        loadInsights();
    });
}
// Initial render on page load.
loadInsights();
loadStats();
{% endblock %}