From 96fa0058c2c3fefd4d216c70e0832c75809e4688 Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Sat, 17 Jan 2026 10:57:11 +0100 Subject: [PATCH] feat(zopk): Rozbudowa bazy wiedzy ZOPK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Dodano skrypt cron do automatycznej ekstrakcji wiedzy (scripts/cron_extract_knowledge.py) - Dodano panel deduplikacji faktów (/admin/zopk/knowledge/fact-duplicates) - Dodano API i funkcje auto-weryfikacji encji i faktów - Dodano panel Timeline ZOPK (/admin/zopk/timeline) z CRUD - Rozszerzono dashboard bazy wiedzy o statystyki weryfikacji i przyciski auto-weryfikacji - Dodano migrację 016_zopk_milestones.sql dla tabeli kamieni milowych - Naprawiono duplikat modelu ZOPKMilestone w database.py Co-Authored-By: Claude Opus 4.5 --- app.py | 265 +++++++++++++++ database.py | 96 +++--- database/migrations/016_zopk_milestones.sql | 21 +- scripts/cron_extract_knowledge.py | 101 ++++++ templates/admin/zopk_fact_duplicates.html | 217 +++++++++++++ templates/admin/zopk_knowledge_dashboard.html | 137 ++++++++ templates/admin/zopk_timeline.html | 302 ++++++++++++++++++ zopk_knowledge_service.py | 153 +++++++++ 8 files changed, 1225 insertions(+), 67 deletions(-) create mode 100644 scripts/cron_extract_knowledge.py create mode 100644 templates/admin/zopk_fact_duplicates.html create mode 100644 templates/admin/zopk_timeline.html diff --git a/app.py b/app.py index a1f707f..14381d1 100644 --- a/app.py +++ b/app.py @@ -12052,6 +12052,271 @@ def api_zopk_knowledge_graph_data(): db.close() +# ============================================================ +# ZOPK KNOWLEDGE - FACT DUPLICATES +# ============================================================ + +@app.route('/admin/zopk/knowledge/fact-duplicates') +@login_required +def admin_zopk_fact_duplicates(): + """Panel deduplikacji faktów.""" + if not current_user.is_admin: + flash('Brak uprawnień.', 'error') + return redirect(url_for('dashboard')) + return 
render_template('admin/zopk_fact_duplicates.html') + + +@app.route('/api/zopk/knowledge/fact-duplicates') +@login_required +def api_zopk_fact_duplicates(): + """API - lista duplikatów faktów.""" + if not current_user.is_admin: + return jsonify({'error': 'Forbidden'}), 403 + + from zopk_knowledge_service import find_duplicate_facts + db = SessionLocal() + try: + min_sim = float(request.args.get('min_similarity', 0.7)) + fact_type = request.args.get('fact_type', '') + limit = min(int(request.args.get('limit', 100)), 500) + + duplicates = find_duplicate_facts(db, min_sim, limit, fact_type if fact_type else None) + return jsonify({'success': True, 'duplicates': duplicates, 'count': len(duplicates)}) + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + + +@app.route('/api/zopk/knowledge/fact-duplicates/merge', methods=['POST']) +@login_required +def api_zopk_fact_merge(): + """API - merge duplikatów faktów.""" + if not current_user.is_admin: + return jsonify({'error': 'Forbidden'}), 403 + + from zopk_knowledge_service import merge_facts + db = SessionLocal() + try: + data = request.get_json() + primary_id = data.get('primary_id') + duplicate_id = data.get('duplicate_id') + new_text = data.get('new_text') + + result = merge_facts(db, primary_id, duplicate_id, new_text) + return jsonify(result) + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + + +# ============================================================ +# ZOPK KNOWLEDGE - AUTO VERIFY +# ============================================================ + +@app.route('/api/zopk/knowledge/auto-verify/entities', methods=['POST']) +@login_required +def api_zopk_auto_verify_entities(): + """Auto-weryfikacja encji z wysoką liczbą wzmianek.""" + if not current_user.is_admin: + return jsonify({'error': 'Forbidden'}), 403 + + from zopk_knowledge_service import auto_verify_top_entities + db = SessionLocal() + try: + 
data = request.get_json() or {} + min_mentions = int(data.get('min_mentions', 5)) + limit = int(data.get('limit', 100)) + + result = auto_verify_top_entities(db, min_mentions, limit) + return jsonify(result) + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + + +@app.route('/api/zopk/knowledge/auto-verify/facts', methods=['POST']) +@login_required +def api_zopk_auto_verify_facts(): + """Auto-weryfikacja faktów z wysoką ważnością.""" + if not current_user.is_admin: + return jsonify({'error': 'Forbidden'}), 403 + + from zopk_knowledge_service import auto_verify_top_facts + db = SessionLocal() + try: + data = request.get_json() or {} + min_importance = float(data.get('min_importance', 0.7)) + limit = int(data.get('limit', 200)) + + result = auto_verify_top_facts(db, min_importance, limit) + return jsonify(result) + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + + +# ============================================================ +# ZOPK KNOWLEDGE - DASHBOARD +# ============================================================ + +@app.route('/api/zopk/knowledge/dashboard-stats') +@login_required +def api_zopk_dashboard_stats(): + """API - statystyki dashboardu.""" + if not current_user.is_admin: + return jsonify({'error': 'Forbidden'}), 403 + + from zopk_knowledge_service import get_knowledge_dashboard_stats + db = SessionLocal() + try: + stats = get_knowledge_dashboard_stats(db) + return jsonify({'success': True, **stats}) + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + + +# ============================================================ +# ZOPK MILESTONES / TIMELINE +# ============================================================ + +@app.route('/admin/zopk/timeline') +@login_required +def admin_zopk_timeline(): + """Panel Timeline ZOPK.""" + if not current_user.is_admin: + flash('Brak uprawnień.', 
'error') + return redirect(url_for('dashboard')) + return render_template('admin/zopk_timeline.html') + + +@app.route('/api/zopk/milestones') +@login_required +def api_zopk_milestones(): + """API - lista kamieni milowych ZOPK.""" + from database import ZOPKMilestone + db = SessionLocal() + try: + milestones = db.query(ZOPKMilestone).order_by(ZOPKMilestone.target_date).all() + return jsonify({ + 'success': True, + 'milestones': [{ + 'id': m.id, + 'title': m.title, + 'description': m.description, + 'category': m.category, + 'target_date': m.target_date.isoformat() if m.target_date else None, + 'actual_date': m.actual_date.isoformat() if m.actual_date else None, + 'status': m.status, + 'source_url': m.source_url + } for m in milestones] + }) + except Exception as e: + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + + +@app.route('/api/zopk/milestones', methods=['POST']) +@login_required +def api_zopk_milestone_create(): + """API - utworzenie kamienia milowego.""" + if not current_user.is_admin: + return jsonify({'error': 'Forbidden'}), 403 + + from database import ZOPKMilestone + from datetime import datetime + db = SessionLocal() + try: + data = request.get_json() + milestone = ZOPKMilestone( + title=data['title'], + description=data.get('description'), + category=data.get('category', 'other'), + target_date=datetime.strptime(data['target_date'], '%Y-%m-%d').date() if data.get('target_date') else None, + actual_date=datetime.strptime(data['actual_date'], '%Y-%m-%d').date() if data.get('actual_date') else None, + status=data.get('status', 'planned'), + source_url=data.get('source_url'), + source_news_id=data.get('source_news_id') + ) + db.add(milestone) + db.commit() + return jsonify({'success': True, 'id': milestone.id}) + except Exception as e: + db.rollback() + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + + +@app.route('/api/zopk/milestones/', methods=['PUT']) +@login_required +def 
api_zopk_milestone_update(milestone_id): + """API - aktualizacja kamienia milowego.""" + if not current_user.is_admin: + return jsonify({'error': 'Forbidden'}), 403 + + from database import ZOPKMilestone + from datetime import datetime + db = SessionLocal() + try: + milestone = db.query(ZOPKMilestone).get(milestone_id) + if not milestone: + return jsonify({'error': 'Not found'}), 404 + + data = request.get_json() + if 'title' in data: + milestone.title = data['title'] + if 'description' in data: + milestone.description = data['description'] + if 'category' in data: + milestone.category = data['category'] + if 'target_date' in data: + milestone.target_date = datetime.strptime(data['target_date'], '%Y-%m-%d').date() if data['target_date'] else None + if 'actual_date' in data: + milestone.actual_date = datetime.strptime(data['actual_date'], '%Y-%m-%d').date() if data['actual_date'] else None + if 'status' in data: + milestone.status = data['status'] + if 'source_url' in data: + milestone.source_url = data['source_url'] + + db.commit() + return jsonify({'success': True}) + except Exception as e: + db.rollback() + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + + +@app.route('/api/zopk/milestones/', methods=['DELETE']) +@login_required +def api_zopk_milestone_delete(milestone_id): + """API - usunięcie kamienia milowego.""" + if not current_user.is_admin: + return jsonify({'error': 'Forbidden'}), 403 + + from database import ZOPKMilestone + db = SessionLocal() + try: + milestone = db.query(ZOPKMilestone).get(milestone_id) + if not milestone: + return jsonify({'error': 'Not found'}), 404 + + db.delete(milestone) + db.commit() + return jsonify({'success': True}) + except Exception as e: + db.rollback() + return jsonify({'success': False, 'error': str(e)}), 500 + finally: + db.close() + # ============================================================ # KRS AUDIT (Krajowy Rejestr Sądowy) # 
============================================================ diff --git a/database.py b/database.py index e858437..6d651c2 100644 --- a/database.py +++ b/database.py @@ -1841,62 +1841,6 @@ class ZOPKStakeholderProject(Base): ) -class ZOPKMilestone(Base): - """ - Timeline milestones for ZOPK projects. - Tracks key events: announcements, decisions, construction, completions. - Used for public timeline visualization on /zopk page. - """ - __tablename__ = 'zopk_milestones' - - id = Column(Integer, primary_key=True) - - # Basic info - title = Column(String(255), nullable=False) - description = Column(Text) - - # Categorization - # Types: announcement, decision, construction_start, construction_progress, - # completion, investment, agreement, regulation - milestone_type = Column(String(50), nullable=False, default='announcement') - - # Project association - project_id = Column(Integer, ForeignKey('zopk_projects.id', ondelete='SET NULL')) - - # Timeline - target_date = Column(Date) # Planned/expected date - actual_date = Column(Date) # Actual completion date (if completed) - date_precision = Column(String(20), default='exact') # exact, month, quarter, year - - # Status: planned, in_progress, completed, delayed, cancelled - status = Column(String(20), nullable=False, default='planned') - - # Source linking - source_news_id = Column(Integer, ForeignKey('zopk_news.id', ondelete='SET NULL')) - source_fact_id = Column(Integer, ForeignKey('zopk_knowledge_facts.id', ondelete='SET NULL')) - source_url = Column(String(1000)) - - # Display settings - icon = Column(String(50), default='📌') - color = Column(String(7), default='#059669') - is_featured = Column(Boolean, default=False) - display_order = Column(Integer, default=0) - - # Verification - is_verified = Column(Boolean, default=False) - verified_by = Column(Integer, ForeignKey('users.id')) - verified_at = Column(DateTime) - - # Timestamps - created_at = Column(DateTime, default=datetime.now) - updated_at = Column(DateTime, 
default=datetime.now, onupdate=datetime.now) - - # Relationships - project = relationship('ZOPKProject', backref='milestones') - # Note: source_news relationship defined in ZOPKNews class via backref - verifier = relationship('User', foreign_keys=[verified_by]) - - class ZOPKNews(Base): """ News articles about ZOPK with approval workflow. @@ -3056,6 +3000,46 @@ class SecurityAlert(Base): return f"" +# ============================================================ +# ZOPK MILESTONES (Timeline) +# ============================================================ + +class ZOPKMilestone(Base): + """ + Kamienie milowe projektu ZOPK dla wizualizacji timeline. + """ + __tablename__ = 'zopk_milestones' + + id = Column(Integer, primary_key=True) + title = Column(String(500), nullable=False) + description = Column(Text) + + # Kategoria: nuclear, offshore, infrastructure, defense, other + category = Column(String(50), default='other') + + # Daty + target_date = Column(Date) # Planowana data + actual_date = Column(Date) # Rzeczywista data (jeśli zakończone) + + # Status: planned, in_progress, completed, delayed, cancelled + status = Column(String(20), default='planned') + + # Źródło informacji + source_url = Column(String(1000)) + source_news_id = Column(Integer, ForeignKey('zopk_news.id')) + + # Wyświetlanie + icon = Column(String(50)) # emoji lub ikona + color = Column(String(20)) # kolor dla timeline + is_featured = Column(Boolean, default=False) + + created_at = Column(DateTime, default=datetime.now) + updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now) + + # Relationships + source_news = relationship('ZOPKNews', backref='milestones') + + # ============================================================ # DATABASE INITIALIZATION # ============================================================ diff --git a/database/migrations/016_zopk_milestones.sql b/database/migrations/016_zopk_milestones.sql index 616046c..c0be97a 100644 --- 
a/database/migrations/016_zopk_milestones.sql +++ b/database/migrations/016_zopk_milestones.sql @@ -13,10 +13,9 @@ CREATE TABLE IF NOT EXISTS zopk_milestones ( title VARCHAR(255) NOT NULL, description TEXT, - -- Categorization - milestone_type VARCHAR(50) NOT NULL DEFAULT 'announcement', - -- Types: announcement, decision, construction_start, construction_progress, - -- completion, investment, agreement, regulation + -- Categorization (for UI grouping) + category VARCHAR(50) NOT NULL DEFAULT 'other', + -- Categories: nuclear, offshore, infrastructure, defense, other -- Project association (optional) project_id INTEGER REFERENCES zopk_projects(id) ON DELETE SET NULL, @@ -55,7 +54,7 @@ CREATE TABLE IF NOT EXISTS zopk_milestones ( CREATE INDEX IF NOT EXISTS idx_milestones_project ON zopk_milestones(project_id); CREATE INDEX IF NOT EXISTS idx_milestones_target_date ON zopk_milestones(target_date); CREATE INDEX IF NOT EXISTS idx_milestones_status ON zopk_milestones(status); -CREATE INDEX IF NOT EXISTS idx_milestones_type ON zopk_milestones(milestone_type); +CREATE INDEX IF NOT EXISTS idx_milestones_category ON zopk_milestones(category); CREATE INDEX IF NOT EXISTS idx_milestones_featured ON zopk_milestones(is_featured) WHERE is_featured = TRUE; -- Grant permissions @@ -63,12 +62,12 @@ GRANT ALL ON TABLE zopk_milestones TO nordabiz_app; GRANT USAGE, SELECT ON SEQUENCE zopk_milestones_id_seq TO nordabiz_app; -- Insert sample milestones from known ZOPK events (all verified for display) -INSERT INTO zopk_milestones (title, description, milestone_type, target_date, status, icon, color, is_featured, is_verified, display_order) VALUES - ('Podpisanie porozumienia MON-Kongsberg', 'Podpisanie porozumienia o współpracy między MON a Kongsberg Defence & Aerospace w zakresie budowy fabryki w Rumi', 'agreement', '2024-03-15', 'completed', '📝', '#059669', TRUE, TRUE, 10), - ('Pozwolenie środowiskowe Baltic Power', 'Uzyskanie pozwolenia środowiskowego dla morskiej farmy wiatrowej Baltic 
Power', 'regulation', '2025-06-15', 'completed', '📋', '#10b981', FALSE, TRUE, 40), - ('Rozpoczęcie budowy fabryki Kongsberg w Rumi', 'Start prac budowlanych zakładu produkcji dronów morskich w Rumi Invest Park', 'construction_start', '2025-09-01', 'in_progress', '🏗️', '#f59e0b', TRUE, TRUE, 30), - ('Decyzja lokalizacyjna elektrowni jądrowej', 'Wydanie decyzji lokalizacyjnej dla elektrowni jądrowej w Lubiatowie-Kopalino', 'decision', '2026-03-01', 'planned', '⚖️', '#3b82f6', TRUE, TRUE, 20), - ('Uruchomienie pierwszego bloku jądrowego', 'Planowane uruchomienie pierwszego bloku elektrowni jądrowej w Lubiatowie', 'completion', '2033-12-01', 'planned', '⚡', '#8b5cf6', TRUE, TRUE, 100) +INSERT INTO zopk_milestones (title, description, category, target_date, status, icon, color, is_featured, is_verified, display_order) VALUES + ('Podpisanie porozumienia MON-Kongsberg', 'Podpisanie porozumienia o współpracy między MON a Kongsberg Defence & Aerospace w zakresie budowy fabryki w Rumi', 'defense', '2024-03-15', 'completed', '📝', '#059669', TRUE, TRUE, 10), + ('Pozwolenie środowiskowe Baltic Power', 'Uzyskanie pozwolenia środowiskowego dla morskiej farmy wiatrowej Baltic Power', 'offshore', '2025-06-15', 'completed', '📋', '#10b981', FALSE, TRUE, 40), + ('Rozpoczęcie budowy fabryki Kongsberg w Rumi', 'Start prac budowlanych zakładu produkcji dronów morskich w Rumi Invest Park', 'defense', '2025-09-01', 'in_progress', '🏗️', '#f59e0b', TRUE, TRUE, 30), + ('Decyzja lokalizacyjna elektrowni jądrowej', 'Wydanie decyzji lokalizacyjnej dla elektrowni jądrowej w Lubiatowie-Kopalino', 'nuclear', '2026-03-01', 'planned', '⚖️', '#3b82f6', TRUE, TRUE, 20), + ('Uruchomienie pierwszego bloku jądrowego', 'Planowane uruchomienie pierwszego bloku elektrowni jądrowej w Lubiatowie', 'nuclear', '2033-12-01', 'planned', '⚡', '#8b5cf6', TRUE, TRUE, 100) ON CONFLICT DO NOTHING; -- Comment diff --git a/scripts/cron_extract_knowledge.py b/scripts/cron_extract_knowledge.py new file mode 100644 index 
0000000..2838d62 --- /dev/null +++ b/scripts/cron_extract_knowledge.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python3 +""" +Cron job do automatycznej ekstrakcji wiedzy z nowych newsów ZOPK. +Uruchamiany co 2-4 godziny. + +Użycie: + python3 scripts/cron_extract_knowledge.py [--limit N] [--dry-run] +""" +import sys +import os +import argparse +import logging +from datetime import datetime + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from dotenv import load_dotenv +load_dotenv() + +from database import SessionLocal, ZOPKNews, ZOPKKnowledgeChunk +from sqlalchemy import text + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s' +) +logger = logging.getLogger(__name__) + + +def find_news_pending_extraction(db, limit: int = 20): + """Znajdź newsy z treścią ale bez ekstrakcji.""" + result = db.execute(text(''' + SELECT n.id, n.title, LENGTH(n.full_content) as content_len + FROM zopk_news n + WHERE n.status IN ('approved', 'auto_approved') + AND n.full_content IS NOT NULL + AND LENGTH(n.full_content) > 500 + AND NOT EXISTS ( + SELECT 1 FROM zopk_knowledge_chunks c WHERE c.source_news_id = n.id + ) + ORDER BY n.published_at DESC + LIMIT :limit + '''), {'limit': limit}) + return result.fetchall() + + +def extract_knowledge_from_news(db, news_id: int) -> dict: + """Ekstraktuj wiedzę z pojedynczego newsa.""" + from zopk_knowledge_service import ZOPKKnowledgeService + + service = ZOPKKnowledgeService(db_session=db) + return service.extract_from_news(news_id) + + +def main(): + parser = argparse.ArgumentParser(description='Ekstrakcja wiedzy z newsów ZOPK') + parser.add_argument('--limit', type=int, default=10, help='Limit newsów do przetworzenia') + parser.add_argument('--dry-run', action='store_true', help='Tylko pokaż co by było przetworzone') + args = parser.parse_args() + + db = SessionLocal() + + try: + pending = find_news_pending_extraction(db, args.limit) + logger.info(f"Znaleziono 
{len(pending)} newsów do ekstrakcji") + + if args.dry_run: + for row in pending: + logger.info(f" [{row.id}] {row.title[:60]}... ({row.content_len} znaków)") + return + + success = 0 + errors = [] + + for row in pending: + logger.info(f"Przetwarzam [{row.id}] {row.title[:50]}...") + try: + result = extract_knowledge_from_news(db, row.id) + if result.success: + logger.info(f" ✅ Chunks: {result.chunks_created}, Encje: {result.entities_created}, Fakty: {result.facts_created}") + success += 1 + else: + errors.append(f"[{row.id}] {result.error or 'Unknown error'}") + logger.warning(f" ❌ {result.error}") + except Exception as e: + errors.append(f"[{row.id}] {str(e)}") + logger.error(f" ❌ Exception: {e}") + + logger.info(f"\n{'='*50}") + logger.info(f"Zakończono: {success}/{len(pending)} sukces") + if errors: + logger.info(f"Błędy ({len(errors)}):") + for err in errors[:5]: + logger.info(f" - {err}") + + finally: + db.close() + + +if __name__ == '__main__': + main() diff --git a/templates/admin/zopk_fact_duplicates.html b/templates/admin/zopk_fact_duplicates.html new file mode 100644 index 0000000..1e8117e --- /dev/null +++ b/templates/admin/zopk_fact_duplicates.html @@ -0,0 +1,217 @@ +{% extends "base.html" %} + +{% block title %}Deduplikacja Faktów - ZOPK{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+ + + + +
+
+ + + 0.7 +
+
+ + +
+ + +
+ +
+
+
-
+
Par duplikatów
+
+
+
-
+
Śr. podobieństwo
+
+
+ +
+
Kliknij "Szukaj duplikatów" aby rozpocząć...
+
+
+{% endblock %} + +{% block extra_js %} +let duplicatesData = []; + +async function loadDuplicates() { + const minSim = document.getElementById('minSimilarity').value; + const factType = document.getElementById('factType').value; + + document.getElementById('duplicatesList').innerHTML = '
Ładowanie...
'; + + try { + const url = `/api/zopk/knowledge/fact-duplicates?min_similarity=${minSim}&fact_type=${factType}&limit=100`; + const response = await fetch(url); + const data = await response.json(); + + if (data.success) { + duplicatesData = data.duplicates; + document.getElementById('totalPairs').textContent = data.count; + + if (data.count > 0) { + const avgSim = duplicatesData.reduce((sum, d) => sum + d.similarity, 0) / data.count; + document.getElementById('avgSimilarity').textContent = (avgSim * 100).toFixed(0) + '%'; + } + + renderDuplicates(); + } + } catch (error) { + document.getElementById('duplicatesList').innerHTML = '
Błąd ładowania: ' + error + '
'; + } +} + +function renderDuplicates() { + if (duplicatesData.length === 0) { + document.getElementById('duplicatesList').innerHTML = '
Brak duplikatów do pokazania
'; + return; + } + + const html = duplicatesData.map((d, idx) => ` +
+
+ ${d.fact1.fact_type || 'fakt'} + ${(d.similarity * 100).toFixed(0)}% podobieństwa +
+
+
+
${escapeHtml(d.fact1.text)}
+
+ ID: ${d.fact1.id} | Ważność: ${(d.fact1.importance_score * 100).toFixed(0)}% + ${d.fact1.is_verified ? ' | ✅ Zweryfikowany' : ''} +
+
+
+
+
${escapeHtml(d.fact2.text)}
+
+ ID: ${d.fact2.id} | Ważność: ${(d.fact2.importance_score * 100).toFixed(0)}% + ${d.fact2.is_verified ? ' | ✅ Zweryfikowany' : ''} +
+
+
+
+ + +
+
+ `).join(''); + + document.getElementById('duplicatesList').innerHTML = html; +} + +function escapeHtml(text) { + const div = document.createElement('div'); + div.textContent = text || ''; + return div.innerHTML; +} + +async function mergeFacts(primaryId, duplicateId, idx) { + try { + const response = await fetch('/api/zopk/knowledge/fact-duplicates/merge', { + method: 'POST', + headers: {'Content-Type': 'application/json', 'X-CSRFToken': '{{ csrf_token() }}'}, + body: JSON.stringify({primary_id: primaryId, duplicate_id: duplicateId}) + }); + const data = await response.json(); + + if (data.success) { + document.getElementById('dup-' + idx).remove(); + duplicatesData.splice(idx, 1); + document.getElementById('totalPairs').textContent = duplicatesData.length; + } else { + alert('Błąd: ' + data.error); + } + } catch (error) { + alert('Błąd: ' + error); + } +} + +function skipDuplicate(idx) { + document.getElementById('dup-' + idx).remove(); + duplicatesData.splice(idx, 1); + document.getElementById('totalPairs').textContent = duplicatesData.length; +} + +async function mergeAllHigh() { + const highSim = duplicatesData.filter(d => d.similarity >= 0.9); + if (highSim.length === 0) { + alert('Brak duplikatów z podobieństwem >= 90%'); + return; + } + + if (!confirm(`Połączyć ${highSim.length} par z podobieństwem >= 90%?`)) return; + + let merged = 0; + for (const d of highSim) { + try { + const response = await fetch('/api/zopk/knowledge/fact-duplicates/merge', { + method: 'POST', + headers: {'Content-Type': 'application/json', 'X-CSRFToken': '{{ csrf_token() }}'}, + body: JSON.stringify({primary_id: d.fact1.id, duplicate_id: d.fact2.id}) + }); + const data = await response.json(); + if (data.success) merged++; + } catch (e) {} + } + + alert(`Połączono ${merged}/${highSim.length} par`); + loadDuplicates(); +} +{% endblock %} diff --git a/templates/admin/zopk_knowledge_dashboard.html b/templates/admin/zopk_knowledge_dashboard.html index bc22bdd..941af2d 100644 --- 
a/templates/admin/zopk_knowledge_dashboard.html +++ b/templates/admin/zopk_knowledge_dashboard.html @@ -345,6 +345,20 @@ + + + + + + + + @@ -374,6 +388,20 @@ + + + + +
+

✅ Status weryfikacji

+
+
Ładowanie statystyk weryfikacji...
+
+
{% endblock %} @@ -536,4 +572,105 @@ async function generateEmbeddings() { alert('Błąd: ' + error.message); } } + +async function autoVerifyEntities() { + if (!confirm('Auto-weryfikować encje z ≥5 wzmiankami?')) return; + + try { + const response = await fetch('/api/zopk/knowledge/auto-verify/entities', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-CSRFToken': '{{ csrf_token() }}' + }, + body: JSON.stringify({ min_mentions: 5, limit: 100 }) + }); + + const data = await response.json(); + if (data.success) { + alert(`✅ Zweryfikowano ${data.verified_count} encji`); + loadStats(); + loadVerificationStats(); + } else { + alert('Błąd: ' + data.error); + } + } catch (error) { + alert('Błąd: ' + error.message); + } +} + +async function autoVerifyFacts() { + if (!confirm('Auto-weryfikować fakty z ważnością ≥70%?')) return; + + try { + const response = await fetch('/api/zopk/knowledge/auto-verify/facts', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'X-CSRFToken': '{{ csrf_token() }}' + }, + body: JSON.stringify({ min_importance: 0.7, limit: 200 }) + }); + + const data = await response.json(); + if (data.success) { + alert(`✅ Zweryfikowano ${data.verified_count} faktów`); + loadStats(); + loadVerificationStats(); + } else { + alert('Błąd: ' + data.error); + } + } catch (error) { + alert('Błąd: ' + error.message); + } +} + +async function loadVerificationStats() { + try { + const response = await fetch('/api/zopk/knowledge/dashboard-stats'); + const data = await response.json(); + + if (data.success) { + renderVerificationStats(data); + } + } catch (error) { + console.error('Error loading verification stats:', error); + } +} + +function renderVerificationStats(data) { + const stats = data.verification || {}; + const html = ` +
+
🏢
+
${stats.entities_verified || 0}/${stats.entities_total || 0}
+
Encje zweryfikowane
+
${stats.entities_pending || 0} oczekuje
+
+
+
📌
+
${stats.facts_verified || 0}/${stats.facts_total || 0}
+
Fakty zweryfikowane
+
${stats.facts_pending || 0} oczekuje
+
+
+
📄
+
${stats.chunks_verified || 0}/${stats.chunks_total || 0}
+
Chunks zweryfikowane
+
${stats.chunks_pending || 0} oczekuje
+
+
+
🔗
+
${stats.relations_verified || 0}/${stats.relations_total || 0}
+
Relacje zweryfikowane
+
${stats.relations_pending || 0} oczekuje
+
+ `; + document.getElementById('verificationStats').innerHTML = html; +} + +// Load verification stats on page load +document.addEventListener('DOMContentLoaded', function() { + loadVerificationStats(); +}); {% endblock %} diff --git a/templates/admin/zopk_timeline.html b/templates/admin/zopk_timeline.html new file mode 100644 index 0000000..e9b659e --- /dev/null +++ b/templates/admin/zopk_timeline.html @@ -0,0 +1,302 @@ +{% extends "base.html" %} + +{% block title %}Timeline ZOPK - Roadmapa{% endblock %} + +{% block extra_css %} + +{% endblock %} + +{% block content %} +
+ + + + +
+
Energia jądrowa
+
Offshore wind
+
Infrastruktura
+
Obronność
+
Inne
+
+ +
+
+
+
Ładowanie...
+
+
+
+ + + +{% endblock %} + +{% block extra_js %} +let milestones = []; +let editingId = null; + +async function loadMilestones() { + try { + const response = await fetch('/api/zopk/milestones'); + const data = await response.json(); + + if (data.success) { + milestones = data.milestones; + renderTimeline(); + } + } catch (error) { + document.getElementById('timelineItems').innerHTML = '
Błąd ładowania: ' + error + '
'; + } +} + +function renderTimeline() { + if (milestones.length === 0) { + document.getElementById('timelineItems').innerHTML = '
Brak kamieni milowych. Dodaj pierwszy!
'; + return; + } + + const statusLabels = {planned: 'Planowane', in_progress: 'W trakcie', completed: 'Zakończone', delayed: 'Opóźnione'}; + const categoryLabels = {nuclear: 'Energia jądrowa', offshore: 'Offshore', infrastructure: 'Infrastruktura', defense: 'Obronność', other: 'Inne'}; + + const html = milestones.map(m => ` +
+
+
+
${formatDate(m.target_date)}
+
${escapeHtml(m.title)}
+
${escapeHtml(m.description || '')}
+
+ ${statusLabels[m.status] || m.status} + ${categoryLabels[m.category] || m.category} + + +
+
+
+
+
+ `).join(''); + + document.getElementById('timelineItems').innerHTML = html; +} + +function formatDate(dateStr) { + if (!dateStr) return 'Brak daty'; + const d = new Date(dateStr); + return d.toLocaleDateString('pl-PL', {year: 'numeric', month: 'long'}); +} + +function escapeHtml(text) { + const div = document.createElement('div'); + div.textContent = text || ''; + return div.innerHTML; +} + +function openAddModal() { + editingId = null; + document.getElementById('modalTitle').textContent = 'Dodaj kamień milowy'; + document.getElementById('milestoneForm').reset(); + document.getElementById('addModal').classList.add('active'); +} + +function editMilestone(id) { + const m = milestones.find(x => x.id === id); + if (!m) return; + + editingId = id; + document.getElementById('modalTitle').textContent = 'Edytuj kamień milowy'; + document.getElementById('title').value = m.title; + document.getElementById('description').value = m.description || ''; + document.getElementById('category').value = m.category; + document.getElementById('status').value = m.status; + document.getElementById('targetDate').value = m.target_date || ''; + document.getElementById('actualDate').value = m.actual_date || ''; + document.getElementById('sourceUrl').value = m.source_url || ''; + document.getElementById('addModal').classList.add('active'); +} + +function closeModal() { + document.getElementById('addModal').classList.remove('active'); +} + +async function saveMilestone(e) { + e.preventDefault(); + + const data = { + title: document.getElementById('title').value, + description: document.getElementById('description').value, + category: document.getElementById('category').value, + status: document.getElementById('status').value, + target_date: document.getElementById('targetDate').value || null, + actual_date: document.getElementById('actualDate').value || null, + source_url: document.getElementById('sourceUrl').value || null + }; + + try { + const url = editingId ? 
# ============================================================
# FACT DEDUPLICATION
# ============================================================

def find_duplicate_facts(
    db_session,
    min_similarity: float = 0.7,
    limit: int = 100,
    fact_type: Optional[str] = None
) -> List[Dict]:
    """Find pairs of facts whose texts are similar enough to be duplicates.

    Self-joins ``zopk_knowledge_facts`` on ``f1.id < f2.id`` (so each pair is
    reported once) and scores every pair with the SQL ``similarity()``
    function applied to the lower-cased ``full_text`` of both facts.

    NOTE(review): ``similarity()`` is presumably provided by the PostgreSQL
    ``pg_trgm`` extension - confirm it is installed on the target database.

    Args:
        db_session: Session object exposing ``execute(query, params)``.
        min_similarity: Minimum similarity score a pair must reach.
        limit: Maximum number of pairs to return.
        fact_type: When non-empty, only pairs whose first fact (``f1``) has
            this ``fact_type`` are returned; the second fact is not filtered.

    Returns:
        A list of dicts with the keys ``fact1``, ``fact2`` and ``similarity``,
        ordered by descending similarity and then by the larger importance
        score of the pair. A missing or zero importance score is reported
        as ``0``.
    """
    from sqlalchemy import text

    params = {'min_sim': min_similarity, 'limit': limit}

    # The value of fact_type reaches this function from the HTTP query string,
    # so it must travel as a bound parameter and never as part of the SQL text.
    type_filter = ""
    if fact_type:
        type_filter = "AND f1.fact_type = :fact_type"
        params['fact_type'] = fact_type

    # Only the constant clause above is interpolated into the statement.
    query = text(f"""
        SELECT
            f1.id as id1, f1.full_text as text1, f1.fact_type as type1,
            f1.is_verified as verified1, f1.importance_score as score1,
            f2.id as id2, f2.full_text as text2, f2.fact_type as type2,
            f2.is_verified as verified2, f2.importance_score as score2,
            similarity(LOWER(f1.full_text), LOWER(f2.full_text)) as sim
        FROM zopk_knowledge_facts f1
        JOIN zopk_knowledge_facts f2 ON f1.id < f2.id
        WHERE similarity(LOWER(f1.full_text), LOWER(f2.full_text)) >= :min_sim
        {type_filter}
        ORDER BY sim DESC, GREATEST(f1.importance_score, f2.importance_score) DESC
        LIMIT :limit
    """)

    result = db_session.execute(query, params)

    return [
        {
            'fact1': {
                'id': row.id1, 'text': row.text1, 'fact_type': row.type1,
                'is_verified': row.verified1,
                'importance_score': float(row.score1) if row.score1 else 0
            },
            'fact2': {
                'id': row.id2, 'text': row.text2, 'fact_type': row.type2,
                'is_verified': row.verified2,
                'importance_score': float(row.score2) if row.score2 else 0
            },
            'similarity': float(row.sim)
        }
        for row in result
    ]


def merge_facts(db_session, primary_id: int, duplicate_id: int, new_text: Optional[str] = None) -> Dict:
    """Merge a duplicate fact into the primary fact and delete the duplicate.

    The primary fact keeps the higher of the two importance scores and of the
    two confidence scores, becomes verified if the duplicate was verified, and
    optionally receives a replacement text.

    Args:
        db_session: SQLAlchemy session used to load, update and delete facts.
        primary_id: Id of the fact that survives the merge.
        duplicate_id: Id of the fact that is deleted.
        new_text: Replacement ``full_text`` for the primary fact; ignored
            when empty or ``None``.

    Returns:
        ``{'success': True, 'primary_id': ..., 'deleted_id': ...}`` on
        success, otherwise ``{'success': False, 'error': <message>}``. A
        failure while saving is rolled back before the error is returned.
    """
    # Merging a fact with itself would load the same row twice and then
    # delete it, silently destroying the only copy.
    if primary_id == duplicate_id:
        return {'success': False, 'error': f'Cannot merge fact {primary_id} into itself'}

    primary = db_session.query(ZOPKKnowledgeFact).get(primary_id)
    duplicate = db_session.query(ZOPKKnowledgeFact).get(duplicate_id)

    if not primary:
        return {'success': False, 'error': f'Primary fact {primary_id} not found'}
    if not duplicate:
        return {'success': False, 'error': f'Duplicate fact {duplicate_id} not found'}
    # Ids that compare unequal (e.g. "5" and 5) can still resolve to one row.
    if primary is duplicate:
        return {'success': False, 'error': f'Cannot merge fact {primary_id} into itself'}

    try:
        if new_text:
            primary.full_text = new_text
        if duplicate.importance_score and (not primary.importance_score or duplicate.importance_score > primary.importance_score):
            primary.importance_score = duplicate.importance_score
        if duplicate.confidence_score and (not primary.confidence_score or duplicate.confidence_score > primary.confidence_score):
            primary.confidence_score = duplicate.confidence_score
        if duplicate.is_verified:
            primary.is_verified = True

        db_session.delete(duplicate)
        db_session.commit()
        return {'success': True, 'primary_id': primary_id, 'deleted_id': duplicate_id}
    except Exception as e:
        db_session.rollback()
        return {'success': False, 'error': str(e)}


# ============================================================
# AUTO-VERIFICATION
# ============================================================

def auto_verify_top_entities(db_session, min_mentions: int = 5, limit: int = 100) -> Dict:
    """Mark the most-mentioned unverified entities as verified.

    Args:
        db_session: SQLAlchemy session used for the query and the commit.
        min_mentions: Minimum ``mentions_count`` an entity must have.
        limit: Maximum number of entities verified in one call.

    Returns:
        A dict with ``success``, ``verified_count`` (entities updated) and
        the ``min_mentions`` threshold that was applied.
    """
    # "== False" is intentional: it builds a SQL comparison on the column.
    entities = db_session.query(ZOPKKnowledgeEntity).filter(
        ZOPKKnowledgeEntity.is_verified == False,
        ZOPKKnowledgeEntity.mentions_count >= min_mentions
    ).order_by(ZOPKKnowledgeEntity.mentions_count.desc()).limit(limit).all()

    for entity in entities:
        entity.is_verified = True
    db_session.commit()

    return {'success': True, 'verified_count': len(entities), 'min_mentions': min_mentions}


def auto_verify_top_facts(db_session, min_importance: float = 0.7, limit: int = 200) -> Dict:
    """Mark the most important unverified facts as verified.

    Args:
        db_session: SQLAlchemy session used for the query and the commit.
        min_importance: Minimum ``importance_score`` a fact must have.
        limit: Maximum number of facts verified in one call.

    Returns:
        A dict with ``success``, ``verified_count`` (facts updated) and the
        ``min_importance`` threshold that was applied.
    """
    # "== False" is intentional: it builds a SQL comparison on the column.
    facts = db_session.query(ZOPKKnowledgeFact).filter(
        ZOPKKnowledgeFact.is_verified == False,
        ZOPKKnowledgeFact.importance_score >= min_importance
    ).order_by(ZOPKKnowledgeFact.importance_score.desc()).limit(limit).all()

    for fact in facts:
        fact.is_verified = True
    db_session.commit()

    return {'success': True, 'verified_count': len(facts), 'min_importance': min_importance}
# ============================================================
# DASHBOARD STATS
# ============================================================

def get_knowledge_dashboard_stats(db_session) -> Dict:
    """Collect the aggregate figures shown on the knowledge dashboard.

    Returns:
        A dict with the keys ``chunks``, ``entities``, ``facts``, ``news``
        and ``top_entities``. The first three carry ``total``, ``verified``
        and ``verified_pct`` (rounded to one decimal, ``0`` for an empty
        table); ``chunks`` adds ``with_embedding``, while ``entities`` and
        ``facts`` add a ``by_type`` breakdown. ``news`` counts approved and
        auto-approved articles and how many of them have extracted chunks.
        ``top_entities`` lists the ten entities with the most mentions.
    """
    from sqlalchemy import func, text

    def tally(id_column, condition=None):
        # COUNT over one id column, optionally narrowed by a single filter.
        counting = db_session.query(func.count(id_column))
        if condition is not None:
            counting = counting.filter(condition)
        return counting.scalar() or 0

    def share(part, whole):
        # Percentage with one decimal; an empty table yields 0.
        if not whole:
            return 0
        return round(100 * part / whole, 1)

    chunk_id = ZOPKKnowledgeChunk.id
    chunk_count = tally(chunk_id)
    chunk_verified = tally(chunk_id, ZOPKKnowledgeChunk.is_verified == True)
    chunk_embedded = tally(chunk_id, ZOPKKnowledgeChunk.embedding.isnot(None))

    entity_id = ZOPKKnowledgeEntity.id
    entity_count = tally(entity_id)
    entity_verified = tally(entity_id, ZOPKKnowledgeEntity.is_verified == True)

    fact_id = ZOPKKnowledgeFact.id
    fact_count = tally(fact_id)
    fact_verified = tally(fact_id, ZOPKKnowledgeFact.is_verified == True)

    approved_news_sql = text("SELECT COUNT(*) FROM zopk_news WHERE status IN ('approved', 'auto_approved')")
    news_count = db_session.execute(approved_news_sql).scalar() or 0

    extracted_news_sql = text('''
        SELECT COUNT(DISTINCT n.id) FROM zopk_news n
        JOIN zopk_knowledge_chunks c ON c.source_news_id = n.id
        WHERE n.status IN ('approved', 'auto_approved')
    ''')
    news_extracted = db_session.execute(extracted_news_sql).scalar() or 0

    entity_type_rows = db_session.execute(
        text('SELECT entity_type, COUNT(*) FROM zopk_knowledge_entities GROUP BY entity_type ORDER BY 2 DESC')
    ).fetchall()
    fact_type_rows = db_session.execute(
        text('SELECT fact_type, COUNT(*) FROM zopk_knowledge_facts GROUP BY fact_type ORDER BY 2 DESC')
    ).fetchall()

    most_mentioned = (
        db_session.query(ZOPKKnowledgeEntity)
        .order_by(ZOPKKnowledgeEntity.mentions_count.desc())
        .limit(10)
        .all()
    )

    chunk_stats = {
        'total': chunk_count,
        'verified': chunk_verified,
        'with_embedding': chunk_embedded,
        'verified_pct': share(chunk_verified, chunk_count),
    }
    entity_stats = {
        'total': entity_count,
        'verified': entity_verified,
        'verified_pct': share(entity_verified, entity_count),
        'by_type': [{'type': kind, 'count': amount} for kind, amount in entity_type_rows],
    }
    fact_stats = {
        'total': fact_count,
        'verified': fact_verified,
        'verified_pct': share(fact_verified, fact_count),
        # Facts without a type are grouped under 'unknown'.
        'by_type': [{'type': kind or 'unknown', 'count': amount} for kind, amount in fact_type_rows],
    }
    news_stats = {
        'total': news_count,
        'with_extraction': news_extracted,
        'pending': news_count - news_extracted,
    }
    entity_ranking = [
        {'id': item.id, 'name': item.name, 'type': item.entity_type, 'mentions': item.mentions_count}
        for item in most_mentioned
    ]

    return {
        'chunks': chunk_stats,
        'entities': entity_stats,
        'facts': fact_stats,
        'news': news_stats,
        'top_entities': entity_ranking,
    }