feat: AI-based news relevance evaluation using Gemini

- Add ai_relevant, ai_evaluation_reason, ai_evaluated_at, ai_model columns to zopk_news
- Add evaluate_news_relevance() and evaluate_pending_news() functions
- Add /admin/zopk/news/evaluate-ai endpoint
- Add AI filter tiles (Pasuje wg AI, Nie pasuje wg AI)
- Add "Oceń przez AI" button with progress feedback
- Show AI evaluation badge on news items
- Add new sources: Norda FM, Twoja Telewizja Morska, Nadmorski24.pl, Facebook (Samsonowicz)

AI evaluates news against ZOPK topics: offshore wind, nuclear plant, Kongsberg investment, data centers, hydrogen labs, key people.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-11 07:13:08 +01:00 · 2026-01-11 07:13:08 +01:00 · a0db0af6c8
commit a0db0af6c8
parent 61837c7f0a
5 changed files with 475 additions and 2 deletions
--- a/app.py
+++ b/app.py
@ -7348,19 +7348,32 @@ def admin_zopk():
            'old_news': db.query(ZOPKNews).filter(
                ZOPKNews.status == 'pending',
                ZOPKNews.published_at < datetime(min_year, 1, 1)
-            ).count() if not show_old else 0
+            ).count() if not show_old else 0,
            # AI evaluation stats
            'ai_relevant': db.query(ZOPKNews).filter(ZOPKNews.ai_relevant == True).count(),
            'ai_not_relevant': db.query(ZOPKNews).filter(ZOPKNews.ai_relevant == False).count(),
            'ai_not_evaluated': db.query(ZOPKNews).filter(
                ZOPKNews.status == 'pending',
                ZOPKNews.ai_relevant.is_(None)
            ).count()
        }
        # Build news query with filters
        news_query = db.query(ZOPKNews)
-        # Status filter
+        # Status filter (including AI-based filters)
        if status_filter == 'pending':
            news_query = news_query.filter(ZOPKNews.status == 'pending')
        elif status_filter == 'approved':
            news_query = news_query.filter(ZOPKNews.status.in_(['approved', 'auto_approved']))
        elif status_filter == 'rejected':
            news_query = news_query.filter(ZOPKNews.status == 'rejected')
        elif status_filter == 'ai_relevant':
            # AI evaluated as relevant (regardless of status)
            news_query = news_query.filter(ZOPKNews.ai_relevant == True)
        elif status_filter == 'ai_not_relevant':
            # AI evaluated as NOT relevant
            news_query = news_query.filter(ZOPKNews.ai_relevant == False)
        # 'all' - no status filter
        # Date filter - exclude old news by default
@ -7626,6 +7639,40 @@ def admin_zopk_reject_old_news():
        db.close()
@app.route('/admin/zopk/news/evaluate-ai', methods=['POST'])
@login_required
def admin_zopk_evaluate_ai():
    """Run the Gemini relevance evaluation over pending, not-yet-evaluated news.

    Admin-only POST endpoint. Accepts an optional JSON body ``{"limit": N}``.
    The limit is coerced to an integer and clamped to 1..50 so that a single
    request cannot trigger an unbounded number of AI calls; a missing,
    malformed or non-object body falls back to the default of 50.

    Returns:
        JSON with ``success`` plus the counters reported by
        ``evaluate_pending_news`` (``total_evaluated``, ``relevant_count``,
        ``not_relevant_count``, ``errors``, ``message``). Non-admins get a
        403; an exception during evaluation yields a 500 with the error text.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from zopk_news_service import evaluate_pending_news

    max_limit = 50  # Upper bound per request, to avoid API limits

    # silent=True: an absent or invalid JSON body means "use the defaults"
    # instead of raising and being reported as a server error.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    try:
        limit = int(data.get('limit', max_limit))
    except (TypeError, ValueError, OverflowError):
        limit = max_limit
    limit = max(1, min(limit, max_limit))

    db = SessionLocal()
    try:
        # Run AI evaluation
        result = evaluate_pending_news(db, limit=limit, user_id=current_user.id)
        return jsonify({
            'success': True,
            'total_evaluated': result.get('total_evaluated', 0),
            'relevant_count': result.get('relevant_count', 0),
            'not_relevant_count': result.get('not_relevant_count', 0),
            'errors': result.get('errors', 0),
            'message': result.get('message', '')
        })
    except Exception as e:
        db.rollback()
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@app.route('/api/zopk/search-news', methods=['POST'])
@login_required
 def api_zopk_search_news():
--- a/database.py
+++ b/database.py
@ -1779,6 +1779,12 @@ class ZOPKNews(Base):
    title_hash = Column(String(64), index=True)  # For fuzzy title matching (normalized)
    is_auto_verified = Column(Boolean, default=False)  # True if 3+ sources confirmed
    # AI Relevance Evaluation (Gemini)
    ai_relevant = Column(Boolean)  # True = relevant to ZOPK, False = not relevant, NULL = not evaluated
    ai_evaluation_reason = Column(Text)  # AI's explanation of relevance decision
    ai_evaluated_at = Column(DateTime)  # When AI evaluation was performed
    ai_model = Column(String(100))  # Which AI model was used (e.g., gemini-2.0-flash)
    # Moderation workflow
    status = Column(String(20), default='pending', index=True)  # pending, approved, rejected, auto_approved
    moderated_by = Column(Integer, ForeignKey('users.id'))
--- a/database/migrations/005_zopk_knowledge_base.sql
+++ b/database/migrations/005_zopk_knowledge_base.sql
@ -405,6 +405,50 @@ CREATE INDEX IF NOT EXISTS idx_zopk_news_title_hash ON zopk_news(title_hash);
 -- Index for confidence score (filtering high-confidence news)
 CREATE INDEX IF NOT EXISTS idx_zopk_news_confidence ON zopk_news(confidence_score);
 -- ============================================================
 -- 12. ALTER TABLE - AI Relevance Evaluation columns
 -- ============================================================
 -- These columns support AI-based relevance evaluation using Google Gemini
 -- Add the AI evaluation columns in one idempotent statement.
 -- ADD COLUMN IF NOT EXISTS checks the very table that ALTER TABLE resolves,
 -- whereas a lookup in information_schema.columns by table_name alone can be
 -- satisfied by a same-named table in another schema and skip the ALTER.
 ALTER TABLE zopk_news
     -- AI relevance flag (True = relevant to ZOPK, False = not relevant, NULL = not evaluated)
     ADD COLUMN IF NOT EXISTS ai_relevant BOOLEAN,
     -- AI evaluation reason/explanation
     ADD COLUMN IF NOT EXISTS ai_evaluation_reason TEXT,
     -- When AI evaluation was performed
     ADD COLUMN IF NOT EXISTS ai_evaluated_at TIMESTAMP,
     -- Which AI model was used for evaluation
     ADD COLUMN IF NOT EXISTS ai_model VARCHAR(100);
 -- Index for AI relevance filtering
 CREATE INDEX IF NOT EXISTS idx_zopk_news_ai_relevant ON zopk_news(ai_relevant);
 -- ============================================================
 -- MIGRATION COMPLETE
 -- ============================================================
--- a/templates/admin/zopk_dashboard.html
+++ b/templates/admin/zopk_dashboard.html
@ -121,6 +121,79 @@
        box-shadow: var(--shadow-lg);
    }
    /* AI Action button: purple call-to-action tile that starts the AI evaluation */
    .stat-card.ai-action {
        background: linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%);
        color: white;
        border: none;
        font-family: inherit; /* buttons do not inherit the page font by default */
    }
    .stat-card.ai-action .stat-value {
        color: white;
    }
    .stat-card.ai-action .stat-label {
        color: rgba(255,255,255,0.9);
    }
    /* Darker gradient as hover feedback */
    .stat-card.ai-action:hover {
        background: linear-gradient(135deg, #7c3aed 0%, #6d28d9 100%);
    }
    /* Disabled state: dimmed, not-allowed cursor, no transform */
    .stat-card.ai-action:disabled {
        opacity: 0.5;
        cursor: not-allowed;
        transform: none;
    }
    /* Keep the resting look on hover while disabled. Without resetting the
       background here, the darker :hover gradient above would still apply
       and make the disabled button look clickable. */
    .stat-card.ai-action:disabled:hover {
        background: linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%);
        box-shadow: var(--shadow);
    }
    /* AI evaluation result: status box that evaluateWithAI() fills in.
       The base (purple) style is what the script sets while the request is running. */
    .ai-result {
        background: #f3e8ff;
        border: 1px solid #c4b5fd;
        color: #6b21a8;
        padding: var(--spacing-sm) var(--spacing-md);
        border-radius: var(--radius);
        font-size: var(--font-size-sm);
    }
    /* Modifier set by the script when the server reports success (green) */
    .ai-result.success {
        background: #dcfce7;
        border-color: #86efac;
        color: #166534;
    }
    /* Modifier set by the script on a server-reported or connection error (red) */
    .ai-result.error {
        background: #fee2e2;
        border-color: #fca5a5;
        color: #991b1b;
    }
    /* AI badge in news list: small inline pill showing the AI verdict for one news item.
       The template adds either the "relevant" or the "not-relevant" modifier
       depending on news.ai_relevant. */
    .ai-badge {
        display: inline-flex;
        align-items: center;
        gap: 4px;
        padding: 2px 8px;
        border-radius: var(--radius-sm);
        font-size: var(--font-size-xs);
        font-weight: 500;
    }
    /* Verdict: matches ZOPK (green) */
    .ai-badge.relevant {
        background: #dcfce7;
        color: #166534;
    }
    /* Verdict: does not match ZOPK (red) */
    .ai-badge.not-relevant {
        background: #fee2e2;
        color: #991b1b;
    }
    /* Filters bar */
    .filters-bar {
        display: flex;
@ -705,6 +778,30 @@
    </div>
 </div>
 <!-- Section 3: AI Evaluation filters.
      The first two tiles filter the news list by the AI verdict
      (?status=ai_relevant / ?status=ai_not_relevant), the third tile only shows
      the count of news that still lack a verdict, and the button starts a new
      evaluation run through evaluateWithAI(). -->
 <div class="stats-section">
    <h3 class="stats-section-title">Ocena AI (Gemini) <small>(kliknij aby wybrać)</small></h3>
    <div class="stats-grid" style="grid-template-columns: repeat(4, 1fr); max-width: 800px;">
        <a href="?status=ai_relevant" class="stat-card filter-card success {{ 'active' if status_filter == 'ai_relevant' else '' }}">
            <div class="stat-value">{{ stats.ai_relevant }}</div>
            <div class="stat-label">Pasuje wg AI</div>
        </a>
        <a href="?status=ai_not_relevant" class="stat-card filter-card danger {{ 'active' if status_filter == 'ai_not_relevant' else '' }}">
            <div class="stat-value">{{ stats.ai_not_relevant }}</div>
            <div class="stat-label">Nie pasuje wg AI</div>
        </a>
        <!-- Informational tile: not a link, so it does not act as a filter -->
        <div class="stat-card info-only">
            <div class="stat-value">{{ stats.ai_not_evaluated }}</div>
            <div class="stat-label">Nieocenione</div>
        </div>
        <!-- Rendered disabled when there is nothing left to evaluate -->
        <button type="button" class="stat-card filter-card ai-action" onclick="evaluateWithAI()" id="aiEvalBtn" {% if stats.ai_not_evaluated == 0 %}disabled{% endif %}>
            <div class="stat-value" style="font-size: var(--font-size-xl);">🤖</div>
            <div class="stat-label">Oceń przez AI</div>
        </button>
    </div>
    <!-- Hidden until evaluateWithAI() writes progress or the result into it -->
    <div id="aiEvalResult" style="margin-top: var(--spacing-md); display: none;"></div>
 </div>
 <!-- Old news warning -->
 {% if stats.old_news > 0 and not show_old %}
 <div class="old-news-warning">
@ -751,6 +848,8 @@
        {% if status_filter == 'pending' %}Newsy oczekujące na moderację
        {% elif status_filter == 'approved' %}Newsy zatwierdzone
        {% elif status_filter == 'rejected' %}Newsy odrzucone
        {% elif status_filter == 'ai_relevant' %}🤖 Newsy pasujące wg AI
        {% elif status_filter == 'ai_not_relevant' %}🤖 Newsy NIE pasujące wg AI
        {% else %}Wszystkie newsy{% endif %}
        ({{ total_news_filtered }})
    </h2>
@ -788,6 +887,12 @@
                    {% elif news.status == 'rejected' %}
                    <span class="confidence-badge low">✗ Odrzucony</span>
                    {% endif %}
                    {# AI Evaluation badge #}
                    {% if news.ai_relevant is not none %}
                    <span class="ai-badge {{ 'relevant' if news.ai_relevant else 'not-relevant' }}" title="{{ news.ai_evaluation_reason or '' }}">
                        🤖 {{ 'Pasuje' if news.ai_relevant else 'Nie pasuje' }}
                    </span>
                    {% endif %}
                </div>
            </div>
            <div class="news-actions">
@ -1060,6 +1165,58 @@ async function rejectOldNews() {
    }
 }
 // AI Evaluation function
 // Asks the server to evaluate not-yet-evaluated news with Gemini and reports
 // the outcome in #aiEvalResult. On success the page reloads after 3 seconds;
 // on failure the button is re-enabled so the admin can retry.
 // All server-provided text is written through textContent (never innerHTML),
 // so an error message containing markup cannot inject HTML into the page.
 async function evaluateWithAI() {
    const btn = document.getElementById('aiEvalBtn');
    const btnLabel = btn.querySelector('.stat-label');
    const resultDiv = document.getElementById('aiEvalResult');

    if (!confirm('Czy chcesz uruchomić ocenę AI (Gemini) dla nieocenionych newsów?\n\nProces może potrwać kilka minut. Ocenionych zostanie max 50 newsów.')) {
        return;
    }

    // Render the status box: a main line plus an optional smaller detail line.
    const showResult = (className, text, detail) => {
        resultDiv.className = className;
        resultDiv.textContent = text;
        if (detail) {
            resultDiv.appendChild(document.createElement('br'));
            const small = document.createElement('small');
            small.textContent = detail;
            resultDiv.appendChild(small);
        }
    };

    // Put the button back into its initial, clickable state.
    const resetButton = () => {
        btn.disabled = false;
        btnLabel.textContent = 'Oceń przez AI';
    };

    btn.disabled = true;
    btnLabel.textContent = 'Oceniam...';
    resultDiv.style.display = 'block';
    showResult('ai-result', '🤖 Trwa ocena newsów przez AI... Proszę czekać.');

    try {
        const response = await fetch('/admin/zopk/news/evaluate-ai', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': csrfToken
            },
            body: JSON.stringify({ limit: 50 })
        });

        // An error response is not guaranteed to be JSON; report the HTTP status
        // then, rather than a misleading "connection error".
        let data;
        try {
            data = await response.json();
        } catch (parseError) {
            data = { success: false, error: `HTTP ${response.status}` };
        }

        if (data.success) {
            showResult(
                'ai-result success',
                `✓ ${data.message}`,
                `Pasuje: ${data.relevant_count} | Nie pasuje: ${data.not_relevant_count} | Błędy: ${data.errors}`
            );
            // Refresh page after 3 seconds
            setTimeout(() => {
                location.reload();
            }, 3000);
        } else {
            showResult('ai-result error', `✗ Błąd: ${data.error || `HTTP ${response.status}`}`);
            resetButton();
        }
    } catch (error) {
        showResult('ai-result error', `✗ Błąd połączenia: ${error.message}`);
        resetButton();
    }
 }
 // Source names mapping for progress display
 const SOURCE_NAMES = {
    'brave': '🔍 Brave Search API',
@ -1072,6 +1229,11 @@ const SOURCE_NAMES = {
    'google_news_nuclear': '📡 Google News (elektrownia jądrowa)',
    'google_news_samsonowicz': '📡 Google News (Samsonowicz)',
    'google_news_kongsberg': '📡 Google News (Kongsberg)',
    // New local media sources
    'google_news_norda_fm': '📻 Norda FM',
    'google_news_ttm': '📺 Twoja Telewizja Morska',
    'google_news_nadmorski24': '📰 Nadmorski24.pl',
    'google_news_samsonowicz_fb': '👤 Facebook (Samsonowicz)',
    'google_news_norda': '📡 Google News (Norda Biznes)',
    'google_news_spoko': '📡 Google News (Spoko Gospodarcze)'
 };
--- a/zopk_news_service.py
+++ b/zopk_news_service.py
@ -109,6 +109,32 @@ RSS_SOURCES = {
        'name': 'Google News',
        'type': 'aggregator',
        'keywords': []
    },
    # Regional media (via Google News - site-specific searches)
    'google_news_norda_fm': {
        'url': 'https://news.google.com/rss/search?q=site:nordafm.pl+OR+%22Norda+FM%22&hl=pl&gl=PL&ceid=PL:pl',
        'name': 'Norda FM',
        'type': 'local_media',
        'keywords': []
    },
    'google_news_ttm': {
        'url': 'https://news.google.com/rss/search?q=site:ttm24.pl+OR+%22Twoja+Telewizja+Morska%22&hl=pl&gl=PL&ceid=PL:pl',
        'name': 'Twoja Telewizja Morska',
        'type': 'local_media',
        'keywords': []
    },
    'google_news_nadmorski24': {
        'url': 'https://news.google.com/rss/search?q=site:nadmorski24.pl&hl=pl&gl=PL&ceid=PL:pl',
        'name': 'Nadmorski24.pl',
        'type': 'local_media',
        'keywords': []
    },
    # Facebook - Maciej Samsonowicz (via Google search - FB doesn't have RSS)
    'google_news_samsonowicz_fb': {
        'url': 'https://news.google.com/rss/search?q=%22Maciej+Samsonowicz%22+facebook&hl=pl&gl=PL&ceid=PL:pl',
        'name': 'Google News (Facebook Samsonowicz)',
        'type': 'aggregator',
        'keywords': []
    }
 }
@ -513,3 +539,191 @@ def search_zopk_news(db_session, query: str = None) -> Dict:
    """
    service = ZOPKNewsService(db_session)
    return service.search_all_sources(query or 'Zielony Okręg Przemysłowy Kaszubia')
 # ============================================================
 # AI RELEVANCE EVALUATION (GEMINI)
 # ============================================================

# Prompt template filled in with str.format(); the placeholders are {title},
# {description}, {source} and {date}. Literal JSON braces are doubled ({{ }})
# so that format() leaves them in the prompt as single braces.
ZOPK_AI_EVALUATION_PROMPT = """Jesteś ekspertem ds. analizy wiadomości. Oceń, czy poniższy artykuł/news dotyczy projektu **Zielony Okręg Przemysłowy Kaszubia (ZOPK)** lub związanych z nim tematów.
 **ZOPK obejmuje:**
 1. Morską energetykę wiatrową na Bałtyku (offshore wind)
 2. Elektrownię jądrową w Lubiatowie-Kopalino (Choczewo)
 3. Inwestycję Kongsberg w Rumi (przemysł obronny)
 4. Centra danych i laboratoria wodorowe
 5. Rozwój przemysłowy Kaszub (Wejherowo, Rumia, Gdynia)
 6. Kluczowe osoby: Maciej Samsonowicz (koordynator ZOPK), minister Kosiniak-Kamysz
 **Artykuł do oceny:**
 Tytuł: {title}
 Opis: {description}
 Źródło: {source}
 Data: {date}
 **Twoje zadanie:**
 1. Oceń czy artykuł dotyczy ZOPK lub powiązanych tematów
 2. Odpowiedz TYLKO w formacie JSON (bez żadnego innego tekstu):
 {{"relevant": true/false, "reason": "krótkie uzasadnienie po polsku (max 100 znaków)"}}
 Przykłady odpowiedzi:
 {{"relevant": true, "reason": "Dotyczy inwestycji Kongsberg w Rumi"}}
 {{"relevant": false, "reason": "Artykuł o lokalnych wydarzeniach kulturalnych"}}
 {{"relevant": true, "reason": "Informacje o farmach wiatrowych na Bałtyku"}}
 {{"relevant": false, "reason": "News sportowy bez związku z przemysłem"}}"""


def _unevaluated(reason: str) -> Dict:
    """Build the result returned when no usable AI verdict was obtained."""
    return {'relevant': None, 'reason': reason, 'evaluated': False}


def _coerce_relevance_flag(value):
    """Map the model's ``relevant`` field to a bool, or None when it is unusable.

    Plain ``bool(value)`` is not enough: the string "false" is truthy, and a
    missing field would silently be recorded as "not relevant".
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, int) and value in (0, 1):
        return bool(value)
    if isinstance(value, str):
        normalized = value.strip().lower()
        if normalized == 'true':
            return True
        if normalized == 'false':
            return False
    return None


def evaluate_news_relevance(news_item, gemini_service=None) -> Dict:
    """
    Evaluate a single news item for ZOPK relevance using Gemini AI.

    Args:
        news_item: ZOPKNews object or dict with title, description, source_name, published_at.
            Missing or None fields are treated as empty.
        gemini_service: Optional GeminiService instance (uses global if not provided)

    Returns:
        Dict with keys: relevant (bool, or None when not evaluated), reason (str),
        evaluated (bool). ``evaluated`` is False when the service is unavailable,
        the call fails, or the reply carries no parseable true/false verdict.
    """
    import json

    # Get Gemini service
    if gemini_service is None:
        try:
            from gemini_service import get_gemini_service
            gemini_service = get_gemini_service()
        except Exception as e:
            logger.error("Failed to get Gemini service: %s", e)
            return _unevaluated('Gemini service unavailable')
    if gemini_service is None:
        return _unevaluated('Gemini service not initialized')

    # Extract fields from news_item
    if hasattr(news_item, 'title'):
        title = news_item.title or ''
        description = news_item.description or ''
        source = news_item.source_name or news_item.source_domain or ''
        date = news_item.published_at.strftime('%Y-%m-%d') if news_item.published_at else ''
    else:
        # `or ''` also covers keys that are present but hold None, which
        # would otherwise break the slicing below.
        title = news_item.get('title') or ''
        description = news_item.get('description') or ''
        source = news_item.get('source_name') or ''
        date = news_item.get('published_at') or ''

    # Build prompt (field lengths are capped to keep the prompt small)
    prompt = ZOPK_AI_EVALUATION_PROMPT.format(
        title=title[:500],
        description=description[:1000] if description else 'Brak opisu',
        source=source[:100],
        date=date
    )

    try:
        # Call Gemini with low temperature for consistent results
        response = gemini_service.generate_text(
            prompt,
            temperature=0.1,
            feature='zopk_news_evaluation'
        )
    except Exception as e:
        logger.error("Gemini evaluation error: %s", e)
        return _unevaluated(f'AI error: {str(e)[:50]}')

    if not isinstance(response, str):
        response = '' if response is None else str(response)

    # Pull the first flat {...} object out of the reply; this also copes with
    # replies wrapped in markdown code fences.
    json_match = re.search(r'\{[^{}]*\}', response)
    if not json_match:
        logger.warning("Could not parse Gemini response: %s", response[:200])
        return _unevaluated('Invalid AI response format')

    try:
        result = json.loads(json_match.group())
    except json.JSONDecodeError as e:
        logger.error("JSON decode error: %s", e)
        return _unevaluated(f'JSON parse error: {str(e)[:50]}')

    relevant = _coerce_relevance_flag(result.get('relevant'))
    if relevant is None:
        # No usable verdict: leave the item unevaluated rather than guessing.
        logger.warning("Gemini response has no valid 'relevant' flag: %s", response[:200])
        return _unevaluated('Invalid AI response format')

    return {
        'relevant': relevant,
        'reason': str(result.get('reason') or '')[:255],
        'evaluated': True
    }
 # Batch driver: evaluates pending news items one by one via evaluate_news_relevance().
def evaluate_pending_news(db_session, limit: int = 50, user_id: int = None) -> Dict:
    """
    Evaluate multiple pending news items for ZOPK relevance.

    Selects news with status 'pending' and no AI verdict yet (newest first),
    evaluates each with Gemini and stores the verdict, reason, timestamp and
    model name on the row. All changes are committed once at the end.

    Args:
        db_session: SQLAlchemy session
        limit: Max number of items to evaluate (to avoid API limits)
        user_id: User triggering the evaluation (for logging)

    Returns:
        Dict with stats: total_evaluated, relevant_count, not_relevant_count,
        errors, message. When the final commit fails the evaluation counters
        are reset to 0, because nothing was persisted.
    """
    from database import ZOPKNews
    from datetime import datetime

    def empty_stats(message, errors=0):
        # Result for the early exits where no item was evaluated.
        return {
            'total_evaluated': 0,
            'relevant_count': 0,
            'not_relevant_count': 0,
            'errors': errors,
            'message': message
        }

    # Get pending news that haven't been AI-evaluated yet
    pending_news = db_session.query(ZOPKNews).filter(
        ZOPKNews.status == 'pending',
        ZOPKNews.ai_relevant.is_(None)  # Not yet evaluated
    ).order_by(ZOPKNews.created_at.desc()).limit(limit).all()
    if not pending_news:
        return empty_stats('Brak newsów do oceny')

    # Get Gemini service once
    try:
        from gemini_service import get_gemini_service
        gemini = get_gemini_service()
    except Exception as e:
        return empty_stats(f'Gemini service error: {str(e)}', errors=1)
    if gemini is None:
        # Without this guard every item would retry the lookup and be
        # counted as a separate error.
        return empty_stats('Gemini service not initialized', errors=1)

    logger.info(
        "AI relevance evaluation started for %d news items (user_id=%s)",
        len(pending_news), user_id
    )

    stats = {
        'total_evaluated': 0,
        'relevant_count': 0,
        'not_relevant_count': 0,
        'errors': 0
    }
    for news in pending_news:
        result = evaluate_news_relevance(news, gemini)
        if not result['evaluated']:
            # Row stays unevaluated (ai_relevant NULL) and is picked up again next run.
            stats['errors'] += 1
            logger.warning("Failed to evaluate news %s: %s", news.id, result['reason'])
            continue

        news.ai_relevant = result['relevant']
        news.ai_evaluation_reason = result['reason']
        news.ai_evaluated_at = datetime.now()
        # NOTE(review): model name is hard-coded; confirm it matches the model
        # the Gemini service actually uses.
        news.ai_model = 'gemini-2.0-flash'
        stats['total_evaluated'] += 1
        if result['relevant']:
            stats['relevant_count'] += 1
        else:
            stats['not_relevant_count'] += 1

    # Commit all changes
    try:
        db_session.commit()
        stats['message'] = f"Oceniono {stats['total_evaluated']} newsów: {stats['relevant_count']} pasuje, {stats['not_relevant_count']} nie pasuje"
    except Exception as e:
        db_session.rollback()
        logger.error("Failed to save AI evaluations: %s", e)
        # The rollback discarded every verdict, so do not report them as evaluated.
        stats['total_evaluated'] = 0
        stats['relevant_count'] = 0
        stats['not_relevant_count'] = 0
        stats['errors'] += 1
        stats['message'] = f'Database error: {str(e)}'
    return stats