feat: AI-based news relevance evaluation using Gemini

- Add ai_relevant, ai_evaluation_reason, ai_evaluated_at, ai_model columns to zopk_news
- Add evaluate_news_relevance() and evaluate_pending_news() functions
- Add /admin/zopk/news/evaluate-ai endpoint
- Add AI filter tiles (Pasuje wg AI, Nie pasuje wg AI)
- Add "Oceń przez AI" button with progress feedback
- Show AI evaluation badge on news items
- Add new sources: Norda FM, Twoja Telewizja Morska, Nadmorski24.pl, Facebook (Samsonowicz)

AI evaluates news against ZOPK topics: offshore wind, nuclear plant,
Kongsberg investment, data centers, hydrogen labs, key people.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-11 07:13:08 +01:00
parent 61837c7f0a
commit a0db0af6c8
5 changed files with 475 additions and 2 deletions

51
app.py
View File

@ -7348,19 +7348,32 @@ def admin_zopk():
'old_news': db.query(ZOPKNews).filter(
ZOPKNews.status == 'pending',
ZOPKNews.published_at < datetime(min_year, 1, 1)
).count() if not show_old else 0
).count() if not show_old else 0,
# AI evaluation stats
'ai_relevant': db.query(ZOPKNews).filter(ZOPKNews.ai_relevant == True).count(),
'ai_not_relevant': db.query(ZOPKNews).filter(ZOPKNews.ai_relevant == False).count(),
'ai_not_evaluated': db.query(ZOPKNews).filter(
ZOPKNews.status == 'pending',
ZOPKNews.ai_relevant.is_(None)
).count()
}
# Build news query with filters
news_query = db.query(ZOPKNews)
# Status filter
# Status filter (including AI-based filters)
if status_filter == 'pending':
news_query = news_query.filter(ZOPKNews.status == 'pending')
elif status_filter == 'approved':
news_query = news_query.filter(ZOPKNews.status.in_(['approved', 'auto_approved']))
elif status_filter == 'rejected':
news_query = news_query.filter(ZOPKNews.status == 'rejected')
elif status_filter == 'ai_relevant':
# AI evaluated as relevant (regardless of status)
news_query = news_query.filter(ZOPKNews.ai_relevant == True)
elif status_filter == 'ai_not_relevant':
# AI evaluated as NOT relevant
news_query = news_query.filter(ZOPKNews.ai_relevant == False)
# 'all' - no status filter
# Date filter - exclude old news by default
@ -7626,6 +7639,40 @@ def admin_zopk_reject_old_news():
db.close()
@app.route('/admin/zopk/news/evaluate-ai', methods=['POST'])
@login_required
def admin_zopk_evaluate_ai():
    """Evaluate pending news for ZOPK relevance using Gemini AI.

    Admin-only endpoint. Accepts an optional JSON body ``{"limit": <int>}``;
    the limit is clamped to the range 1..50 and falls back to 50 when it is
    missing or not a number.

    Returns:
        JSON with ``success`` plus the counters and message produced by
        ``evaluate_pending_news``; 403 for non-admin users; 500 with the
        exception text when the evaluation raises.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from zopk_news_service import evaluate_pending_news

    # Upper bound per request, to avoid hitting API limits.
    max_limit = 50

    # silent=True: a missing/invalid JSON body means "use defaults" instead
    # of raising inside the handler.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    try:
        limit = int(payload.get('limit', max_limit))
    except (TypeError, ValueError, OverflowError):
        limit = max_limit
    # Never trust the client-supplied value beyond the documented maximum.
    limit = max(1, min(limit, max_limit))

    db = SessionLocal()
    try:
        # Run AI evaluation
        result = evaluate_pending_news(db, limit=limit, user_id=current_user.id)

        return jsonify({
            'success': True,
            'total_evaluated': result.get('total_evaluated', 0),
            'relevant_count': result.get('relevant_count', 0),
            'not_relevant_count': result.get('not_relevant_count', 0),
            'errors': result.get('errors', 0),
            'message': result.get('message', '')
        })
    except Exception as e:
        db.rollback()
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@app.route('/api/zopk/search-news', methods=['POST'])
@login_required
def api_zopk_search_news():

View File

@ -1779,6 +1779,12 @@ class ZOPKNews(Base):
title_hash = Column(String(64), index=True) # For fuzzy title matching (normalized)
is_auto_verified = Column(Boolean, default=False) # True if 3+ sources confirmed
# AI Relevance Evaluation (Gemini)
ai_relevant = Column(Boolean) # True = relevant to ZOPK, False = not relevant, NULL = not evaluated
ai_evaluation_reason = Column(Text) # AI's explanation of relevance decision
ai_evaluated_at = Column(DateTime) # When AI evaluation was performed
ai_model = Column(String(100)) # Which AI model was used (e.g., gemini-2.0-flash)
# Moderation workflow
status = Column(String(20), default='pending', index=True) # pending, approved, rejected, auto_approved
moderated_by = Column(Integer, ForeignKey('users.id'))

View File

@ -405,6 +405,50 @@ CREATE INDEX IF NOT EXISTS idx_zopk_news_title_hash ON zopk_news(title_hash);
-- Index for confidence score (filtering high-confidence news)
CREATE INDEX IF NOT EXISTS idx_zopk_news_confidence ON zopk_news(confidence_score);
-- ============================================================
-- 12. ALTER TABLE - AI Relevance Evaluation columns
-- ============================================================
-- These columns support AI-based relevance evaluation using Google Gemini
-- AI relevance flag (True = relevant to ZOPK, False = not relevant, NULL = not evaluated)
-- Idempotent add: IF NOT EXISTS is evaluated against the same zopk_news table
-- that ALTER TABLE resolves, unlike an information_schema lookup that is not
-- schema-qualified and can match a same-named table in another schema.
ALTER TABLE zopk_news ADD COLUMN IF NOT EXISTS ai_relevant BOOLEAN;
-- AI evaluation reason/explanation
-- Idempotent add: IF NOT EXISTS is evaluated against the same zopk_news table
-- that ALTER TABLE resolves, unlike an information_schema lookup that is not
-- schema-qualified and can match a same-named table in another schema.
ALTER TABLE zopk_news ADD COLUMN IF NOT EXISTS ai_evaluation_reason TEXT;
-- When AI evaluation was performed
-- Idempotent add: IF NOT EXISTS is evaluated against the same zopk_news table
-- that ALTER TABLE resolves, unlike an information_schema lookup that is not
-- schema-qualified and can match a same-named table in another schema.
ALTER TABLE zopk_news ADD COLUMN IF NOT EXISTS ai_evaluated_at TIMESTAMP;
-- Which AI model was used for evaluation
-- Idempotent add: IF NOT EXISTS is evaluated against the same zopk_news table
-- that ALTER TABLE resolves, unlike an information_schema lookup that is not
-- schema-qualified and can match a same-named table in another schema.
ALTER TABLE zopk_news ADD COLUMN IF NOT EXISTS ai_model VARCHAR(100);
-- Index for AI relevance filtering
CREATE INDEX IF NOT EXISTS idx_zopk_news_ai_relevant ON zopk_news(ai_relevant);
-- ============================================================
-- MIGRATION COMPLETE
-- ============================================================

View File

@ -121,6 +121,79 @@
box-shadow: var(--shadow-lg);
}
/* AI Action button */
.stat-card.ai-action {
background: linear-gradient(135deg, #8b5cf6 0%, #7c3aed 100%);
color: white;
border: none;
font-family: inherit;
}
.stat-card.ai-action .stat-value {
color: white;
}
.stat-card.ai-action .stat-label {
color: rgba(255,255,255,0.9);
}
.stat-card.ai-action:hover {
background: linear-gradient(135deg, #7c3aed 0%, #6d28d9 100%);
}
.stat-card.ai-action:disabled {
opacity: 0.5;
cursor: not-allowed;
transform: none;
}
.stat-card.ai-action:disabled:hover {
box-shadow: var(--shadow);
}
/* AI evaluation result */
.ai-result {
background: #f3e8ff;
border: 1px solid #c4b5fd;
color: #6b21a8;
padding: var(--spacing-sm) var(--spacing-md);
border-radius: var(--radius);
font-size: var(--font-size-sm);
}
.ai-result.success {
background: #dcfce7;
border-color: #86efac;
color: #166534;
}
.ai-result.error {
background: #fee2e2;
border-color: #fca5a5;
color: #991b1b;
}
/* AI badge in news list */
.ai-badge {
display: inline-flex;
align-items: center;
gap: 4px;
padding: 2px 8px;
border-radius: var(--radius-sm);
font-size: var(--font-size-xs);
font-weight: 500;
}
.ai-badge.relevant {
background: #dcfce7;
color: #166534;
}
.ai-badge.not-relevant {
background: #fee2e2;
color: #991b1b;
}
/* Filters bar */
.filters-bar {
display: flex;
@ -705,6 +778,30 @@
</div>
</div>
<!-- Section 3: AI Evaluation filters -->
<div class="stats-section">
<h3 class="stats-section-title">Ocena AI (Gemini) <small>(kliknij aby wybrać)</small></h3>
<div class="stats-grid" style="grid-template-columns: repeat(4, 1fr); max-width: 800px;">
<a href="?status=ai_relevant" class="stat-card filter-card success {{ 'active' if status_filter == 'ai_relevant' else '' }}">
<div class="stat-value">{{ stats.ai_relevant }}</div>
<div class="stat-label">Pasuje wg AI</div>
</a>
<a href="?status=ai_not_relevant" class="stat-card filter-card danger {{ 'active' if status_filter == 'ai_not_relevant' else '' }}">
<div class="stat-value">{{ stats.ai_not_relevant }}</div>
<div class="stat-label">Nie pasuje wg AI</div>
</a>
<div class="stat-card info-only">
<div class="stat-value">{{ stats.ai_not_evaluated }}</div>
<div class="stat-label">Nieocenione</div>
</div>
<button type="button" class="stat-card filter-card ai-action" onclick="evaluateWithAI()" id="aiEvalBtn" {% if stats.ai_not_evaluated == 0 %}disabled{% endif %}>
<div class="stat-value" style="font-size: var(--font-size-xl);">🤖</div>
<div class="stat-label">Oceń przez AI</div>
</button>
</div>
<div id="aiEvalResult" style="margin-top: var(--spacing-md); display: none;"></div>
</div>
<!-- Old news warning -->
{% if stats.old_news > 0 and not show_old %}
<div class="old-news-warning">
@ -751,6 +848,8 @@
{% if status_filter == 'pending' %}Newsy oczekujące na moderację
{% elif status_filter == 'approved' %}Newsy zatwierdzone
{% elif status_filter == 'rejected' %}Newsy odrzucone
{% elif status_filter == 'ai_relevant' %}🤖 Newsy pasujące wg AI
{% elif status_filter == 'ai_not_relevant' %}🤖 Newsy NIE pasujące wg AI
{% else %}Wszystkie newsy{% endif %}
({{ total_news_filtered }})
</h2>
@ -788,6 +887,12 @@
{% elif news.status == 'rejected' %}
<span class="confidence-badge low">✗ Odrzucony</span>
{% endif %}
{# AI Evaluation badge #}
{% if news.ai_relevant is not none %}
<span class="ai-badge {{ 'relevant' if news.ai_relevant else 'not-relevant' }}" title="{{ news.ai_evaluation_reason or '' }}">
🤖 {{ 'Pasuje' if news.ai_relevant else 'Nie pasuje' }}
</span>
{% endif %}
</div>
</div>
<div class="news-actions">
@ -1060,6 +1165,58 @@ async function rejectOldNews() {
}
}
// AI Evaluation function
/**
 * Ask the backend to run the Gemini relevance evaluation for not-yet-evaluated
 * news and show the outcome in #aiEvalResult.
 *
 * All server-provided strings are written with textContent (never innerHTML),
 * so markup inside a message or an error cannot be interpreted as HTML.
 * On success the page reloads after 3 seconds; on any failure the button is
 * re-enabled so the user can retry.
 */
async function evaluateWithAI() {
    const btn = document.getElementById('aiEvalBtn');
    const resultDiv = document.getElementById('aiEvalResult');
    const label = btn.querySelector('.stat-label');

    if (!confirm('Czy chcesz uruchomić ocenę AI (Gemini) dla nieocenionych newsów?\n\nProces może potrwać kilka minut. Ocenionych zostanie max 50 newsów.')) {
        return;
    }

    // Replace the result box content with plain text in the given variant
    // ('' | 'success' | 'error').
    const showResult = (variant, text) => {
        resultDiv.className = variant ? 'ai-result ' + variant : 'ai-result';
        resultDiv.textContent = text;
    };

    const resetButton = () => {
        btn.disabled = false;
        label.textContent = 'Oceń przez AI';
    };

    btn.disabled = true;
    label.textContent = 'Oceniam...';
    resultDiv.style.display = 'block';
    showResult('', '🤖 Trwa ocena newsów przez AI... Proszę czekać.');

    try {
        const response = await fetch('/admin/zopk/news/evaluate-ai', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': csrfToken
            },
            body: JSON.stringify({ limit: 50 })
        });

        let data;
        try {
            data = await response.json();
        } catch (parseError) {
            // Non-JSON body (e.g. an HTML error page): report the HTTP status
            // instead of an opaque JSON parse error.
            throw new Error('HTTP ' + response.status);
        }

        if (response.ok && data.success) {
            showResult('success', `✓ ${data.message}`);

            const details = document.createElement('small');
            details.textContent = `Pasuje: ${data.relevant_count} | Nie pasuje: ${data.not_relevant_count} | Błędy: ${data.errors}`;
            resultDiv.appendChild(document.createElement('br'));
            resultDiv.appendChild(details);

            // Refresh page after 3 seconds
            setTimeout(() => {
                location.reload();
            }, 3000);
        } else {
            const reason = data.error || ('HTTP ' + response.status);
            showResult('error', `✗ Błąd: ${reason}`);
            resetButton();
        }
    } catch (error) {
        showResult('error', `✗ Błąd połączenia: ${error.message}`);
        resetButton();
    }
}
// Source names mapping for progress display
const SOURCE_NAMES = {
'brave': '🔍 Brave Search API',
@ -1072,6 +1229,11 @@ const SOURCE_NAMES = {
'google_news_nuclear': '📡 Google News (elektrownia jądrowa)',
'google_news_samsonowicz': '📡 Google News (Samsonowicz)',
'google_news_kongsberg': '📡 Google News (Kongsberg)',
// New local media sources
'google_news_norda_fm': '📻 Norda FM',
'google_news_ttm': '📺 Twoja Telewizja Morska',
'google_news_nadmorski24': '📰 Nadmorski24.pl',
'google_news_samsonowicz_fb': '👤 Facebook (Samsonowicz)',
'google_news_norda': '📡 Google News (Norda Biznes)',
'google_news_spoko': '📡 Google News (Spoko Gospodarcze)'
};

View File

@ -109,6 +109,32 @@ RSS_SOURCES = {
'name': 'Google News',
'type': 'aggregator',
'keywords': []
},
# Regional media (via Google News - site-specific searches)
'google_news_norda_fm': {
'url': 'https://news.google.com/rss/search?q=site:nordafm.pl+OR+%22Norda+FM%22&hl=pl&gl=PL&ceid=PL:pl',
'name': 'Norda FM',
'type': 'local_media',
'keywords': []
},
'google_news_ttm': {
'url': 'https://news.google.com/rss/search?q=site:ttm24.pl+OR+%22Twoja+Telewizja+Morska%22&hl=pl&gl=PL&ceid=PL:pl',
'name': 'Twoja Telewizja Morska',
'type': 'local_media',
'keywords': []
},
'google_news_nadmorski24': {
'url': 'https://news.google.com/rss/search?q=site:nadmorski24.pl&hl=pl&gl=PL&ceid=PL:pl',
'name': 'Nadmorski24.pl',
'type': 'local_media',
'keywords': []
},
# Facebook - Maciej Samsonowicz (via Google search - FB doesn't have RSS)
'google_news_samsonowicz_fb': {
'url': 'https://news.google.com/rss/search?q=%22Maciej+Samsonowicz%22+facebook&hl=pl&gl=PL&ceid=PL:pl',
'name': 'Google News (Facebook Samsonowicz)',
'type': 'aggregator',
'keywords': []
}
}
@ -513,3 +539,191 @@ def search_zopk_news(db_session, query: str = None) -> Dict:
"""
service = ZOPKNewsService(db_session)
return service.search_all_sources(query or 'Zielony Okręg Przemysłowy Kaszubia')
# ============================================================
# AI RELEVANCE EVALUATION (GEMINI)
# ============================================================
# Prompt template sent to Gemini for a single news item. It is filled with
# str.format() using the placeholders {title}, {description}, {source} and
# {date}; the literal JSON braces in the examples are doubled ({{ }}) so that
# str.format() emits them as single braces instead of treating them as fields.
ZOPK_AI_EVALUATION_PROMPT = """Jesteś ekspertem ds. analizy wiadomości. Oceń, czy poniższy artykuł/news dotyczy projektu **Zielony Okręg Przemysłowy Kaszubia (ZOPK)** lub związanych z nim tematów.
**ZOPK obejmuje:**
1. Morską energetykę wiatrową na Bałtyku (offshore wind)
2. Elektrownię jądrową w Lubiatowie-Kopalino (Choczewo)
3. Inwestycję Kongsberg w Rumi (przemysł obronny)
4. Centra danych i laboratoria wodorowe
5. Rozwój przemysłowy Kaszub (Wejherowo, Rumia, Gdynia)
6. Kluczowe osoby: Maciej Samsonowicz (koordynator ZOPK), minister Kosiniak-Kamysz
**Artykuł do oceny:**
Tytuł: {title}
Opis: {description}
Źródło: {source}
Data: {date}
**Twoje zadanie:**
1. Oceń czy artykuł dotyczy ZOPK lub powiązanych tematów
2. Odpowiedz TYLKO w formacie JSON (bez żadnego innego tekstu):
{{"relevant": true/false, "reason": "krótkie uzasadnienie po polsku (max 100 znaków)"}}
Przykłady odpowiedzi:
{{"relevant": true, "reason": "Dotyczy inwestycji Kongsberg w Rumi"}}
{{"relevant": false, "reason": "Artykuł o lokalnych wydarzeniach kulturalnych"}}
{{"relevant": true, "reason": "Informacje o farmach wiatrowych na Bałtyku"}}
{{"relevant": false, "reason": "News sportowy bez związku z przemysłem"}}"""
def evaluate_news_relevance(news_item, gemini_service=None) -> Dict:
    """
    Evaluate a single news item for ZOPK relevance using Gemini AI.

    Args:
        news_item: ZOPKNews object or dict with title, description,
            source_name, published_at. Missing or None fields are treated
            as empty strings.
        gemini_service: Optional GeminiService instance (fetched via
            get_gemini_service() if not provided)

    Returns:
        Dict with keys:
            relevant (bool | None): the verdict, None when not evaluated
            reason (str): model explanation (max 255 chars) or error text
            evaluated (bool): True only when the model returned an explicit
                true/false verdict
    """
    import json

    # Get Gemini service
    if gemini_service is None:
        try:
            from gemini_service import get_gemini_service
            gemini_service = get_gemini_service()
        except Exception as e:
            logger.error(f"Failed to get Gemini service: {e}")
            return {'relevant': None, 'reason': 'Gemini service unavailable', 'evaluated': False}

    if gemini_service is None:
        return {'relevant': None, 'reason': 'Gemini service not initialized', 'evaluated': False}

    # Extract fields from news_item
    if hasattr(news_item, 'title'):
        title = news_item.title or ''
        description = news_item.description or ''
        source = news_item.source_name or news_item.source_domain or ''
        date = news_item.published_at.strftime('%Y-%m-%d') if news_item.published_at else ''
    else:
        # `or ''` (not a .get default) so explicit None values cannot reach
        # the slicing below.
        title = news_item.get('title') or ''
        description = news_item.get('description') or ''
        source = news_item.get('source_name') or ''
        date = news_item.get('published_at') or ''
        # Render datetime-like values the same way as in the object branch.
        if hasattr(date, 'strftime'):
            date = date.strftime('%Y-%m-%d')

    # Build prompt
    prompt = ZOPK_AI_EVALUATION_PROMPT.format(
        title=title[:500],  # Limit length
        description=description[:1000] if description else 'Brak opisu',
        source=source[:100],
        date=date
    )

    try:
        # Call Gemini with low temperature for consistent results
        response = gemini_service.generate_text(
            prompt,
            temperature=0.1,
            feature='zopk_news_evaluation'
        )

        # Extract the first flat JSON object from the response (this also
        # handles answers wrapped in markdown code blocks).
        json_match = re.search(r'\{[^{}]*\}', response)
        if not json_match:
            logger.warning(f"Could not parse Gemini response: {response[:200]}")
            return {'relevant': None, 'reason': 'Invalid AI response format', 'evaluated': False}

        result = json.loads(json_match.group())

        # Accept only an explicit verdict. bool("false") is True, so string
        # verdicts are mapped by value, and a missing/unknown verdict is
        # reported as "not evaluated" rather than stored as "not relevant".
        relevant = result.get('relevant')
        if isinstance(relevant, str):
            relevant = {'true': True, 'false': False}.get(relevant.strip().lower())
        if not isinstance(relevant, bool):
            logger.warning(f"Could not parse Gemini response: {response[:200]}")
            return {'relevant': None, 'reason': 'Invalid AI response format', 'evaluated': False}

        return {
            'relevant': relevant,
            'reason': str(result.get('reason') or '')[:255],
            'evaluated': True
        }

    except json.JSONDecodeError as e:
        logger.error(f"JSON decode error: {e}")
        return {'relevant': None, 'reason': f'JSON parse error: {str(e)[:50]}', 'evaluated': False}
    except Exception as e:
        logger.error(f"Gemini evaluation error: {e}")
        return {'relevant': None, 'reason': f'AI error: {str(e)[:50]}', 'evaluated': False}
def evaluate_pending_news(db_session, limit: int = 50, user_id: int = None) -> Dict:
    """
    Evaluate multiple pending news items for ZOPK relevance.

    Selects news with status 'pending' and no AI verdict yet (newest first),
    evaluates each with evaluate_news_relevance() and stores the verdict,
    reason, timestamp and model name on the row. All rows are committed in a
    single transaction at the end.

    Args:
        db_session: SQLAlchemy session
        limit: Max number of items to evaluate (to avoid API limits)
        user_id: User triggering the evaluation (accepted for the caller;
            not used by this function)

    Returns:
        Dict with stats: total_evaluated, relevant_count, not_relevant_count,
        errors, message. If the final commit fails, the evaluated counters
        are reset to 0 because nothing was persisted.
    """
    from database import ZOPKNews
    from datetime import datetime

    # Get pending news that haven't been AI-evaluated yet
    pending_news = (
        db_session.query(ZOPKNews)
        .filter(
            ZOPKNews.status == 'pending',
            ZOPKNews.ai_relevant.is_(None)  # Not yet evaluated
        )
        .order_by(ZOPKNews.created_at.desc())
        .limit(limit)
        .all()
    )

    if not pending_news:
        return {
            'total_evaluated': 0,
            'relevant_count': 0,
            'not_relevant_count': 0,
            'errors': 0,
            'message': 'Brak newsów do oceny'
        }

    # Get Gemini service once
    try:
        from gemini_service import get_gemini_service
        gemini = get_gemini_service()
    except Exception as e:
        return {
            'total_evaluated': 0,
            'relevant_count': 0,
            'not_relevant_count': 0,
            'errors': 1,
            'message': f'Gemini service error: {str(e)}'
        }

    # Fail fast: without this check every item would retry the service
    # lookup in evaluate_news_relevance() and be counted as a separate error.
    if gemini is None:
        return {
            'total_evaluated': 0,
            'relevant_count': 0,
            'not_relevant_count': 0,
            'errors': 1,
            'message': 'Gemini service error: Gemini service not initialized'
        }

    stats = {
        'total_evaluated': 0,
        'relevant_count': 0,
        'not_relevant_count': 0,
        'errors': 0
    }

    for news in pending_news:
        result = evaluate_news_relevance(news, gemini)

        if not result['evaluated']:
            stats['errors'] += 1
            logger.warning(f"Failed to evaluate news {news.id}: {result['reason']}")
            continue

        news.ai_relevant = result['relevant']
        news.ai_evaluation_reason = result['reason']
        news.ai_evaluated_at = datetime.now()
        # NOTE(review): model name is hardcoded here; confirm it matches the
        # model the Gemini service actually uses.
        news.ai_model = 'gemini-2.0-flash'

        stats['total_evaluated'] += 1
        if result['relevant']:
            stats['relevant_count'] += 1
        else:
            stats['not_relevant_count'] += 1

    # Commit all changes
    try:
        db_session.commit()
        stats['message'] = f"Oceniono {stats['total_evaluated']} newsów: {stats['relevant_count']} pasuje, {stats['not_relevant_count']} nie pasuje"
    except Exception as e:
        db_session.rollback()
        # The rollback discarded every verdict, so do not report them as done.
        stats['total_evaluated'] = 0
        stats['relevant_count'] = 0
        stats['not_relevant_count'] = 0
        stats['errors'] += 1
        stats['message'] = f'Database error: {str(e)}'

    return stats