feat(zopk): Improve AI scoring and auto-approve threshold

Changes:
- Lower auto-approve threshold from 4★ to 3★ (verified 2026-01-15)
- Add detailed progress bar for ZOPK search process
- Add auto-approved articles list with star ratings
- Document ZOPK topics (ZOP Kaszubia) in CLAUDE.md
- Add 8-second countdown before auto-refresh

Technical:
- zopk_news_service.py: Changed score threshold from >=4 to >=3
- Templates: New CSS for progress phases and results display
- CLAUDE.md: Added "ZOP Kaszubia News" section with topic guidelines

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-15 05:05:41 +01:00
parent 90da6b9c64
commit db02d3660f
6 changed files with 1852 additions and 172 deletions

View File

@ -788,6 +788,56 @@ AKTUALNOŚCI
# 0 */6 * * * cd /var/www/nordabiznes && /var/www/nordabiznes/venv/bin/python3 scripts/fetch_company_news.py --all >> /var/log/nordabiznes/news_fetch.log 2>&1
```
## ZOP Kaszubia News (ZOPK)
### Opis
System monitoringu newsów związanych z projektem **Zielony Okręg Przemysłowy Kaszubia**.
Panel admina: `/admin/zopk/news`
### Tematy ZOP Kaszubia (istotne)
- **Zielony Okręg Przemysłowy Kaszubia** - główny projekt
- **Elektrownia jądrowa na Pomorzu** - Lubiatowo-Kopalino
- **Offshore wind Bałtyk** - farmy wiatrowe, Baltic Power, Baltica
- **Via Pomerania** - droga ekspresowa Ustka-Bydgoszcz
- **Droga Czerwona** - połączenie z Portem Gdynia
- **Kongsberg** - norweskie inwestycje zbrojeniowe w Rumi
- **Pakt Bezpieczeństwa Pomorze Środkowe** - MON
- **Izba Przedsiębiorców NORDA** - lokalne organizacje biznesowe
### Tematy NIEZWIĄZANE (do odrzucenia)
- Turystyka na Kaszubach (kuligi, lodowiska, hotele)
- Polityka ogólnopolska (Ziobro, polexit)
- Inne regiony Polski (Śląsk, Lubuskie, Małopolska)
- Wypadki i wydarzenia kryminalne
- Clickbait i lifestyle
### Reguły auto-approve (WAŻNE!)
**Próg auto-approve: score >= 3** (verified 2026-01-15)
| Score | Status | Opis |
|-------|--------|------|
| 1-2 | `pending` | Wymaga ręcznej moderacji |
| 3-5 | `auto_approved` | Automatycznie zatwierdzony |
**Plik:** `zopk_news_service.py` (linie 890, 1124, 1145)
### Tabela zopk_news
```sql
zopk_news (
id, title, url, description,
source_name, source_domain, source_type,
ai_relevance_score INTEGER, -- 1-5 gwiazdek
status VARCHAR(20), -- pending, auto_approved, approved, rejected
confidence_score, source_count,
created_at, updated_at
)
```
## Social Media - Stan aktualny
### Statystyki (2025-12-29)

128
app.py
View File

@ -10245,6 +10245,7 @@ def admin_zopk_news():
try:
page = request.args.get('page', 1, type=int)
status = request.args.get('status', 'all')
stars = request.args.get('stars', 'all') # 'all', '1'-'5', 'none'
sort_by = request.args.get('sort', 'date') # 'date', 'score', 'title'
sort_dir = request.args.get('dir', 'desc') # 'asc', 'desc'
per_page = 50
@ -10253,6 +10254,13 @@ def admin_zopk_news():
if status != 'all':
query = query.filter(ZOPKNews.status == status)
# Filter by star rating
if stars == 'none':
query = query.filter(ZOPKNews.ai_relevance_score.is_(None))
elif stars in ['1', '2', '3', '4', '5']:
query = query.filter(ZOPKNews.ai_relevance_score == int(stars))
# 'all' - no filter
# Apply sorting
sort_func = desc if sort_dir == 'desc' else asc
if sort_by == 'score':
@ -10277,6 +10285,7 @@ def admin_zopk_news():
total_pages=total_pages,
total=total,
current_status=status,
current_stars=stars,
current_sort=sort_by,
current_dir=sort_dir
)
@ -10483,6 +10492,117 @@ def admin_zopk_reject_old_news():
db.close()
@app.route('/admin/zopk/news/star-counts')
@login_required
def admin_zopk_news_star_counts():
    """Return the number of pending ZOPK news items per star rating.

    Response JSON on success: ``{'success': True, 'counts': {...}}`` where
    keys 1-5 are star ratings and key 0 is the bucket for items whose
    ``ai_relevance_score`` is NULL (no AI evaluation yet).

    Returns:
        A 403 JSON error for non-admin users, a 500 JSON error when the
        query fails, otherwise the counts payload described above.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from database import ZOPKNews
    from sqlalchemy import func

    db = SessionLocal()
    try:
        # A single grouped query replaces six separate COUNT round trips and
        # guarantees that all buckets come from the same snapshot.
        grouped = (
            db.query(ZOPKNews.ai_relevance_score, func.count(ZOPKNews.id))
            .filter(ZOPKNews.status == 'pending')
            .group_by(ZOPKNews.ai_relevance_score)
            .all()
        )

        # Pre-fill every bucket so ratings without pending items report 0.
        counts = {star: 0 for star in range(1, 6)}
        counts[0] = 0
        for score, total in grouped:
            if score is None:
                # NULL score -> "no rating" bucket.
                counts[0] = total
            elif 1 <= score <= 5:
                # Scores outside 1-5 are ignored, as they were before.
                counts[score] = total

        return jsonify({
            'success': True,
            'counts': counts
        })

    except Exception as e:
        logger.error(f"Error getting ZOPK news star counts: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd'}), 500

    finally:
        db.close()
@app.route('/admin/zopk/news/reject-by-stars', methods=['POST'])
@login_required
def admin_zopk_reject_by_stars():
    """Reject every pending ZOPK news item that has one of the given ratings.

    Expected JSON body:
        stars:  list of ratings to reject; 1-5 are star ratings and 0 means
                "no AI rating" (``ai_relevance_score IS NULL``).
        reason: optional rejection reason; a default text listing the
                selected ratings is used when it is missing or blank.

    Returns:
        403 for non-admin users, 400 when ``stars`` is missing, is not a
        list, or contains no valid rating, 500 when the database update
        fails (the transaction is rolled back), otherwise a JSON payload
        with the number of rejected items.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from database import ZOPKNews
    from sqlalchemy import or_

    db = SessionLocal()
    try:
        # silent=True: a malformed or non-JSON body yields None instead of
        # raising, so it is reported as a 400 below rather than a generic 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        stars = data.get('stars', [])  # List of star ratings to reject (0 = no rating)
        reason = data.get('reason', '')

        if not stars:
            return jsonify({'success': False, 'error': 'Nie wybrano ocen do odrzucenia'}), 400

        if not isinstance(stars, (list, tuple)):
            return jsonify({'success': False, 'error': 'Nieprawidłowe oceny gwiazdkowe'}), 400

        # Keep only real numbers equal to 0..5, normalised to int and
        # de-duplicated. Booleans are excluded explicitly because
        # True == 1 and False == 0 in Python.
        valid_stars = []
        for raw in stars:
            if isinstance(raw, bool) or not isinstance(raw, (int, float)):
                continue
            if raw in (0, 1, 2, 3, 4, 5) and int(raw) not in valid_stars:
                valid_stars.append(int(raw))

        if not valid_stars:
            return jsonify({'success': False, 'error': 'Nieprawidłowe oceny gwiazdkowe'}), 400

        # One OR-ed condition per selected rating; 0 maps to the NULL score.
        conditions = [
            ZOPKNews.ai_relevance_score.is_(None) if star == 0
            else ZOPKNews.ai_relevance_score == star
            for star in valid_stars
        ]

        news_to_reject = db.query(ZOPKNews).filter(
            ZOPKNews.status == 'pending',
            or_(*conditions)
        ).all()

        count = len(news_to_reject)

        ratings_text = ', '.join(
            str(star) if star > 0 else 'brak oceny' for star in valid_stars
        )
        default_reason = f"Masowo odrzucone - oceny: {ratings_text}"
        # Only a non-blank string is accepted as a custom reason.
        custom_reason = reason.strip() if isinstance(reason, str) else ''
        final_reason = custom_reason if custom_reason else default_reason

        # One timestamp for the whole batch so all rows share moderated_at.
        moderated_at = datetime.now()
        for news in news_to_reject:
            news.status = 'rejected'
            news.moderated_by = current_user.id
            news.moderated_at = moderated_at
            news.rejection_reason = final_reason

        db.commit()

        logger.info(f"Admin {current_user.email} rejected {count} ZOPK news with stars {valid_stars}")

        return jsonify({
            'success': True,
            'message': f'Odrzucono {count} artykułów',
            'count': count
        })

    except Exception as e:
        db.rollback()
        logger.error(f"Error rejecting ZOPK news by stars: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas odrzucania'}), 500

    finally:
        db.close()
@app.route('/admin/zopk/news/evaluate-ai', methods=['POST'])
@login_required
def admin_zopk_evaluate_ai():
@ -10620,7 +10740,13 @@ def api_zopk_search_news():
'saved_new': results['saved_new'],
'updated_existing': results['updated_existing'],
'auto_approved': results['auto_approved'],
'source_stats': results['source_stats']
'ai_approved': results.get('ai_approved', 0),
'ai_rejected': results.get('ai_rejected', 0),
'blacklisted': results.get('blacklisted', 0),
'keyword_filtered': results.get('keyword_filtered', 0),
'source_stats': results['source_stats'],
'process_log': results.get('process_log', []),
'auto_approved_articles': results.get('auto_approved_articles', [])
})
except Exception as e:

View File

@ -2041,6 +2041,313 @@ class ZOPKNewsFetchJob(Base):
user = relationship('User', foreign_keys=[triggered_by_user])
# ============================================================
# ZOPK KNOWLEDGE BASE (AI-powered, with pgvector)
# ============================================================
class ZOPKKnowledgeChunk(Base):
    """
    A text chunk cut out of an approved ZOPK news article.

    Every chunk is meant to be a semantically coherent passage that carries
    an embedding vector, so it can be retrieved by similarity search (RAG).

    Guidelines recorded with the model:
    - chunk size of 500-1000 tokens with roughly 100 tokens of overlap
    - embeddings from text-embedding-004 (768 dimensions)
    """
    __tablename__ = "zopk_knowledge_chunks"

    id = Column(Integer, primary_key=True)

    # --- Origin: the article this chunk was taken from ---
    source_news_id = Column(Integer, ForeignKey("zopk_news.id"), index=True, nullable=False)

    # --- Text payload ---
    content = Column(Text, nullable=False)  # raw chunk text
    content_clean = Column(Text)  # normalized variant used for processing
    chunk_index = Column(Integer)  # position inside the article (0, 1, 2...)
    token_count = Column(Integer)  # approximate number of tokens

    # --- Embedding ---
    # Kept in a Text column as a JSON string; per the original notes it is
    # converted to a 768-dimension pgvector value when queried.
    embedding = Column(Text)

    # --- Metadata extracted by AI ---
    chunk_type = Column(String(50))  # narrative, fact, quote, statistic, event, definition
    summary = Column(Text)  # summary of one or two sentences
    keywords = Column(Text if IS_SQLITE else PG_ARRAY(String(100)))  # extracted keywords
    language = Column(String(10), default="pl")  # pl, en

    # --- Context ---
    context_date = Column(Date)  # date the content refers to, not the article date
    context_location = Column(String(255))  # geographic location, when mentioned

    # --- Quality and relevance ---
    importance_score = Column(Integer)  # 1-5 importance of the information
    confidence_score = Column(Numeric(3, 2))  # 0.00-1.00 AI confidence in the extraction

    # --- Moderation ---
    is_verified = Column(Boolean, default=False)  # confirmed by a human
    verified_by = Column(Integer, ForeignKey("users.id"))
    verified_at = Column(DateTime)

    # --- Processing bookkeeping ---
    extraction_model = Column(String(100))  # e.g. gemini-2.0-flash, gpt-4
    extracted_at = Column(DateTime, default=datetime.now)

    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, onupdate=datetime.now, default=datetime.now)

    # --- Relationships ---
    source_news = relationship("ZOPKNews", backref="knowledge_chunks")
    verifier = relationship("User", foreign_keys=[verified_by])
class ZOPKKnowledgeEntity(Base):
    """
    Named entity extracted from the ZOPK knowledge base.

    Entities are deduplicated and enriched across all sources.
    Types: company, person, place, organization, project, technology
    """
    __tablename__ = 'zopk_knowledge_entities'

    id = Column(Integer, primary_key=True)

    # Entity identification
    entity_type = Column(String(50), nullable=False, index=True)
    name = Column(String(255), nullable=False)
    normalized_name = Column(String(255), index=True)  # Lowercase, no special chars (for dedup)
    aliases = Column(PG_ARRAY(String(255)) if not IS_SQLITE else Text)  # Alternative names

    # Description
    description = Column(Text)  # AI-generated description
    short_description = Column(String(500))  # One-liner

    # Linking to existing data
    company_id = Column(Integer, ForeignKey('companies.id'))  # Link to Norda company if exists
    zopk_project_id = Column(Integer, ForeignKey('zopk_projects.id'))  # Link to ZOPK project
    external_url = Column(String(1000))  # Wikipedia, company website, etc.

    # Entity metadata (JSONB for flexibility): {role: "CEO", founded: 2020, ...}
    # The Python attribute cannot be called `metadata`: that name is reserved
    # by the SQLAlchemy Declarative API for the MetaData object and defining
    # it raises InvalidRequestError at class creation. The database column
    # is still named "metadata".
    entity_metadata = Column('metadata', PG_JSONB if not IS_SQLITE else Text)

    # Statistics
    mentions_count = Column(Integer, default=0)
    first_mentioned_at = Column(DateTime)
    last_mentioned_at = Column(DateTime)

    # Embedding for entity similarity
    embedding = Column(Text)  # Entity description embedding

    # Quality
    is_verified = Column(Boolean, default=False)
    merged_into_id = Column(Integer, ForeignKey('zopk_knowledge_entities.id'))  # For deduplication

    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)

    # Relationships
    company = relationship('Company', foreign_keys=[company_id])
    zopk_project = relationship('ZOPKProject', foreign_keys=[zopk_project_id])
    # Self-referential many-to-one: points at the entity this one was merged into.
    merged_into = relationship('ZOPKKnowledgeEntity', remote_side=[id], foreign_keys=[merged_into_id])
class ZOPKKnowledgeFact(Base):
    """
    An atomic, verifiable statement pulled out of a knowledge chunk.

    Sample facts:
    - "ZOPK otrzymał 500 mln PLN dofinansowania w 2024"
    - "Port Gdynia jest głównym partnerem projektu"
    - "Projekt zakłada utworzenie 5000 miejsc pracy"
    """
    __tablename__ = "zopk_knowledge_facts"

    id = Column(Integer, primary_key=True)

    # --- Provenance ---
    source_chunk_id = Column(Integer, ForeignKey("zopk_knowledge_chunks.id"), index=True, nullable=False)
    source_news_id = Column(Integer, ForeignKey("zopk_news.id"), index=True)

    # --- The fact itself ---
    fact_type = Column(String(50), nullable=False)  # statistic, event, statement, decision, milestone
    subject = Column(String(255))  # who or what the fact concerns
    predicate = Column(String(100))  # kind of action or relation
    object = Column(Text)  # the information being stated
    full_text = Column(Text, nullable=False)  # the fact as a complete sentence

    # --- Structured values that make the fact queryable ---
    numeric_value = Column(Numeric(20, 2))  # set when the fact carries a number
    numeric_unit = Column(String(50))  # PLN, EUR, jobs, MW, ...
    date_value = Column(Date)  # set when the fact points at a specific date

    # --- Context ---
    context = Column(Text)  # surrounding text used for disambiguation
    citation = Column(Text)  # original quote, if there is one

    # --- Entities taking part (denormalized for fast reads) ---
    # e.g. [{id: 1, name: "...", type: "company"}, ...]
    entities_involved = Column(Text if IS_SQLITE else PG_JSONB)

    # --- Quality and verification ---
    confidence_score = Column(Numeric(3, 2))  # AI confidence
    is_verified = Column(Boolean, default=False)
    contradicts_fact_id = Column(Integer, ForeignKey("zopk_knowledge_facts.id"))  # set when contradicted

    # --- Embedding used for fact similarity ---
    embedding = Column(Text)

    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, onupdate=datetime.now, default=datetime.now)

    # --- Relationships ---
    source_chunk = relationship("ZOPKKnowledgeChunk", backref="facts")
    source_news = relationship("ZOPKNews", backref="facts")
    # Self-referential link resolved through contradicts_fact_id.
    contradicted_by = relationship(
        "ZOPKKnowledgeFact",
        foreign_keys=[contradicts_fact_id],
        remote_side=[id],
    )
class ZOPKKnowledgeEntityMention(Base):
    """
    Association row joining a knowledge chunk with an entity it mentions.

    Records where the entity appears in the chunk and in what context.
    """
    __tablename__ = "zopk_knowledge_entity_mentions"

    id = Column(Integer, primary_key=True)

    chunk_id = Column(Integer, ForeignKey("zopk_knowledge_chunks.id"), index=True, nullable=False)
    entity_id = Column(Integer, ForeignKey("zopk_knowledge_entities.id"), index=True, nullable=False)

    # --- Details of the mention ---
    mention_text = Column(String(500))  # exact text matched to the entity
    mention_type = Column(String(50))  # direct, reference, pronoun
    start_position = Column(Integer)  # character offset inside the chunk
    end_position = Column(Integer)

    # --- Context ---
    sentiment = Column(String(20))  # positive, neutral, negative
    role_in_context = Column(String(100))  # subject, object, beneficiary, partner

    confidence = Column(Numeric(3, 2))  # confidence of the entity linking

    created_at = Column(DateTime, default=datetime.now)

    # --- Relationships ---
    chunk = relationship("ZOPKKnowledgeChunk", backref="entity_mentions")
    entity = relationship("ZOPKKnowledgeEntity", backref="mentions")

    # The same entity may appear only once at a given start offset of a chunk.
    __table_args__ = (
        UniqueConstraint(
            "chunk_id",
            "entity_id",
            "start_position",
            name="uq_chunk_entity_position",
        ),
    )
class ZOPKKnowledgeRelation(Base):
    """
    An edge between two entities found in the knowledge base.

    Together these rows form a knowledge graph of the ZOPK ecosystem.

    Sample relations:
    - Company A "partner" Company B
    - Person X "CEO of" Company Y
    - Project Z "funded by" Organization W
    """
    __tablename__ = "zopk_knowledge_relations"

    id = Column(Integer, primary_key=True)

    # --- The two endpoints ---
    entity_a_id = Column(Integer, ForeignKey("zopk_knowledge_entities.id"), index=True, nullable=False)
    entity_b_id = Column(Integer, ForeignKey("zopk_knowledge_entities.id"), index=True, nullable=False)

    # --- What kind of relation it is ---
    relation_type = Column(String(100), nullable=False)  # partner, investor, supplier, competitor, subsidiary, employs
    relation_subtype = Column(String(100))  # finer grained: strategic_partner, minority_investor
    is_bidirectional = Column(Boolean, default=False)  # True for "partners", False for "invests in"

    # --- Evidence ---
    source_chunk_id = Column(Integer, ForeignKey("zopk_knowledge_chunks.id"))
    source_fact_id = Column(Integer, ForeignKey("zopk_knowledge_facts.id"))
    evidence_text = Column(Text)  # quote that supports the relation

    # --- Validity in time ---
    valid_from = Column(Date)  # start of the relation
    valid_until = Column(Date)  # end of the relation (NULL = still valid)
    is_current = Column(Boolean, default=True)

    # --- Strength and confidence ---
    strength = Column(Integer)  # 1-5 strength of the relation
    confidence = Column(Numeric(3, 2))  # AI confidence in the relation
    mention_count = Column(Integer, default=1)  # number of times the relation was found

    # --- Quality ---
    is_verified = Column(Boolean, default=False)
    verified_by = Column(Integer, ForeignKey("users.id"))

    created_at = Column(DateTime, default=datetime.now)
    updated_at = Column(DateTime, onupdate=datetime.now, default=datetime.now)

    # --- Relationships ---
    # Two foreign keys target the same table, so each side names its own key.
    entity_a = relationship("ZOPKKnowledgeEntity", backref="relations_as_subject", foreign_keys=[entity_a_id])
    entity_b = relationship("ZOPKKnowledgeEntity", backref="relations_as_object", foreign_keys=[entity_b_id])
    source_chunk = relationship("ZOPKKnowledgeChunk", backref="discovered_relations")
    source_fact = relationship("ZOPKKnowledgeFact", backref="relation_evidence")
    verifier = relationship("User", foreign_keys=[verified_by])

    # At most one row per (entity A, entity B, relation type) triple.
    __table_args__ = (
        UniqueConstraint(
            "entity_a_id",
            "entity_b_id",
            "relation_type",
            name="uq_entity_relation",
        ),
    )
class ZOPKKnowledgeExtractionJob(Base):
    """
    Bookkeeping record for a knowledge extraction run over an approved article.

    Intended as one job per article; stores configuration, progress and
    results of the run.
    """
    __tablename__ = "zopk_knowledge_extraction_jobs"

    id = Column(Integer, primary_key=True)
    job_id = Column(String(100), index=True, nullable=False, unique=True)

    # --- Article being processed ---
    news_id = Column(Integer, ForeignKey("zopk_news.id"), index=True, nullable=False)

    # --- Configuration ---
    extraction_model = Column(String(100))  # e.g. gemini-2.0-flash
    chunk_size = Column(Integer, default=800)  # target tokens per chunk
    chunk_overlap = Column(Integer, default=100)  # overlapping tokens

    # --- Result counters ---
    chunks_created = Column(Integer, default=0)
    entities_extracted = Column(Integer, default=0)
    facts_extracted = Column(Integer, default=0)
    relations_discovered = Column(Integer, default=0)

    # --- Cost ---
    tokens_used = Column(Integer, default=0)
    cost_cents = Column(Numeric(10, 4), default=0)

    # --- Status ---
    status = Column(String(20), default="pending")  # pending, running, completed, failed
    error_message = Column(Text)
    progress_percent = Column(Integer, default=0)

    # --- Timing ---
    started_at = Column(DateTime)
    completed_at = Column(DateTime)

    # --- What started the job ---
    triggered_by = Column(String(50))  # auto (on approval), manual, batch
    triggered_by_user = Column(Integer, ForeignKey("users.id"))

    created_at = Column(DateTime, default=datetime.now)

    # --- Relationships ---
    news = relationship("ZOPKNews", backref="extraction_jobs")
    user = relationship("User", foreign_keys=[triggered_by_user])
# ============================================================
# AI USAGE TRACKING MODELS
# ============================================================

View File

@ -818,6 +818,156 @@
flex-direction: column;
}
}
/* Progress phases (search → filter → AI → save) */
.progress-phases {
display: flex;
gap: var(--spacing-xs);
margin-bottom: var(--spacing-md);
flex-wrap: wrap;
}
.progress-phase {
display: flex;
align-items: center;
gap: 6px;
padding: 6px 12px;
border-radius: var(--radius);
font-size: var(--font-size-xs);
background: rgba(255,255,255,0.1);
opacity: 0.5;
transition: all 0.3s ease;
}
.progress-phase.active {
opacity: 1;
background: rgba(255,255,255,0.25);
animation: pulse 1.5s ease-in-out infinite;
}
.progress-phase.completed {
opacity: 1;
background: rgba(34, 197, 94, 0.3);
}
@keyframes pulse {
0%, 100% { transform: scale(1); }
50% { transform: scale(1.02); }
}
.progress-phase-icon {
font-size: 1em;
}
/* Search results container */
.search-results-container {
margin-top: var(--spacing-lg);
padding: var(--spacing-lg);
background: rgba(255,255,255,0.1);
border-radius: var(--radius-lg);
animation: fadeIn 0.5s ease;
}
@keyframes fadeIn {
from { opacity: 0; transform: translateY(-10px); }
to { opacity: 1; transform: translateY(0); }
}
.search-results-summary {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
gap: var(--spacing-md);
margin-bottom: var(--spacing-lg);
}
.summary-stat {
text-align: center;
padding: var(--spacing-md);
background: rgba(255,255,255,0.1);
border-radius: var(--radius);
}
.summary-stat .value {
font-size: var(--font-size-2xl);
font-weight: 700;
}
.summary-stat .label {
font-size: var(--font-size-xs);
opacity: 0.8;
}
.summary-stat.success .value { color: #86efac; }
.summary-stat.warning .value { color: #fde68a; }
.summary-stat.error .value { color: #fca5a5; }
.summary-stat.info .value { color: #93c5fd; }
/* Auto-approved articles section */
.auto-approved-section {
margin-top: var(--spacing-lg);
padding: var(--spacing-md);
background: rgba(34, 197, 94, 0.15);
border-radius: var(--radius);
border: 1px solid rgba(34, 197, 94, 0.3);
}
.auto-approved-section h4 {
margin-bottom: var(--spacing-md);
font-size: var(--font-size-sm);
}
.auto-approved-list {
display: flex;
flex-direction: column;
gap: var(--spacing-xs);
max-height: 200px;
overflow-y: auto;
}
.auto-approved-item {
display: flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-xs) var(--spacing-sm);
background: rgba(255,255,255,0.1);
border-radius: var(--radius-sm);
font-size: var(--font-size-xs);
}
.auto-approved-item .stars {
color: #fbbf24;
flex-shrink: 0;
}
.auto-approved-item .title {
flex: 1;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.auto-approved-item .source {
color: rgba(255,255,255,0.6);
flex-shrink: 0;
font-size: 10px;
}
/* Refresh countdown */
.refresh-countdown {
margin-top: var(--spacing-lg);
padding: var(--spacing-md);
background: rgba(255,255,255,0.1);
border-radius: var(--radius);
display: flex;
justify-content: space-between;
align-items: center;
font-size: var(--font-size-sm);
}
.refresh-countdown strong {
font-size: var(--font-size-lg);
color: #fde68a;
}
</style>
{% endblock %}
@ -938,13 +1088,28 @@
<div class="progress-bar-container">
<div class="progress-bar-fill" id="progressBar"></div>
</div>
<div class="progress-phases" id="progressPhases">
<!-- Phases will be rendered by JS -->
</div>
<div class="progress-steps" id="progressSteps"></div>
</div>
<!-- Source Stats (shown after completion) -->
<div class="source-stats" id="sourceStats">
<h4>Statystyki źródeł</h4>
<div class="source-stats-grid" id="sourceStatsGrid"></div>
<!-- Results Container (shown after completion) -->
<div class="search-results-container" id="searchResultsContainer" style="display: none;">
<!-- Summary Stats -->
<div class="search-results-summary" id="searchResultsSummary"></div>
<!-- Auto-approved articles list -->
<div class="auto-approved-section" id="autoApprovedSection" style="display: none;">
<h4>✅ Artykuły automatycznie zaakceptowane (3+★)</h4>
<div class="auto-approved-list" id="autoApprovedList"></div>
</div>
<!-- Countdown to refresh -->
<div class="refresh-countdown" id="refreshCountdown">
<span>Odświeżam za <strong id="countdownSeconds">8</strong> sekund...</span>
<button type="button" class="btn btn-sm btn-secondary" onclick="location.reload()">Odśwież teraz</button>
</div>
</div>
<div id="searchResult" style="margin-top: var(--spacing-md); display: none;"></div>
@ -1758,64 +1923,77 @@ const ALL_SOURCES = Object.keys(SOURCE_NAMES);
async function searchNews() {
const btn = document.getElementById('searchBtn');
const resultDiv = document.getElementById('searchResult');
const progressContainer = document.getElementById('progressContainer');
const progressBar = document.getElementById('progressBar');
const progressStatus = document.getElementById('progressStatus');
const progressPercent = document.getElementById('progressPercent');
const progressPhases = document.getElementById('progressPhases');
const progressSteps = document.getElementById('progressSteps');
const sourceStats = document.getElementById('sourceStats');
const sourceStatsGrid = document.getElementById('sourceStatsGrid');
const resultsContainer = document.getElementById('searchResultsContainer');
const resultsSummary = document.getElementById('searchResultsSummary');
const autoApprovedSection = document.getElementById('autoApprovedSection');
const autoApprovedList = document.getElementById('autoApprovedList');
const query = document.getElementById('searchQuery').value;
// Process phases definition
const PHASES = [
{ id: 'search', icon: '🔍', label: 'Wyszukiwanie' },
{ id: 'filter', icon: '🚫', label: 'Filtrowanie' },
{ id: 'ai', icon: '🤖', label: 'Analiza AI' },
{ id: 'save', icon: '💾', label: 'Zapisywanie' }
];
// Reset UI
btn.disabled = true;
btn.textContent = 'Szukam...';
resultDiv.style.display = 'none';
sourceStats.classList.remove('active');
resultsContainer.style.display = 'none';
autoApprovedSection.style.display = 'none';
progressContainer.classList.add('active');
progressBar.style.width = '0%';
progressBar.style.background = ''; // Reset color
progressPercent.textContent = '0%';
// Build initial progress steps
progressSteps.innerHTML = ALL_SOURCES.map((src, idx) => `
<div class="progress-step pending" id="step-${src}">
<span class="progress-step-icon"></span>
<span>${SOURCE_NAMES[src]}</span>
<span class="progress-step-count" id="count-${src}">-</span>
// Build progress phases UI
progressPhases.innerHTML = PHASES.map(phase => `
<div class="progress-phase pending" id="phase-${phase.id}">
<span class="progress-phase-icon">${phase.icon}</span>
<span>${phase.label}</span>
</div>
`).join('');
// Simulate progress while waiting for API
let currentStep = 0;
const totalSteps = ALL_SOURCES.length + 1; // +1 for cross-verification
// Build initial progress steps (will be populated from process_log)
progressSteps.innerHTML = '<div class="progress-step active"><span class="progress-step-icon"></span><span>Inicjalizacja...</span></div>';
// Simulate progress phases while waiting for API
let currentPhaseIdx = 0;
const phaseMessages = [
'Przeszukuję źródła (Brave API + RSS)...',
'Filtruję wyniki (blacklist, słowa kluczowe)...',
'Analiza AI (Gemini ocenia artykuły)...',
'Zapisuję do bazy wiedzy...'
];
const progressInterval = setInterval(() => {
if (currentStep < ALL_SOURCES.length) {
// Mark previous step as completed
if (currentStep > 0) {
const prevStep = document.getElementById(`step-${ALL_SOURCES[currentStep - 1]}`);
if (prevStep) {
prevStep.classList.remove('active');
prevStep.classList.add('completed');
if (currentPhaseIdx < PHASES.length) {
// Update phase UI
PHASES.forEach((phase, idx) => {
const el = document.getElementById(`phase-${phase.id}`);
if (el) {
el.classList.remove('pending', 'active', 'completed');
if (idx < currentPhaseIdx) el.classList.add('completed');
else if (idx === currentPhaseIdx) el.classList.add('active');
else el.classList.add('pending');
}
}
});
// Mark current step as active
const currStep = document.getElementById(`step-${ALL_SOURCES[currentStep]}`);
if (currStep) {
currStep.classList.remove('pending');
currStep.classList.add('active');
}
progressStatus.textContent = `Przeszukiwanie: ${SOURCE_NAMES[ALL_SOURCES[currentStep]]}`;
const percent = Math.round(((currentStep + 1) / totalSteps) * 80);
progressStatus.textContent = phaseMessages[currentPhaseIdx];
const percent = Math.round(((currentPhaseIdx + 1) / PHASES.length) * 80);
progressBar.style.width = `${percent}%`;
progressPercent.textContent = `${percent}%`;
currentStep++;
currentPhaseIdx++;
}
}, 800);
}, 2500); // Each phase ~2.5s for realistic timing
try {
const response = await fetch('{{ url_for("api_zopk_search_news") }}', {
@ -1831,65 +2009,112 @@ async function searchNews() {
const data = await response.json();
// Mark all steps as completed
ALL_SOURCES.forEach(src => {
const step = document.getElementById(`step-${src}`);
if (step) {
step.classList.remove('pending', 'active');
step.classList.add('completed');
}
});
if (data.success) {
// Update counts from source_stats
if (data.source_stats) {
Object.entries(data.source_stats).forEach(([src, count]) => {
const countEl = document.getElementById(`count-${src}`);
if (countEl) {
countEl.textContent = count;
}
});
}
// Mark all phases as completed
PHASES.forEach(phase => {
const el = document.getElementById(`phase-${phase.id}`);
if (el) {
el.classList.remove('pending', 'active');
el.classList.add('completed');
}
});
// Show cross-verification step
progressStatus.textContent = 'Weryfikacja krzyżowa zakończona';
progressBar.style.width = '100%';
progressPercent.textContent = '100%';
progressStatus.textContent = '✅ Wyszukiwanie zakończone!';
// Show source stats
if (data.source_stats && Object.keys(data.source_stats).length > 0) {
sourceStatsGrid.innerHTML = Object.entries(data.source_stats)
.filter(([src, count]) => count > 0)
.sort((a, b) => b[1] - a[1])
.map(([src, count]) => `
<div class="source-stat-item">
<span>${SOURCE_NAMES[src] || src}</span>
<span class="count">${count}</span>
</div>
`).join('');
sourceStats.classList.add('active');
// Display process log as steps
if (data.process_log && data.process_log.length > 0) {
// Show last few important steps
const importantSteps = data.process_log.filter(log =>
log.step.includes('done') || log.step.includes('complete') || log.phase === 'complete'
).slice(-6);
progressSteps.innerHTML = importantSteps.map(log => `
<div class="progress-step completed">
<span class="progress-step-icon"></span>
<span>${log.message}</span>
${log.count > 0 ? `<span class="progress-step-count">${log.count}</span>` : ''}
</div>
`).join('');
}
// Show result message
resultDiv.style.display = 'block';
resultDiv.innerHTML = `
<p style="color: #dcfce7;">
✓ ${data.message}<br>
<small>Auto-zatwierdzone (3+ źródeł): ${data.auto_approved || 0}</small>
</p>
// Hide progress container after a moment
setTimeout(() => {
progressContainer.classList.remove('active');
}, 1500);
// Show results container
resultsContainer.style.display = 'block';
// Build summary stats
resultsSummary.innerHTML = `
<div class="summary-stat info">
<div class="value">${data.total_found || 0}</div>
<div class="label">Znaleziono</div>
</div>
<div class="summary-stat warning">
<div class="value">${(data.blacklisted || 0) + (data.keyword_filtered || 0)}</div>
<div class="label">Odfiltrowano</div>
</div>
<div class="summary-stat error">
<div class="value">${data.ai_rejected || 0}</div>
<div class="label">AI odrzucił</div>
</div>
<div class="summary-stat success">
<div class="value">${data.ai_approved || 0}</div>
<div class="label">AI zaakceptował</div>
</div>
<div class="summary-stat success">
<div class="value">${data.saved_new || 0}</div>
<div class="label">Nowe w bazie</div>
</div>
`;
// Auto-refresh after 3 seconds
setTimeout(() => {
progressStatus.textContent = 'Odświeżanie strony...';
location.reload();
}, 3000);
// Show auto-approved articles list
if (data.auto_approved_articles && data.auto_approved_articles.length > 0) {
autoApprovedSection.style.display = 'block';
autoApprovedList.innerHTML = data.auto_approved_articles.map(article => {
const stars = '★'.repeat(article.score) + '☆'.repeat(5 - article.score);
return `
<div class="auto-approved-item">
<span class="stars">${stars}</span>
<span class="title">${article.title}</span>
<span class="source">${article.source || ''}</span>
</div>
`;
}).join('');
}
// Start countdown to refresh (8 seconds)
let countdown = 8;
const countdownEl = document.getElementById('countdownSeconds');
const countdownInterval = setInterval(() => {
countdown--;
countdownEl.textContent = countdown;
if (countdown <= 0) {
clearInterval(countdownInterval);
location.reload();
}
}, 1000);
} else {
// Error handling
progressBar.style.width = '100%';
progressBar.style.background = '#fca5a5';
progressStatus.textContent = 'Błąd wyszukiwania';
resultDiv.style.display = 'block';
resultDiv.innerHTML = `<p style="color: #fca5a5;">Błąd: ${data.error}</p>`;
PHASES.forEach(phase => {
const el = document.getElementById(`phase-${phase.id}`);
if (el) el.classList.remove('active');
});
progressSteps.innerHTML = `
<div class="progress-step" style="color: #fca5a5;">
<span class="progress-step-icon"></span>
<span>Błąd: ${data.error}</span>
</div>
`;
btn.disabled = false;
btn.textContent = 'Szukaj artykułów';
}
@ -1898,8 +2123,13 @@ async function searchNews() {
progressBar.style.width = '100%';
progressBar.style.background = '#fca5a5';
progressStatus.textContent = 'Błąd połączenia';
resultDiv.style.display = 'block';
resultDiv.innerHTML = `<p style="color: #fca5a5;">Błąd połączenia: ${error.message}</p>`;
progressSteps.innerHTML = `
<div class="progress-step" style="color: #fca5a5;">
<span class="progress-step-icon"></span>
<span>Błąd połączenia: ${error.message}</span>
</div>
`;
btn.disabled = false;
btn.textContent = 'Szukaj artykułów';
}

View File

@ -248,6 +248,60 @@
color: var(--text-primary);
}
/* Star filter styling */
.star-filter .star-icon {
font-size: 10px;
letter-spacing: -1px;
}
.star-filter.active .star-icon {
color: #f59e0b;
}
/* Bulk actions */
.bulk-actions {
background: var(--surface);
padding: var(--spacing-md);
border-radius: var(--radius);
border: 1px solid var(--border);
}
/* Mass reject modal */
.mass-reject-options {
display: flex;
flex-direction: column;
gap: var(--spacing-sm);
margin: var(--spacing-lg) 0;
}
.mass-reject-option {
display: flex;
align-items: center;
gap: var(--spacing-sm);
padding: var(--spacing-sm) var(--spacing-md);
border: 1px solid var(--border);
border-radius: var(--radius);
cursor: pointer;
transition: var(--transition);
}
.mass-reject-option:hover {
background: var(--background);
}
.mass-reject-option.selected {
background: #fee2e2;
border-color: #dc3545;
}
.mass-reject-option input[type="checkbox"] {
accent-color: #dc3545;
}
.mass-reject-stars {
color: #f59e0b;
font-size: 14px;
}
.mass-reject-count {
margin-left: auto;
font-size: var(--font-size-sm);
color: var(--text-secondary);
}
@media (max-width: 768px) {
.news-table {
display: block;
@ -260,6 +314,13 @@
width: 100%;
margin-top: var(--spacing-md);
}
.filters {
flex-direction: column;
align-items: flex-start;
}
.bulk-actions {
flex-wrap: wrap;
}
}
</style>
{% endblock %}
@ -275,10 +336,19 @@
<div class="filters">
<span class="text-muted">Status:</span>
<a href="{{ url_for('admin_zopk_news', status='all', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'all' %}active{% endif %}">Wszystkie</a>
<a href="{{ url_for('admin_zopk_news', status='pending', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'pending' %}active{% endif %}">Oczekujące</a>
<a href="{{ url_for('admin_zopk_news', status='approved', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'approved' %}active{% endif %}">Zatwierdzone</a>
<a href="{{ url_for('admin_zopk_news', status='rejected', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'rejected' %}active{% endif %}">Odrzucone</a>
<a href="{{ url_for('admin_zopk_news', status='all', stars=current_stars, sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'all' %}active{% endif %}">Wszystkie</a>
<a href="{{ url_for('admin_zopk_news', status='pending', stars=current_stars, sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'pending' %}active{% endif %}">Oczekujące</a>
<a href="{{ url_for('admin_zopk_news', status='approved', stars=current_stars, sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'approved' %}active{% endif %}">Zatwierdzone</a>
<a href="{{ url_for('admin_zopk_news', status='rejected', stars=current_stars, sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_status == 'rejected' %}active{% endif %}">Odrzucone</a>
<span class="text-muted" style="margin-left: var(--spacing-md);">Gwiazdki:</span>
<a href="{{ url_for('admin_zopk_news', status=current_status, stars='all', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_stars == 'all' %}active{% endif %}">Wszystkie</a>
{% for star in [5, 4, 3, 2, 1] %}
<a href="{{ url_for('admin_zopk_news', status=current_status, stars=star, sort=current_sort, dir=current_dir) }}" class="filter-btn star-filter {% if current_stars == star|string %}active{% endif %}">
<span class="star-icon">{{ '★' * star }}{{ '☆' * (5 - star) }}</span>
</a>
{% endfor %}
<a href="{{ url_for('admin_zopk_news', status=current_status, stars='none', sort=current_sort, dir=current_dir) }}" class="filter-btn {% if current_stars == 'none' %}active{% endif %}">Brak oceny</a>
<div class="sort-controls">
<span class="text-muted">Sortuj:</span>
@ -293,6 +363,19 @@
</div>
</div>
<!-- Mass reject by stars -->
<div class="bulk-actions" style="margin-bottom: var(--spacing-lg); display: flex; gap: var(--spacing-md); align-items: center;">
<span class="text-muted">Akcje masowe:</span>
<button class="action-btn reject" onclick="showMassRejectModal()" style="padding: 6px 12px;">
🗑️ Odrzuć po gwiazdkach
</button>
{% if current_stars != 'all' and current_stars != 'none' and current_status == 'pending' %}
<button class="action-btn reject" onclick="rejectCurrentFilter()" style="padding: 6px 12px;">
✕ Odrzuć wszystkie {{ current_stars }}★ ({{ total }})
</button>
{% endif %}
</div>
{% if news_items %}
<div class="news-table-wrapper">
<table class="news-table">
@ -371,21 +454,21 @@
{% if total_pages > 1 %}
<nav class="pagination">
{% if page > 1 %}
<a href="{{ url_for('admin_zopk_news', page=page-1, status=current_status, sort=current_sort, dir=current_dir) }}">&laquo; Poprzednia</a>
<a href="{{ url_for('admin_zopk_news', page=page-1, status=current_status, stars=current_stars, sort=current_sort, dir=current_dir) }}">&laquo; Poprzednia</a>
{% endif %}
{% for p in range(1, total_pages + 1) %}
{% if p == page %}
<span class="current">{{ p }}</span>
{% elif p <= 3 or p > total_pages - 3 or (p >= page - 1 and p <= page + 1) %}
<a href="{{ url_for('admin_zopk_news', page=p, status=current_status, sort=current_sort, dir=current_dir) }}">{{ p }}</a>
<a href="{{ url_for('admin_zopk_news', page=p, status=current_status, stars=current_stars, sort=current_sort, dir=current_dir) }}">{{ p }}</a>
{% elif p == 4 or p == total_pages - 3 %}
<span>...</span>
{% endif %}
{% endfor %}
{% if page < total_pages %}
<a href="{{ url_for('admin_zopk_news', page=page+1, status=current_status, sort=current_sort, dir=current_dir) }}">Następna &raquo;</a>
<a href="{{ url_for('admin_zopk_news', page=page+1, status=current_status, stars=current_stars, sort=current_sort, dir=current_dir) }}">Następna &raquo;</a>
{% endif %}
</nav>
{% endif %}
@ -415,6 +498,44 @@
</div>
</div>
<!-- Mass Reject by Stars Modal -->
<div class="modal-overlay" id="massRejectModal">
<div class="modal" style="max-width: 480px;">
<div style="text-align: center; margin-bottom: var(--spacing-md);">
<div class="modal-icon">🗑️</div>
<h3 style="margin-bottom: var(--spacing-xs);">Masowe odrzucanie po gwiazdkach</h3>
<p class="modal-description">Wybierz oceny gwiazdkowe, które chcesz odrzucić.<br>Dotyczy tylko artykułów <strong>oczekujących</strong>.</p>
</div>
<div class="mass-reject-options" id="massRejectOptions">
{% for star in [1, 2, 3, 4, 5] %}
<label class="mass-reject-option" data-star="{{ star }}">
<input type="checkbox" name="reject_stars" value="{{ star }}">
<span class="mass-reject-stars">{{ '★' * star }}{{ '☆' * (5 - star) }}</span>
<span>{{ star }} {{ 'gwiazdka' if star == 1 else ('gwiazdki' if star < 5 else 'gwiazdek') }}</span>
<span class="mass-reject-count" id="star-count-{{ star }}">— szt.</span>
</label>
{% endfor %}
<label class="mass-reject-option" data-star="0">
<input type="checkbox" name="reject_stars" value="0">
<span class="mass-reject-stars" style="color: var(--text-secondary);"></span>
<span>Brak oceny AI</span>
<span class="mass-reject-count" id="star-count-0">— szt.</span>
</label>
</div>
<div class="form-group">
<label>Powód odrzucenia (wspólny dla wszystkich):</label>
<input type="text" id="massRejectReason" placeholder="np. Niska ocena AI, nieistotne artykuły...">
</div>
<div id="massRejectSummary" style="background: #fee2e2; padding: var(--spacing-md); border-radius: var(--radius); margin-bottom: var(--spacing-md); display: none;">
<strong>Do odrzucenia:</strong> <span id="massRejectTotal">0</span> artykułów
</div>
<div class="modal-actions" style="justify-content: center;">
<button type="button" class="btn btn-secondary" onclick="closeMassRejectModal()">Anuluj</button>
<button type="button" class="btn btn-danger" id="massRejectConfirmBtn" onclick="executeMassReject()">Odrzuć wybrane</button>
</div>
</div>
</div>
<div id="toastContainer" style="position: fixed; top: 80px; right: 20px; z-index: 1100; display: flex; flex-direction: column; gap: 10px;"></div>
<style>
@ -433,6 +554,7 @@
{% block extra_js %}
const csrfToken = '{{ csrf_token() }}';
const currentStars = '{{ current_stars }}';
// Universal Modal System
let confirmModalResolve = null;
@ -556,4 +678,207 @@ async function rejectNews(newsId) {
showToast('Błąd połączenia: ' + error.message, 'error');
}
}
// ============================================
// Mass Reject by Stars
// ============================================
let starCounts = {};
/**
 * Open the "mass reject by stars" modal.
 *
 * Loads the per-rating article counts from the server into the shared
 * `starCounts` map, writes them into the `star-count-N` labels, clears any
 * previous selection / reason / summary, and finally shows the modal.
 * A failed count request is only logged; the modal still opens with the
 * previously known counts.
 */
async function showMassRejectModal() {
    const modalEl = document.getElementById('massRejectModal');

    // Refresh the counters shown next to each star option.
    try {
        const resp = await fetch('/admin/zopk/news/star-counts', {
            method: 'GET',
            headers: { 'X-CSRFToken': csrfToken }
        });
        const payload = await resp.json();
        if (payload.success) {
            starCounts = payload.counts;
            // Rating 0 is the "no AI rating" row, 1-5 are the star rows.
            [0, 1, 2, 3, 4, 5].forEach((rating) => {
                const label = document.getElementById(`star-count-${rating}`);
                if (!label) return;
                label.textContent = `${starCounts[rating] || 0} szt.`;
            });
        }
    } catch (error) {
        console.error('Failed to fetch star counts:', error);
    }

    const boxes = document.querySelectorAll('#massRejectOptions input[type="checkbox"]');

    // Start from a clean form every time the modal is opened.
    boxes.forEach((box) => {
        box.checked = false;
        box.closest('.mass-reject-option').classList.remove('selected');
    });
    document.getElementById('massRejectReason').value = '';
    document.getElementById('massRejectSummary').style.display = 'none';

    modalEl.classList.add('active');

    // Registering the same handler again on a later open is harmless:
    // addEventListener ignores an identical (type, listener) pair.
    boxes.forEach((box) => {
        box.addEventListener('change', updateMassRejectSummary);
    });
}
/** Hide the "mass reject by stars" modal by dropping its `active` class. */
function closeMassRejectModal() {
    const modalEl = document.getElementById('massRejectModal');
    modalEl.classList.remove('active');
}
/**
 * Recompute the "to be rejected" summary of the mass-reject modal.
 *
 * Highlights the rows whose checkbox is ticked, sums the cached
 * `starCounts` of the ticked ratings, writes the sum into
 * `#massRejectTotal` and shows the summary box only when the sum is
 * greater than zero.
 */
function updateMassRejectSummary() {
    let selectedTotal = 0;

    document.querySelectorAll('#massRejectOptions input[type="checkbox"]').forEach((box) => {
        const row = box.closest('.mass-reject-option');
        if (box.checked) {
            selectedTotal += starCounts[parseInt(box.value)] || 0;
            row.classList.add('selected');
        } else {
            row.classList.remove('selected');
        }
    });

    document.getElementById('massRejectTotal').textContent = selectedTotal;
    document.getElementById('massRejectSummary').style.display =
        selectedTotal > 0 ? 'block' : 'none';
}
/**
 * Submit the mass-reject modal.
 *
 * Collects the ticked star ratings and the shared reason, asks the user to
 * confirm the number of affected articles (taken from the cached
 * `starCounts`), then POSTs `{stars, reason}` to
 * `/admin/zopk/news/reject-by-stars`. On success the page is reloaded after
 * one second; failures are reported through `showToast`.
 */
async function executeMassReject() {
    const picked = document.querySelectorAll('#massRejectOptions input[type="checkbox"]:checked');
    const stars = Array.from(picked, (box) => parseInt(box.value));
    const reason = document.getElementById('massRejectReason').value.trim();

    if (!stars.length) {
        showToast('Wybierz co najmniej jedną ocenę gwiazdkową', 'warning');
        return;
    }

    // Number of articles behind the selected ratings, per the cached counts.
    const total = stars.reduce((sum, rating) => sum + (starCounts[rating] || 0), 0);
    if (total === 0) {
        showToast('Brak artykułów do odrzucenia', 'info');
        closeMassRejectModal();
        return;
    }

    const confirmOptions = {
        icon: '⚠️',
        title: 'Potwierdzenie masowego odrzucenia',
        okText: `Odrzuć ${total} artykułów`,
        okClass: 'btn-danger'
    };
    const confirmed = await showConfirm(
        `Czy na pewno chcesz odrzucić ${total} artykułów?`,
        confirmOptions
    );
    if (!confirmed) return;

    closeMassRejectModal();

    try {
        const resp = await fetch('/admin/zopk/news/reject-by-stars', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': csrfToken
            },
            body: JSON.stringify({ stars, reason })
        });
        const result = await resp.json();
        if (!result.success) {
            showToast(result.error || 'Wystąpił błąd', 'error');
            return;
        }
        showToast(`Odrzucono ${result.count} artykułów`, 'success');
        setTimeout(() => location.reload(), 1000);
    } catch (error) {
        showToast('Błąd połączenia: ' + error.message, 'error');
    }
}
/**
 * Reject all articles that carry the star rating currently selected in the
 * page filter (`currentStars`).
 *
 * Flow: validate the rating (1-5), fetch the current per-rating counts,
 * ask for confirmation, then POST `{stars: [rating], reason}` to
 * `/admin/zopk/news/reject-by-stars` and reload the page on success.
 *
 * A failed count request (network error, non-JSON body, `success: false`)
 * is reported as an error and aborts the action. It is no longer mistaken
 * for "there are no articles to reject".
 */
async function rejectCurrentFilter() {
    const stars = parseInt(currentStars, 10);
    if (isNaN(stars) || stars < 1 || stars > 5) {
        showToast('Nieprawidłowy filtr gwiazdek', 'error');
        return;
    }

    // Fetch the count first so the confirmation dialog can show it.
    let count = 0;
    try {
        const response = await fetch('/admin/zopk/news/star-counts');
        const data = await response.json();
        if (!data.success) {
            showToast(data.error || 'Wystąpił błąd', 'error');
            return;
        }
        count = data.counts[stars] || 0;
    } catch (error) {
        showToast('Błąd połączenia: ' + error.message, 'error');
        return;
    }

    if (count === 0) {
        showToast('Brak artykułów do odrzucenia', 'info');
        return;
    }

    const confirmed = await showConfirm(
        `Czy na pewno chcesz odrzucić wszystkie ${count} artykułów z oceną ${stars}★?`,
        {
            icon: '⚠️',
            title: 'Potwierdzenie odrzucenia',
            okText: `Odrzuć ${count} artykułów`,
            okClass: 'btn-danger'
        }
    );
    if (!confirmed) return;

    try {
        const response = await fetch('/admin/zopk/news/reject-by-stars', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': csrfToken
            },
            body: JSON.stringify({ stars: [stars], reason: `Masowo odrzucone - ocena ${stars}★` })
        });
        const data = await response.json();
        if (data.success) {
            showToast(`Odrzucono ${data.count} artykułów`, 'success');
            setTimeout(() => location.reload(), 1000);
        } else {
            showToast(data.error || 'Wystąpił błąd', 'error');
        }
    } catch (error) {
        showToast('Błąd połączenia: ' + error.message, 'error');
    }
}
// Pressing Escape anywhere on the page closes the mass-reject modal.
document.addEventListener('keydown', (event) => {
    if (event.key !== 'Escape') return;
    closeMassRejectModal();
});
// A click that lands on the overlay itself (not on the dialog inside it)
// closes the modal as well.
document.getElementById('massRejectModal').addEventListener('click', (event) => {
    if (event.target.id !== 'massRejectModal') return;
    closeMassRejectModal();
});
{% endblock %}

View File

@ -138,48 +138,294 @@ RSS_SOURCES = {
}
}
# ZOPK-related keywords for filtering
ZOPK_KEYWORDS = [
# Project names
'zielony okręg przemysłowy',
'zopk',
'kaszubia przemysłowa',
# Energy projects
'offshore wind polska',
'offshore bałtyk',
'farma wiatrowa bałtyk',
'elektrownia jądrowa lubiatowo',
'elektrownia jądrowa kopalino',
'pej lubiatowo', # Polskie Elektrownie Jądrowe
# Defense industry
'kongsberg rumia',
'kongsberg polska',
'kongsberg defence',
'przemysł obronny pomorze',
'przemysł zbrojeniowy pomorze',
# Technology
'centrum danych gdynia',
'centrum danych pomorze',
'data center pomorze',
'wodór pomorze',
'hydrogen pomorze',
'laboratoria wodorowe',
# Key people
'samsonowicz mon',
'maciej samsonowicz',
'kosiniak-kamysz przemysł',
# Locations
'transformacja energetyczna pomorze',
'inwestycje wejherowo',
'inwestycje rumia',
'strefa ekonomiczna rumia',
'rumia invest park',
# Organizations
'norda biznes',
'spoko gospodarcze',
'izba gospodarcza pomorze'
# ============================================================
# BRAVE SEARCH - PRECISE QUERIES (instead of a single generic one)
# ============================================================
# Each entry is a dict with:
#   'query'       - the search string sent to Brave
#   'weight'      - integer 3-5 in this list; not read in the visible part
#                   of the pipeline (presumably a priority - TODO confirm)
#   'description' - short label used in logs and in the progress log
BRAVE_QUERIES = [
    # ============================================================
    # GROUP 1: ZOPK DIRECTLY (highest priority)
    # ============================================================
    {
        'query': '"Zielony Okręg Przemysłowy" OR "ZOPK Kaszubia"',
        'weight': 5,
        'description': 'ZOPK - bezpośrednie wzmianki'
    },
    {
        'query': '"Maciej Samsonowicz" MON OR przemysł obronny',
        'weight': 5,
        'description': 'Samsonowicz - koordynator ZOPK'
    },
    # ============================================================
    # GROUP 2: DEFENCE INDUSTRY
    # ============================================================
    {
        'query': '"Kongsberg" "Rumia" OR "Kongsberg Defence Poland"',
        'weight': 5,
        'description': 'Kongsberg Rumia'
    },
    # ============================================================
    # GROUP 3: OFFSHORE WIND - projects and companies
    # ============================================================
    {
        'query': '"Baltic Power" OR "Orsted Polska" offshore',
        'weight': 5,
        'description': 'Baltic Power / Orsted'
    },
    {
        'query': '"Baltica" Equinor offshore OR "Baltica 2" "Baltica 3"',
        'weight': 4,
        'description': 'Baltica - Equinor/Polenergia'
    },
    {
        'query': '"F.E.W. Baltic" OR "RWE" offshore Bałtyk wiatrowa',
        'weight': 4,
        'description': 'F.E.W. Baltic / RWE'
    },
    # ============================================================
    # GROUP 4: OFFSHORE WIND - infrastructure and supply chain
    # ============================================================
    {
        'query': '"port instalacyjny" offshore OR "hub serwisowy" wiatrowa Gdynia',
        'weight': 5,
        'description': 'Porty offshore'
    },
    {
        'query': '"CRIST" offshore OR "Remontowa Shipbuilding" wiatrowa',
        'weight': 4,
        'description': 'Stocznie dla offshore'
    },
    {
        'query': '"ST3 Offshore" OR "GSG Towers" wieże wiatrowe',
        'weight': 3,
        'description': 'Producenci konstrukcji'
    },
    # ============================================================
    # GROUP 5: NUCLEAR POWER PLANT
    # ============================================================
    {
        'query': '"elektrownia jądrowa" "Lubiatowo" OR "Choczewo" OR "Kopalino"',
        'weight': 5,
        'description': 'EJ Lubiatowo-Kopalino'
    },
    {
        'query': '"Polskie Elektrownie Jądrowe" OR "PEJ" atom',
        'weight': 5,
        'description': 'PEJ - spółka'
    },
    {
        'query': '"Westinghouse" Polska OR "AP1000" elektrownia',
        'weight': 5,
        'description': 'Westinghouse - technologia'
    },
    {
        'query': '"Bechtel" Polska atom OR elektrownia jądrowa',
        'weight': 4,
        'description': 'Bechtel - wykonawca'
    },
    # ============================================================
    # GROUP 6: SMR (Small Modular Reactors)
    # ============================================================
    {
        'query': '"SMR" Polska OR "Orlen Synthos Green Energy" reaktor',
        'weight': 4,
        'description': 'SMR - małe reaktory'
    },
    {
        'query': '"BWRX-300" OR "GE Hitachi" Polska atom',
        'weight': 4,
        'description': 'BWRX-300 / GE Hitachi'
    },
    # ============================================================
    # GROUP 7: HYDROGEN AND NEW TECHNOLOGIES
    # ============================================================
    {
        'query': '"Dolina Wodorowa" Pomorze OR "H2Gdańsk"',
        'weight': 4,
        'description': 'Dolina Wodorowa'
    },
    {
        'query': '"wodór zielony" Gdańsk OR Gdynia OR Pomorze',
        'weight': 3,
        'description': 'Wodór zielony Pomorze'
    },
    {
        'query': '"centrum danych" Gdynia OR "data center" Pomorze',
        'weight': 4,
        'description': 'Centra danych'
    },
    # ============================================================
    # GROUP 8: ECONOMIC ZONES AND LOCAL GOVERNMENTS
    # ============================================================
    {
        'query': '"Rumia Invest Park" OR "strefa ekonomiczna Rumia"',
        'weight': 4,
        'description': 'Rumia Invest Park'
    },
    {
        'query': '"gmina Choczewo" atom OR inwestycje',
        'weight': 4,
        'description': 'Gmina Choczewo'
    },
    {
        'query': '"gmina Krokowa" OR "powiat pucki" offshore energia',
        'weight': 3,
        'description': 'Samorządy lokalne'
    },
    # ============================================================
    # GROUP 9: PORTS AND LOGISTICS
    # ============================================================
    {
        'query': '"Port Gdynia" offshore OR inwestycje terminal',
        'weight': 4,
        'description': 'Port Gdynia'
    },
    {
        'query': '"Port Gdańsk" offshore OR "DCT" inwestycje',
        'weight': 3,
        'description': 'Port Gdańsk / DCT'
    },
    # ============================================================
    # GROUP 10: LOCAL ENERGY SECTOR
    # ============================================================
    {
        'query': '"Energa" offshore OR "Energa" inwestycje Pomorze',
        'weight': 3,
        'description': 'Energa - lokalny operator'
    },
    # ============================================================
    # GROUP 11: INDUSTRY EVENTS
    # ============================================================
    {
        'query': '"Offshore Wind Poland" konferencja OR "PSEW" wiatrowa',
        'weight': 3,
        'description': 'Konferencje offshore'
    },
    {
        'query': '"Forum Energii" Pomorze OR "WindEurope" Polska',
        'weight': 3,
        'description': 'Eventy energetyczne'
    }
]
# ============================================================
# DOMAIN BLACKLIST - automatic rejection
# ============================================================
# Entries are plain ASCII host names: that is the form a host name has in a
# URL, so a name spelled with diacritics could never match.
BLACKLISTED_DOMAINS = {
    # Sport
    'sport.pl', 'meczyki.pl', 'sportowefakty.wp.pl', 'przegladsportowy.pl',
    'sport.tvp.pl', 'goal.pl', 'sportbuzz.pl', 'pilkanozna.pl',
    # Gossip and lifestyle
    'pudelek.pl', 'plotek.pl', 'pomponik.pl', 'kozaczek.pl', 'jastrzabpost.pl',
    'plejada.pl', 'party.pl', 'viva.pl', 'gala.pl',
    # General news without local context
    'se.pl', 'fakt.pl', 'natemat.pl',
    # Other irrelevant sites
    'pogoda.interia.pl', 'allegro.pl', 'olx.pl', 'pracuj.pl',
    'gratka.pl', 'otodom.pl', 'otomoto.pl',
    # Foreign
    'reuters.com', 'bbc.com', 'cnn.com', 'theguardian.com'
}
# Preferred domains (bonus added to the score): host name -> bonus points.
# get_domain_bonus() iterates this dict in insertion order when it looks for
# a suffix match, so the order of entries is significant.
PREFERRED_DOMAINS = {
    'trojmiasto.pl': 2, 'dziennikbaltycki.pl': 2, 'nordafm.pl': 3,
    'ttm24.pl': 3, 'nadmorski24.pl': 2, 'gdynia.pl': 2,
    'wejherowo.pl': 2, 'rumia.eu': 2, 'gov.pl': 1,
    'biznes.gov.pl': 2, 'wnp.pl': 1, 'wysokienapiecie.pl': 2,
    'energetyka24.com': 2, 'defence24.pl': 2, 'gospodarkamorska.pl': 2
}
# ============================================================
# ZOPK KEYWORDS - keywords used for pre-filtering
# ============================================================
# ZOPK-related keywords for filtering (extended and grouped)
ZOPK_KEYWORDS_CRITICAL = [
    # MUST HAVE - direct hits (one is enough) -> score 5
    'zielony okręg przemysłowy', 'zopk',
    # Kongsberg
    'kongsberg rumia', 'kongsberg defence', 'kongsberg poland',
    # Key person
    'maciej samsonowicz', 'samsonowicz mon',
    # Nuclear power plant - locations
    'lubiatowo kopalino', 'elektrownia jądrowa lubiatowo', 'elektrownia jądrowa choczewo',
    # Main offshore projects
    'baltic power', 'baltica offshore', 'baltica 2', 'baltica 3',
    # Economic zone
    'rumia invest park',
    # PEJ
    'polskie elektrownie jądrowe', 'pej lubiatowo',
    # Westinghouse/Bechtel
    'westinghouse polska', 'ap1000 polska', 'bechtel polska',
    # Installation port
    'port instalacyjny offshore'
]
ZOPK_KEYWORDS_STRONG = [
    # STRONG - strong association (one is enough) -> score 4
    # Offshore wind
    'offshore bałtyk', 'farma wiatrowa bałtyk', 'morska energetyka wiatrowa',
    'orsted polska', 'equinor polska', 'rwe offshore', 'few baltic', 'ocean winds',
    'hub serwisowy offshore',
    # Shipyards serving offshore
    'crist offshore', 'remontowa shipbuilding', 'st3 offshore', 'gsg towers',
    # Nuclear - contractors and technology
    # NOTE(review): 'kongsberg polska' sits under this nuclear heading although
    # Kongsberg is grouped with the defence industry elsewhere in this file.
    'kongsberg polska', 'bwrx-300', 'ge hitachi polska',
    # SMR
    'orlen synthos', 'smr polska', 'małe reaktory modularne',
    # Defence industry
    'przemysł obronny pomorze',
    # Hydrogen
    'dolina wodorowa', 'h2gdańsk', 'wodór zielony gdańsk', 'wodór zielony gdynia',
    'laboratoria wodorowe',
    # Data center
    'centrum danych gdynia', 'data center gdynia',
    # Local governments
    'gmina choczewo', 'gmina krokowa', 'powiat pucki',
    # Ports
    'port gdynia offshore', 'terminal offshore gdynia',
    # People
    'kosiniak-kamysz przemysł',
    # Transformation
    'transformacja energetyczna pomorze'
]
ZOPK_KEYWORDS_WEAK = [
    # WEAK - weak association (needs 2+ hits or a hit combined with a location) -> score 2-3
    'offshore wind', 'elektrownia jądrowa', 'przemysł obronny', 'przemysł zbrojeniowy',
    'inwestycje przemysłowe', 'strefa ekonomiczna', 'centrum danych', 'data center',
    'farma wiatrowa', 'energia odnawialna', 'atom polska', 'energetyka jądrowa',
    'morskie wiatrowe', 'turbiny wiatrowe', 'fundamenty offshore', 'monopile',
    'wodór zielony', 'hydrogen', 'magazyn energii',
    'port instalacyjny', 'hub logistyczny', 'stocznia',
    'psew', 'offshore wind poland', 'windeurope', 'forum energii',
    'energa inwestycje'
]
ZOPK_LOCATIONS = [
    # Locations that strengthen weak keywords
    # NOTE: calculate_keyword_score() matches these as plain substrings of the
    # lower-cased text, so short names such as 'reda' or 'puck' also hit
    # inside longer words (e.g. 'redakcja').
    'kaszuby', 'kaszubia', 'pomorze', 'pomorskie',
    'wejherowo', 'rumia', 'gdynia', 'gdańsk', 'reda', 'puck',
    'choczewo', 'lubiatowo', 'kopalino', 'żarnowiec', 'krokowa',
    'bałtyk', 'baltyk', 'morze bałtyckie',
    'trójmiasto', 'trojmiasto'
]
# Full list (kept for backward compatibility): critical, then strong, then
# weak keywords concatenated in that order. Locations are not included.
ZOPK_KEYWORDS = ZOPK_KEYWORDS_CRITICAL + ZOPK_KEYWORDS_STRONG + ZOPK_KEYWORDS_WEAK
@dataclass
class NewsItem:
@ -246,67 +492,428 @@ def normalize_title_hash(title: str) -> str:
return hashlib.sha256(text.encode()).hexdigest()[:32]
def is_zopk_relevant(title: str, description: str = '') -> bool:
"""Check if content is relevant to ZOPK topics"""
def is_blacklisted_domain(domain: str) -> bool:
    """Check if a host name is on the blacklist.

    The host is lower-cased and a single leading ``www.`` is dropped. It is
    blacklisted when it equals an entry of ``BLACKLISTED_DOMAINS`` or is a
    subdomain of one (``m.pudelek.pl`` matches ``pudelek.pl``). The suffix
    test requires a dot boundary, so ``base.pl`` does not match ``se.pl``.

    Args:
        domain: Host name of the article URL. Empty or ``None`` is treated
            as not blacklisted.

    Returns:
        True when the host (or one of its parent domains) is blacklisted.
    """
    host = (domain or '').strip().lower()
    # Strip only a leading "www."; str.replace() would also remove the text
    # from the middle of a host name.
    if host.startswith('www.'):
        host = host[4:]
    if not host:
        return False
    if host in BLACKLISTED_DOMAINS:
        return True
    return any(host.endswith('.' + blocked) for blocked in BLACKLISTED_DOMAINS)
def get_domain_bonus(domain: str) -> int:
    """Get the bonus score for a preferred domain.

    The host is lower-cased and a single leading ``www.`` is dropped. An
    exact entry in ``PREFERRED_DOMAINS`` wins. Otherwise the host is treated
    as a subdomain (e.g. ``biznes.trojmiasto.pl``) and the bonus of the most
    specific, i.e. longest, matching parent domain is used. The suffix test
    requires a dot boundary, so ``fakegov.pl`` does not inherit the bonus of
    ``gov.pl``.

    Args:
        domain: Host name of the article URL. Empty or ``None`` gives 0.

    Returns:
        Bonus points from ``PREFERRED_DOMAINS``, or 0 when nothing matches.
    """
    host = (domain or '').strip().lower()
    # Strip only a leading "www."; str.replace() would also remove the text
    # from the middle of a host name.
    if host.startswith('www.'):
        host = host[4:]

    # Exact match.
    if host in PREFERRED_DOMAINS:
        return PREFERRED_DOMAINS[host]

    # Subdomain match: prefer the longest parent so that x.biznes.gov.pl gets
    # the bonus of biznes.gov.pl rather than the one of gov.pl.
    parents = [
        (len(pref_domain), bonus)
        for pref_domain, bonus in PREFERRED_DOMAINS.items()
        if host.endswith('.' + pref_domain)
    ]
    if not parents:
        return 0
    return max(parents)[1]
def calculate_keyword_score(title: str, description: str = '') -> dict:
    """Calculate the keyword relevance score of an article.

    Title and description are joined and lower-cased, then every entry of
    the critical / strong / weak keyword lists and of the location list is
    looked up as a plain substring. Matching is not word-bounded, so a short
    entry also hits inside a longer word.

    Scoring, first rule that applies:
        5 - at least one critical keyword
        4 - at least one strong keyword
        3 - a weak keyword together with a location
        3 - two or more weak keywords
        2 - a single weak keyword
        1 - only a location
        0 - nothing matched

    Args:
        title: Article title.
        description: Optional article description or snippet.

    Returns:
        dict with:
            - score: 0-5 (0 = no match, 5 = critical keyword)
            - matches: dict of matched entries under the keys 'critical',
              'strong', 'weak' and 'locations'
            - reason: human-readable explanation of the score
            - total_matches: number of matched entries across all groups
    """
    text = f"{title} {description}".lower()

    def _hits(candidates):
        # Entries found in the text, in list order.
        return [kw for kw in candidates if kw.lower() in text]

    matches = {
        'critical': _hits(ZOPK_KEYWORDS_CRITICAL),
        'strong': _hits(ZOPK_KEYWORDS_STRONG),
        'weak': _hits(ZOPK_KEYWORDS_WEAK),
        'locations': _hits(ZOPK_LOCATIONS)
    }

    # Calculate score
    if matches['critical']:
        score = 5
        reason = f"Trafienie krytyczne: {matches['critical'][0]}"
    elif matches['strong']:
        score = 4
        reason = f"Mocne powiązanie: {matches['strong'][0]}"
    elif matches['weak'] and matches['locations']:
        # Weak keyword + location = medium score
        score = 3
        reason = f"Słabe + lokalizacja: {matches['weak'][0]} + {matches['locations'][0]}"
    elif len(matches['weak']) >= 2:
        # Multiple weak keywords = medium score
        score = 3
        reason = f"Wiele słabych: {', '.join(matches['weak'][:2])}"
    elif matches['weak']:
        # Single weak keyword = low score
        score = 2
        reason = f"Tylko słabe: {matches['weak'][0]}"
    elif matches['locations']:
        # Only location, no industry keywords
        score = 1
        reason = f"Tylko lokalizacja: {matches['locations'][0]}"
    else:
        score = 0
        reason = "Brak trafień słów kluczowych"

    return {
        'score': score,
        'matches': matches,
        'reason': reason,
        'total_matches': sum(len(v) for v in matches.values())
    }
def is_zopk_relevant(title: str, description: str = '') -> bool:
    """Tell whether content is relevant to ZOPK topics (legacy entry point).

    Thin wrapper over calculate_keyword_score(): content counts as relevant
    when its keyword score is 3 or higher.
    """
    return calculate_keyword_score(title, description)['score'] >= 3
class ZOPKNewsService:
"""
Multi-source news search service with cross-verification.
Multi-source news search service with cross-verification and AI pre-filtering.
NOWY PIPELINE (2026-01):
1. Wyszukiwanie: wiele precyzyjnych zapytań Brave + RSS
2. Pre-filtrowanie: blacklista domen + słowa kluczowe
3. Ocena AI: PRZED zapisem do bazy (tylko 3+)
4. Zapis: tylko wysokiej jakości artykuły
"""
def __init__(self, db_session, brave_api_key: Optional[str] = None):
def __init__(self, db_session, brave_api_key: Optional[str] = None, enable_ai_prefilter: bool = True):
    """Create the service.

    Args:
        db_session: Database session, stored as ``self.db``.
        brave_api_key: Brave Search API key. When empty or omitted, the
            ``BRAVE_API_KEY`` environment variable is used instead.
        enable_ai_prefilter: Stored as-is on the instance; presumably
            switches the AI evaluation step on or off (TODO confirm in the
            search pipeline).
    """
    self.db = db_session
    self.brave_api_key = brave_api_key or os.getenv('BRAVE_API_KEY')
    self.enable_ai_prefilter = enable_ai_prefilter
    # Filled in on first use by _get_gemini().
    self._gemini_service = None
def search_all_sources(self, query: str = 'Zielony Okręg Przemysłowy Kaszubia') -> Dict:
def _get_gemini(self):
    """Return the Gemini service, loading it on first use.

    The service object is cached on the instance. When the import or the
    factory call fails, the error is logged and None is returned; the next
    call tries again.
    """
    if self._gemini_service is not None:
        return self._gemini_service

    try:
        from gemini_service import get_gemini_service
        self._gemini_service = get_gemini_service()
    except Exception as e:
        logger.error(f"Failed to load Gemini: {e}")

    return self._gemini_service
def search_all_sources(self, query: Optional[str] = None, user_id: Optional[int] = None) -> Dict:
    """
    Run the full news pipeline: search, pre-filter, AI-score, then persist.

    Stages, in order:
      1. Brave Search, one request per entry in BRAVE_QUERIES (skipped when
         no API key is configured).
      2. Every feed listed in RSS_SOURCES.
      3. Drop items whose domain is blacklisted.
      4. Drop items whose keyword score is below 2.
      5. Cross-verify / deduplicate the survivors.
      6. If AI pre-filtering is enabled and Gemini is available, score each
         survivor: scores below 3 are discarded, scores of 3+ are kept, and
         items whose evaluation failed are kept unscored for manual review.
      7. Save whatever is left to the database.

    Args:
        query: Deprecated and ignored; the queries come from BRAVE_QUERIES.
        user_id: Forwarded to the AI evaluator for usage tracking.

    Returns:
        Dict with the aggregate counters (``total_found``, ``blacklisted``,
        ``keyword_filtered``, ``ai_rejected``, ``ai_approved``,
        ``unique_items``, ``saved_new``, ``updated_existing``,
        ``auto_approved``), the raw ``source_stats``, the step-by-step
        ``process_log`` for the frontend progress display, and
        ``auto_approved_articles`` (title/score/source of each 3+ article).
    """
    all_items: List[NewsItem] = []
    source_stats = {
        'brave_queries': 0,
        'brave_results': 0,
        'rss_results': 0,
        'blacklisted': 0,
        'keyword_filtered': 0,
        'ai_rejected': 0,
        'ai_approved': 0,
        # Evaluations that errored out; kept separate so 'ai_approved'
        # really means "scored 3 or higher".
        'ai_failed': 0,
    }

    # Process log for frontend progress display
    process_log = []
    auto_approved_articles = []  # Articles the AI scored 3+

    def log_step(phase, step, message, count):
        """Append one progress entry in the shape the frontend expects."""
        process_log.append({
            'phase': phase,
            'step': step,
            'message': message,
            'count': count,
        })

    # 1. Brave Search - multiple precise queries
    log_step(
        'search', 'brave_start',
        f'Rozpoczynam wyszukiwanie Brave ({len(BRAVE_QUERIES)} zapytań)...',
        len(BRAVE_QUERIES),
    )

    if self.brave_api_key:
        for number, query_config in enumerate(BRAVE_QUERIES, start=1):
            brave_items = self._search_brave_single(query_config['query'])
            source_stats['brave_queries'] += 1
            source_stats['brave_results'] += len(brave_items)
            all_items.extend(brave_items)
            logger.info(f"Brave '{query_config['description']}': {len(brave_items)} items")
            log_step(
                'search', f'brave_{number}',
                f"Brave: {query_config['description']}",
                len(brave_items),
            )
    else:
        log_step('search', 'brave_skip', 'Brave API niedostępne - pominięto', 0)

    log_step(
        'search', 'brave_done',
        f'Brave: znaleziono {source_stats["brave_results"]} artykułów',
        source_stats['brave_results'],
    )

    # 2. RSS feeds
    log_step(
        'search', 'rss_start',
        f'Przeszukuję {len(RSS_SOURCES)} źródeł RSS...',
        len(RSS_SOURCES),
    )

    for source_id, source_config in RSS_SOURCES.items():
        rss_items = self._fetch_rss(source_id, source_config)
        all_items.extend(rss_items)
        source_stats[source_id] = len(rss_items)
        logger.info(f"RSS {source_id}: found {len(rss_items)} items")
        source_stats['rss_results'] += len(rss_items)
        # Only feeds that returned something get their own progress line.
        if rss_items:
            log_step(
                'search', f'rss_{source_id}',
                f"RSS: {source_config['name']}",
                len(rss_items),
            )

    log_step(
        'search', 'rss_done',
        f'RSS: znaleziono {source_stats["rss_results"]} artykułów',
        source_stats['rss_results'],
    )

    logger.info(f"Total raw items: {len(all_items)}")
    total_raw = len(all_items)
    log_step(
        'search', 'search_complete',
        f'📥 Łącznie pobrano: {total_raw} artykułów',
        total_raw,
    )

    # 3. Pre-filter: domain blacklist
    log_step('filter', 'blacklist_start', 'Filtrowanie: sprawdzam blacklistę domen...', 0)

    filtered_items = []
    for item in all_items:
        if is_blacklisted_domain(item.domain):
            source_stats['blacklisted'] += 1
            logger.debug(f"Blacklisted domain: {item.domain}")
            continue
        filtered_items.append(item)

    logger.info(f"After blacklist filter: {len(filtered_items)} (removed {source_stats['blacklisted']})")
    log_step(
        'filter', 'blacklist_done',
        f'🚫 Blacklist: usunięto {source_stats["blacklisted"]} artykułów (sport, plotki, lifestyle)',
        source_stats['blacklisted'],
    )

    # 4. Pre-filter: keyword score (minimum 2)
    log_step('filter', 'keywords_start', 'Filtrowanie: analiza słów kluczowych ZOPK...', 0)

    keyword_filtered = []
    for item in filtered_items:
        kw_result = calculate_keyword_score(item.title, item.description)
        if kw_result['score'] >= 2:  # At least weak relevance
            item.keyword_score = kw_result['score']
            item.keyword_reason = kw_result['reason']
            keyword_filtered.append(item)
        else:
            source_stats['keyword_filtered'] += 1

    logger.info(f"After keyword filter: {len(keyword_filtered)} (removed {source_stats['keyword_filtered']})")
    log_step(
        'filter', 'keywords_done',
        f'🔑 Keywords: usunięto {source_stats["keyword_filtered"]} (brak słów kluczowych ZOPK)',
        source_stats['keyword_filtered'],
    )
    log_step(
        'filter', 'filter_complete',
        f'✅ Po filtrowaniu: {len(keyword_filtered)} artykułów do analizy AI',
        len(keyword_filtered),
    )

    # 5. Cross-verify and deduplicate (done after filtering so rejected
    #    items never reach this step)
    verified_items = self._cross_verify(keyword_filtered)
    logger.info(f"After deduplication: {len(verified_items)} unique items")
    log_step(
        'filter', 'dedup_done',
        f'🔄 Deduplikacja: {len(verified_items)} unikalnych artykułów',
        len(verified_items),
    )

    # 6. AI evaluation (before saving) - only if enabled and available.
    #    The service is resolved once instead of once per article.
    gemini = self._get_gemini() if self.enable_ai_prefilter else None
    if gemini:
        log_step(
            'ai', 'ai_start',
            f'🤖 AI (Gemini): rozpoczynam ocenę {len(verified_items)} artykułów...',
            len(verified_items),
        )

        kept_items = []
        ai_evaluated_count = 0

        for item in verified_items:
            ai_result = evaluate_news_relevance(
                {
                    'title': item['title'],
                    'description': item['description'],
                    'source_name': item['source_name'],
                    'published_at': item.get('published_at'),
                },
                gemini,
                user_id=user_id,
            )
            ai_evaluated_count += 1

            if not ai_result.get('evaluated'):
                # Evaluation failed: keep the article, unscored, so it can
                # be reviewed manually - but do not report it as approved.
                item['ai_score'] = None
                item['ai_reason'] = ai_result.get('reason', 'AI evaluation failed')
                item['ai_relevant'] = None
                kept_items.append(item)
                source_stats['ai_failed'] += 1
                continue

            ai_score = ai_result.get('score', 0)
            if ai_score >= 3:
                item['ai_score'] = ai_score
                item['ai_reason'] = ai_result.get('reason', '')
                item['ai_relevant'] = True
                kept_items.append(item)
                source_stats['ai_approved'] += 1
                # Track for frontend display (title capped at 80 chars)
                auto_approved_articles.append({
                    'title': item['title'][:80] + ('...' if len(item['title']) > 80 else ''),
                    'score': ai_score,
                    'source': item.get('source_name', item.get('source_domain', '')),
                })
                logger.debug(f"AI approved ({ai_score}★): {item['title'][:50]}")
            else:
                # Low score - reject before saving
                source_stats['ai_rejected'] += 1
                logger.debug(f"AI rejected ({ai_score}★): {item['title'][:50]}")

        verified_items = kept_items
        logger.info(f"After AI filter: {len(verified_items)} approved, {source_stats['ai_rejected']} rejected")
        log_step(
            'ai', 'ai_done',
            f'🤖 AI: oceniono {ai_evaluated_count}, zaakceptowano {source_stats["ai_approved"]} (3+★), odrzucono {source_stats["ai_rejected"]}',
            source_stats['ai_approved'],
        )
    else:
        logger.info("AI pre-filter disabled or Gemini unavailable")
        log_step('ai', 'ai_skip', '🤖 AI: wyłączony lub niedostępny', 0)

    # 7. Save to database (only what survived the filters)
    log_step(
        'save', 'save_start',
        f'💾 Zapisuję {len(verified_items)} artykułów do bazy...',
        len(verified_items),
    )

    saved_count, updated_count = self._save_to_database(verified_items)

    log_step(
        'save', 'save_done',
        f'💾 Zapisano: {saved_count} nowych, {updated_count} zaktualizowanych',
        saved_count + updated_count,
    )

    # Final summary. An item counts as auto-approved when it carries the
    # 'auto_approve' flag or an AI score of 3 or higher.
    auto_approved_count = sum(
        1 for item in verified_items
        if item.get('auto_approve', False) or (item.get('ai_score') or 0) >= 3
    )

    log_step(
        'complete', 'done',
        f'✅ Zakończono! {saved_count} nowych artykułów w bazie wiedzy.',
        saved_count,
    )

    return {
        'total_found': source_stats['brave_results'] + source_stats['rss_results'],
        'blacklisted': source_stats['blacklisted'],
        'keyword_filtered': source_stats['keyword_filtered'],
        'ai_rejected': source_stats['ai_rejected'],
        'ai_approved': source_stats['ai_approved'],
        'unique_items': len(verified_items),
        'saved_new': saved_count,
        'updated_existing': updated_count,
        'source_stats': source_stats,
        'auto_approved': auto_approved_count,
        'process_log': process_log,
        'auto_approved_articles': auto_approved_articles,
    }
def _search_brave(self, query: str) -> List[NewsItem]:
"""Search Brave API for news"""
def _search_brave_single(self, query: str) -> List[NewsItem]:
"""Search Brave API with a single query"""
if not self.brave_api_key:
return []
@ -318,8 +925,8 @@ class ZOPKNewsService:
}
params = {
'q': query,
'count': 20,
'freshness': 'pm', # past month
'count': 10, # Fewer results per query (we have 8 queries)
'freshness': 'pw', # past week (more relevant than past month)
'country': 'pl',
'search_lang': 'pl'
}
@ -341,18 +948,26 @@ class ZOPKNewsService:
description=item.get('description', ''),
source_name=item.get('source', ''),
source_type='brave',
source_id='brave_search',
published_at=datetime.now(), # Brave doesn't provide exact date
source_id=f'brave_{query[:20]}',
published_at=datetime.now(),
image_url=item.get('thumbnail', {}).get('src')
))
else:
logger.error(f"Brave API error: {response.status_code}")
logger.error(f"Brave API error for '{query[:30]}': {response.status_code}")
except Exception as e:
logger.error(f"Brave search error: {e}")
return items
def _search_brave(self, query: str) -> List[NewsItem]:
    """
    Legacy entry point kept for backward compatibility.

    The ``query`` argument is not used: every query configured in
    BRAVE_QUERIES is run through _search_brave_single and the results are
    returned as one flat list, in query order.
    """
    return [
        hit
        for spec in BRAVE_QUERIES
        for hit in self._search_brave_single(spec['query'])
    ]
def _fetch_rss(self, source_id: str, config: Dict) -> List[NewsItem]:
"""Fetch and parse RSS feed"""
items = []
@ -503,7 +1118,15 @@ class ZOPKNewsService:
updated_count += 1
else:
# Create new entry
status = 'auto_approved' if item['auto_approve'] else 'pending'
# Determine status based on AI score (if available)
# Note: score >= 3 triggers auto-approve (verified 2026-01-15)
ai_score = item.get('ai_score')
if ai_score and ai_score >= 3:
status = 'auto_approved' # AI score 3+ = auto-approve
elif item.get('auto_approve'):
status = 'auto_approved' # Multiple sources = auto-approve
else:
status = 'pending'
news = ZOPKNews(
title=item['title'],
@ -515,12 +1138,18 @@ class ZOPKNewsService:
source_domain=item['source_domain'],
source_type=item['source_type'],
published_at=item['published_at'],
image_url=item['image_url'],
image_url=item.get('image_url'),
confidence_score=item['confidence_score'],
source_count=item['source_count'],
sources_list=item['sources_list'],
is_auto_verified=item['auto_approve'],
status=status
is_auto_verified=item.get('auto_approve', False) or (ai_score and ai_score >= 3),
status=status,
# AI evaluation results from pre-filtering
ai_relevant=item.get('ai_relevant'),
ai_relevance_score=ai_score,
ai_evaluation_reason=item.get('ai_reason', '')[:255] if item.get('ai_reason') else None,
ai_evaluated_at=datetime.now() if ai_score else None,
ai_model='gemini-2.0-flash' if ai_score else None
)
self.db.add(news)
new_count += 1
@ -529,16 +1158,29 @@ class ZOPKNewsService:
return new_count, updated_count
def search_zopk_news(db_session, query: Optional[str] = None, user_id: Optional[int] = None, enable_ai_prefilter: bool = True) -> Dict:
    """
    Convenience function to search ZOPK news from all sources.

    Builds a ZOPKNewsService for the given session and runs one full
    search_all_sources pass.

    Pipeline (introduced 2026-01), as described by the service:
    - 8 precise Brave queries (instead of 1 general query)
    - Domain blacklist (sport, gossip, lifestyle)
    - Keyword pre-filtering (minimum score 2)
    - AI evaluation BEFORE saving (articles scored 1-2 are rejected)
    - Articles scored 3+ are saved to the database

    Args:
        db_session: SQLAlchemy session
        query: Deprecated, ignored
        user_id: User ID for tracking AI usage
        enable_ai_prefilter: If True, evaluate with AI before saving (default: True)

    Returns:
        The result dict produced by ZOPKNewsService.search_all_sources.

    Usage:
        from zopk_news_service import search_zopk_news
        results = search_zopk_news(db, user_id=current_user.id)
    """
    service = ZOPKNewsService(db_session, enable_ai_prefilter=enable_ai_prefilter)
    return service.search_all_sources(user_id=user_id)
# ============================================================