nordabiz/blueprints/admin/routes_zopk_news.py
Maciej Pienczyn 3b3bb7bdd7
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
fix(zopk): Polish error messages and show failures as skipped, not errors
Admin was confused by red "Błędy: 2" when scraping/extraction had
expected issues (403, content too short). Changes:
- All scraper/extractor messages translated to Polish
- HTTP 403/404/429 get specific descriptive messages
- Expected failures shown as yellow "Pominięte" instead of red "Błędy"
- "No chunks created" → "Treść za krótka do ekstrakcji"
- Summary label "Błędy" → "Pominięte"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 15:36:00 +01:00

886 lines
30 KiB
Python

"""
ZOPK News Routes - Admin blueprint
Migrated from app.py as part of the blueprint refactoring.
Contains routes for ZOPK news management, scraping, and AI evaluation.
"""
import hashlib
import json
import logging
import queue
import threading
import uuid
from datetime import datetime
from urllib.parse import urlparse
from flask import flash, jsonify, redirect, render_template, request, url_for, Response, stream_with_context
from flask_login import current_user, login_required
from sqlalchemy import desc, asc, func, or_
from sqlalchemy.sql import nullslast
from database import (
SessionLocal,
SystemRole,
ZOPKProject,
ZOPKNews,
ZOPKNewsFetchJob
)
from utils.decorators import role_required
from . import bp
logger = logging.getLogger(__name__)
@bp.route('/zopk/news')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_news():
    """Render the paginated ZOPK news moderation list.

    Query-string driven: filter by moderation status and AI star rating,
    sort by date (default), AI score or title, ascending or descending.
    """
    db = SessionLocal()
    try:
        page = request.args.get('page', 1, type=int)
        status = request.args.get('status', 'all')
        stars = request.args.get('stars', 'all')    # 'all', '1'-'5', 'none'
        sort_by = request.args.get('sort', 'date')  # 'date', 'score', 'title'
        sort_dir = request.args.get('dir', 'desc')  # 'asc', 'desc'
        per_page = 50

        query = db.query(ZOPKNews)
        if status != 'all':
            query = query.filter(ZOPKNews.status == status)

        # Star filter: 'none' selects items the AI never evaluated.
        if stars == 'none':
            query = query.filter(ZOPKNews.ai_relevance_score.is_(None))
        elif stars in ('1', '2', '3', '4', '5'):
            query = query.filter(ZOPKNews.ai_relevance_score == int(stars))

        direction = desc if sort_dir == 'desc' else asc
        if sort_by == 'score':
            # NULL scores sort last so AI-evaluated items lead the list.
            query = query.order_by(nullslast(direction(ZOPKNews.ai_relevance_score)))
        elif sort_by == 'title':
            query = query.order_by(direction(ZOPKNews.title))
        else:
            # Default ordering: publication date.
            query = query.order_by(direction(ZOPKNews.published_at))

        total = query.count()
        news_items = query.offset((page - 1) * per_page).limit(per_page).all()
        total_pages = (total + per_page - 1) // per_page  # ceiling division
        projects = db.query(ZOPKProject).order_by(ZOPKProject.sort_order).all()

        return render_template(
            'admin/zopk_news.html',
            news_items=news_items,
            projects=projects,
            page=page,
            total_pages=total_pages,
            total=total,
            current_status=status,
            current_stars=stars,
            current_sort=sort_by,
            current_dir=sort_dir,
        )
    finally:
        db.close()
@bp.route('/zopk/news/<int:news_id>/approve', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_news_approve(news_id):
    """Mark a single ZOPK news item as approved by the current admin."""
    db = SessionLocal()
    try:
        item = db.query(ZOPKNews).filter(ZOPKNews.id == news_id).first()
        if item is None:
            return jsonify({'success': False, 'error': 'Nie znaleziono newsa'}), 404
        # Record the moderation decision and who made it.
        item.status = 'approved'
        item.moderated_by = current_user.id
        item.moderated_at = datetime.now()
        db.commit()
        return jsonify({'success': True, 'message': 'News został zatwierdzony'})
    except Exception as e:
        db.rollback()
        logger.error(f"Error approving ZOPK news {news_id}: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas zatwierdzania'}), 500
    finally:
        db.close()
@bp.route('/zopk/news/<int:news_id>/reject', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_news_reject(news_id):
    """Reject a single ZOPK news item, optionally recording a reason."""
    db = SessionLocal()
    try:
        payload = request.get_json() or {}
        item = db.query(ZOPKNews).filter(ZOPKNews.id == news_id).first()
        if item is None:
            return jsonify({'success': False, 'error': 'Nie znaleziono newsa'}), 404
        # Record the moderation decision, who made it, and why.
        item.status = 'rejected'
        item.moderated_by = current_user.id
        item.moderated_at = datetime.now()
        item.rejection_reason = payload.get('reason', '')
        db.commit()
        return jsonify({'success': True, 'message': 'News został odrzucony'})
    except Exception as e:
        db.rollback()
        logger.error(f"Error rejecting ZOPK news {news_id}: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas odrzucania'}), 500
    finally:
        db.close()
@bp.route('/zopk/news/add', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_news_add():
    """Manually add a ZOPK news item.

    Validates title/URL presence, restricts the URL scheme to http(s),
    verifies an optional project_id against the database, and deduplicates
    by SHA-256 of the URL. Manual entries are auto-approved.
    """
    db = SessionLocal()
    try:
        data = request.get_json() or {}
        title = data.get('title', '').strip()
        url = data.get('url', '').strip()
        description = data.get('description', '').strip()
        source_name = data.get('source_name', '').strip()
        project_id = data.get('project_id')
        if not title or not url:
            return jsonify({'success': False, 'error': 'Tytuł i URL są wymagane'}), 400
        # SECURITY: Validate URL protocol (block javascript:, data:, etc.)
        parsed = urlparse(url)
        if parsed.scheme.lower() not in ('http', 'https'):
            return jsonify({'success': False, 'error': 'Nieprawidłowy protokół URL. Dozwolone: http, https'}), 400
        # SECURITY: Validate project_id if provided.
        if project_id:
            try:
                project_id = int(project_id)
            except (ValueError, TypeError):
                return jsonify({'success': False, 'error': 'ID projektu musi być liczbą'}), 400
            project = db.query(ZOPKProject).filter(ZOPKProject.id == project_id).first()
            if not project:
                return jsonify({'success': False, 'error': 'Nieprawidłowy ID projektu'}), 400
        else:
            project_id = None
        # SHA-256 of the URL is the deduplication key.
        url_hash = hashlib.sha256(url.encode()).hexdigest()
        existing = db.query(ZOPKNews).filter(ZOPKNews.url_hash == url_hash).first()
        if existing:
            return jsonify({'success': False, 'error': 'Ten artykuł już istnieje w bazie'}), 400
        # BUGFIX: strip only a leading 'www.' from the host. The previous
        # str.replace('www.', '') removed the substring anywhere in the
        # domain (e.g. 'news.wwwx.pl'-style hosts were mangled).
        source_domain = parsed.netloc
        if source_domain.startswith('www.'):
            source_domain = source_domain[4:]
        news = ZOPKNews(
            title=title,
            url=url,
            url_hash=url_hash,
            description=description,
            source_name=source_name or source_domain,
            source_domain=source_domain,
            source_type='manual',
            status='approved',  # Manual entries are auto-approved
            moderated_by=current_user.id,
            moderated_at=datetime.now(),
            published_at=datetime.now(),
            project_id=project_id
        )
        db.add(news)
        db.commit()
        return jsonify({
            'success': True,
            'message': 'News został dodany',
            'news_id': news.id
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Error adding ZOPK news: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas dodawania newsa'}), 500
    finally:
        db.close()
@bp.route('/zopk/news/reject-old', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_reject_old_news():
    """Reject all pending news published before a cutoff year.

    ZOPK did not exist before 2024, so older articles are noise. The cutoff
    comes from the JSON body (`min_year`, default 2024).
    """
    db = SessionLocal()
    try:
        data = request.get_json() or {}
        # BUGFIX: min_year came straight from client JSON; a non-integer or
        # out-of-range value made datetime() raise TypeError/ValueError,
        # which surfaced as a generic 500. Validate and return 400 instead.
        try:
            min_year = int(data.get('min_year', 2024))
            min_date = datetime(min_year, 1, 1)
        except (TypeError, ValueError):
            return jsonify({'success': False, 'error': 'Nieprawidłowy rok graniczny'}), 400
        # Find all pending news published before the cutoff.
        old_news = db.query(ZOPKNews).filter(
            ZOPKNews.status == 'pending',
            ZOPKNews.published_at < min_date
        ).all()
        count = len(old_news)
        # Reject them all with an explanatory reason.
        for news in old_news:
            news.status = 'rejected'
            news.moderated_by = current_user.id
            news.moderated_at = datetime.now()
            news.rejection_reason = f'Automatycznie odrzucony - artykuł sprzed {min_year} roku (ZOP Kaszubia powstał w 2024)'
        db.commit()
        return jsonify({
            'success': True,
            'message': f'Odrzucono {count} newsów sprzed {min_year} roku',
            'count': count
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Error rejecting old ZOPK news: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas odrzucania starych newsów'}), 500
    finally:
        db.close()
@bp.route('/zopk/news/star-counts')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_news_star_counts():
    """Return counts of pending news grouped by AI star rating.

    Response `counts` maps 1-5 to the number of pending items with that
    score; key 0 holds items the AI never evaluated (NULL score).
    """
    db = SessionLocal()
    try:
        def pending_count(score_condition):
            # Count pending items matching one score condition.
            return db.query(func.count(ZOPKNews.id)).filter(
                ZOPKNews.status == 'pending',
                score_condition
            ).scalar()

        counts = {
            star: pending_count(ZOPKNews.ai_relevance_score == star)
            for star in range(1, 6)
        }
        counts[0] = pending_count(ZOPKNews.ai_relevance_score.is_(None))
        return jsonify({
            'success': True,
            'counts': counts
        })
    except Exception as e:
        logger.error(f"Error getting ZOPK news star counts: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd'}), 500
    finally:
        db.close()
@bp.route('/zopk/news/reject-by-stars', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_reject_by_stars():
    """Bulk-reject every pending news item whose AI star rating is in the
    requested set (0 stands for items the AI never rated)."""
    db = SessionLocal()
    try:
        payload = request.get_json() or {}
        requested = payload.get('stars', [])  # ratings to reject; 0 = no rating
        reason = payload.get('reason', '')
        if not requested:
            return jsonify({'success': False, 'error': 'Nie wybrano ocen do odrzucenia'}), 400
        # Drop anything outside the valid 0-5 range.
        valid_stars = [s for s in requested if s in (0, 1, 2, 3, 4, 5)]
        if not valid_stars:
            return jsonify({'success': False, 'error': 'Nieprawidłowe oceny gwiazdkowe'}), 400
        # One OR-condition per requested rating; 0 matches NULL scores.
        conditions = [
            ZOPKNews.ai_relevance_score.is_(None) if s == 0
            else ZOPKNews.ai_relevance_score == s
            for s in valid_stars
        ]
        targets = db.query(ZOPKNews).filter(
            ZOPKNews.status == 'pending',
            or_(*conditions)
        ).all()
        count = len(targets)
        # Fall back to an auto-generated reason listing the ratings.
        labels = ['brak oceny' if s == 0 else str(s) for s in valid_stars]
        default_reason = f"Masowo odrzucone - oceny: {', '.join(labels)}"
        final_reason = reason if reason else default_reason
        for item in targets:
            item.status = 'rejected'
            item.moderated_by = current_user.id
            item.moderated_at = datetime.now()
            item.rejection_reason = final_reason
        db.commit()
        logger.info(f"Admin {current_user.email} rejected {count} ZOPK news with stars {valid_stars}")
        return jsonify({
            'success': True,
            'message': f'Odrzucono {count} artykułów',
            'count': count
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Error rejecting ZOPK news by stars: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas odrzucania'}), 500
    finally:
        db.close()
@bp.route('/zopk/news/evaluate-ai', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_evaluate_ai():
    """Run Gemini AI relevance evaluation over pending news items."""
    from zopk_news_service import evaluate_pending_news
    db = SessionLocal()
    try:
        payload = request.get_json() or {}
        batch_limit = payload.get('limit', 50)  # keep batches small for API limits
        outcome = evaluate_pending_news(db, limit=batch_limit, user_id=current_user.id)
        # Echo the service counters back to the client.
        response = {'success': True}
        for key in ('total_evaluated', 'relevant_count', 'not_relevant_count', 'errors'):
            response[key] = outcome.get(key, 0)
        response['message'] = outcome.get('message', '')
        return jsonify(response)
    except Exception as e:
        db.rollback()
        logger.error(f"Error evaluating ZOPK news with AI: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas oceny AI'}), 500
    finally:
        db.close()
@bp.route('/zopk/news/reevaluate-scores', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_reevaluate_scores():
    """Re-run AI evaluation for items flagged relevant but missing a
    1-5 star ai_relevance_score."""
    from zopk_news_service import reevaluate_news_without_score
    db = SessionLocal()
    try:
        payload = request.get_json() or {}
        batch_limit = payload.get('limit', 50)  # keep batches small for API limits
        outcome = reevaluate_news_without_score(db, limit=batch_limit, user_id=current_user.id)
        # Echo the service counters back to the client.
        response = {'success': True}
        for key in ('total_evaluated', 'relevant_count', 'not_relevant_count', 'errors'):
            response[key] = outcome.get(key, 0)
        response['message'] = outcome.get('message', '')
        return jsonify(response)
    except Exception as e:
        db.rollback()
        logger.error(f"Error reevaluating ZOPK news scores: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas ponownej oceny'}), 500
    finally:
        db.close()
@bp.route('/zopk/news/reevaluate-low-scores', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_reevaluate_low_scores():
    """Re-evaluate low-scored (1-2 star) news containing key ZOPK topics.

    Intended for use after the AI prompt gains new topics (e.g. Via
    Pomerania, S6, NORDA): articles scored low before those topics were
    recognized get a second pass and may be upgraded.
    """
    from zopk_news_service import reevaluate_low_score_news
    db = SessionLocal()
    try:
        payload = request.get_json() or {}
        batch_limit = payload.get('limit', 50)  # keep batches small for API limits
        outcome = reevaluate_low_score_news(db, limit=batch_limit, user_id=current_user.id)
        # Echo the service counters and per-item details back to the client.
        response = {'success': True}
        for key in ('total_evaluated', 'upgraded', 'downgraded', 'unchanged', 'errors'):
            response[key] = outcome.get(key, 0)
        response['message'] = outcome.get('message', '')
        response['details'] = outcome.get('details', [])
        return jsonify(response)
    except Exception as e:
        db.rollback()
        logger.error(f"Error reevaluating low-score ZOPK news: {e}")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas ponownej oceny'}), 500
    finally:
        db.close()
@bp.route('/zopk-api/search-news', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_search_news():
    """
    Search for ZOPK news using multiple sources with cross-verification.
    Sources:
    - Brave Search API
    - Google News RSS
    - Local media RSS (trojmiasto.pl, dziennikbaltycki.pl)
    Cross-verification:
    - 1 source → pending (manual review)
    - 3+ sources → auto_approved
    """
    from zopk_news_service import ZOPKNewsService
    db = SessionLocal()
    # BUGFIX: fetch_job was referenced in the except handler even when the
    # exception fired before the job row was created, raising NameError and
    # masking the real error. Initialize it and guard the handler.
    fetch_job = None
    try:
        data = request.get_json() or {}
        query = data.get('query', 'Zielony Okręg Przemysłowy Kaszubia')
        # Create fetch job record so the run is visible in job history.
        job_id = str(uuid.uuid4())[:8]
        fetch_job = ZOPKNewsFetchJob(
            job_id=job_id,
            search_query=query,
            search_api='multi_source',  # Brave + RSS
            triggered_by='admin',
            triggered_by_user=current_user.id,
            status='running',
            started_at=datetime.now()
        )
        db.add(fetch_job)
        db.commit()
        # Use multi-source service.
        service = ZOPKNewsService(db)
        results = service.search_all_sources(query)
        # Update fetch job with final counters.
        fetch_job.results_found = results['total_found']
        fetch_job.results_new = results['saved_new']
        fetch_job.results_approved = results['auto_approved']
        fetch_job.status = 'completed'
        fetch_job.completed_at = datetime.now()
        db.commit()
        # NOTE: removed the unused `source_info` local; per-source counts
        # are already returned verbatim in 'source_stats'.
        return jsonify({
            'success': True,
            'message': f"Znaleziono {results['total_found']} wyników z {len(results['source_stats'])} źródeł. "
            f"Dodano {results['saved_new']} nowych, zaktualizowano {results['updated_existing']}. "
            f"Auto-zatwierdzono: {results['auto_approved']}",
            'job_id': job_id,
            'total_found': results['total_found'],
            'unique_items': results['unique_items'],
            'saved_new': results['saved_new'],
            'updated_existing': results['updated_existing'],
            'auto_approved': results['auto_approved'],
            'ai_approved': results.get('ai_approved', 0),
            'ai_rejected': results.get('ai_rejected', 0),
            'blacklisted': results.get('blacklisted', 0),
            'keyword_filtered': results.get('keyword_filtered', 0),
            'sent_to_ai': results.get('sent_to_ai', 0),
            'duplicates': results.get('duplicates', 0),
            'processing_time': results.get('processing_time', 0),
            'knowledge_entities_created': results.get('knowledge_entities_created', 0),
            'source_stats': results['source_stats'],
            'process_log': results.get('process_log', []),
            'auto_approved_articles': results.get('auto_approved_articles', []),
            'ai_rejected_articles': results.get('ai_rejected_articles', [])
        })
    except Exception as e:
        db.rollback()
        logger.error(f"ZOPK news search error: {e}")
        # Best-effort: persist failure on the job row if it was created.
        if fetch_job is not None:
            try:
                fetch_job.status = 'failed'
                fetch_job.error_message = str(e)  # Keep internal log
                fetch_job.completed_at = datetime.now()
                db.commit()
            except Exception:  # BUGFIX: was a bare except (swallowed SystemExit)
                logger.warning("Could not persist failed status for ZOPK fetch job")
        return jsonify({'success': False, 'error': 'Wystąpił błąd podczas wyszukiwania newsów'}), 500
    finally:
        db.close()
@bp.route('/zopk-api/search-news-stream')
@login_required
@role_required(SystemRole.ADMIN)
def api_zopk_search_news_stream():
    """SSE endpoint for streaming ZOPK news search progress in real-time.

    Runs the multi-source search in a worker thread and relays its progress
    callbacks to the client as Server-Sent Events, emitting a heartbeat
    every second while no progress is available.
    """
    from zopk_news_service import ZOPKNewsService
    query = request.args.get('query', 'Zielony Okręg Przemysłowy Kaszubia')
    user_id = current_user.id  # Capture before generator (proxy loses context inside)

    def generate():
        db = SessionLocal()
        try:
            # Create fetch job record so the run is visible in job history.
            job_id = str(uuid.uuid4())[:8]
            fetch_job = ZOPKNewsFetchJob(
                job_id=job_id,
                search_query=query,
                search_api='multi_source_sse',
                triggered_by='admin',
                triggered_by_user=user_id,
                status='running',
                started_at=datetime.now()
            )
            db.add(fetch_job)
            db.commit()

            # Worker pushes (phase, message, current, total) tuples here;
            # a trailing None is the end-of-stream sentinel.
            progress_queue = queue.Queue()
            result_holder = [None]  # one-slot boxes to pass data out of the thread
            error_holder = [None]

            def on_progress(phase, message, current, total):
                # Called from the worker thread for each progress step.
                progress_queue.put((phase, message, current, total))

            def run_search():
                # Uses its own session: the request-scoped one is not shared
                # across threads.
                search_db = SessionLocal()
                try:
                    service = ZOPKNewsService(search_db)
                    result_holder[0] = service.search_all_sources(
                        query, user_id=user_id, progress_callback=on_progress
                    )
                except Exception as e:
                    error_holder[0] = e
                finally:
                    search_db.close()
                    progress_queue.put(None)  # sentinel

            thread = threading.Thread(target=run_search, daemon=True)
            thread.start()

            # Stream progress events; the 1s heartbeat keeps proxies from
            # closing an otherwise-quiet connection.
            while True:
                try:
                    item = progress_queue.get(timeout=1)
                except queue.Empty:
                    yield f"data: {json.dumps({'type': 'heartbeat'})}\n\n"
                    continue
                if item is None:
                    break
                phase, message, current, total = item
                yield f"data: {json.dumps({'type': 'progress', 'phase': phase, 'message': message, 'current': current, 'total': total})}\n\n"

            thread.join(timeout=10)
            if error_holder[0]:
                # Update job on error (best effort; stream continues regardless).
                try:
                    fetch_job.status = 'failed'
                    fetch_job.error_message = str(error_holder[0])
                    fetch_job.completed_at = datetime.now()
                    db.commit()
                except Exception:
                    pass
                yield f"data: {json.dumps({'type': 'error', 'message': str(error_holder[0])})}\n\n"
            else:
                results = result_holder[0]
                # Update fetch job with final counters.
                fetch_job.results_found = results['total_found']
                fetch_job.results_new = results['saved_new']
                fetch_job.results_approved = results['auto_approved']
                fetch_job.status = 'completed'
                fetch_job.completed_at = datetime.now()
                db.commit()
                yield f"data: {json.dumps({'type': 'complete', 'results': results})}\n\n"
        except Exception as e:
            logger.error(f"SSE stream error: {e}")
            yield f"data: {json.dumps({'type': 'error', 'message': 'Wystąpił błąd podczas wyszukiwania'})}\n\n"
        finally:
            db.close()

    return Response(
        stream_with_context(generate()),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no',  # disable nginx response buffering
            'Connection': 'keep-alive'
        }
    )
@bp.route('/zopk/news/scrape-stats')
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_scrape_stats():
    """Return content-scraping statistics as JSON.

    The payload (from zopk_content_scraper.get_scrape_stats) includes:
    total_approved, scraped, pending, failed, skipped and
    ready_for_extraction counters.
    """
    from zopk_content_scraper import get_scrape_stats
    db = SessionLocal()
    try:
        # Merge the stats dict into the success envelope.
        return jsonify({'success': True, **get_scrape_stats(db)})
    except Exception as e:
        logger.error(f"Error getting scrape stats: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk/news/scrape-content', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_scrape_content():
    """Batch-scrape article content from source URLs.

    JSON body: `limit` (default 50, hard-capped at 100) and `force`
    (default false, re-scrape already scraped). Returns the scraper's
    result counters plus a Polish summary message.
    """
    from zopk_content_scraper import ZOPKContentScraper
    db = SessionLocal()
    try:
        payload = request.get_json() or {}
        batch_size = min(payload.get('limit', 50), 100)  # never more than 100 at once
        rescrape = payload.get('force', False)
        result = ZOPKContentScraper(db, user_id=current_user.id).batch_scrape(
            limit=batch_size, force=rescrape
        )
        summary = (
            f"Scraping zakończony: {result['scraped']} pobrano, "
            f"{result['failed']} błędów, {result['skipped']} pominięto"
        )
        return jsonify({'success': True, 'message': summary, **result})
    except Exception as e:
        db.rollback()
        logger.error(f"Error in batch scrape: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk/news/<int:news_id>/scrape', methods=['POST'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_scrape_single(news_id):
    """Scrape content for a single article; 400 with the scraper's error
    message when the scrape does not succeed."""
    from zopk_content_scraper import ZOPKContentScraper
    db = SessionLocal()
    try:
        outcome = ZOPKContentScraper(db, user_id=current_user.id).scrape_article(news_id)
        if not outcome.success:
            # Guard clause: surface the scraper's own error and status.
            return jsonify({
                'success': False,
                'error': outcome.error,
                'status': outcome.status
            }), 400
        return jsonify({
            'success': True,
            'message': f"Pobrano treść: {outcome.word_count} słów",
            'word_count': outcome.word_count,
            'status': outcome.status
        })
    except Exception as e:
        db.rollback()
        logger.error(f"Error scraping article {news_id}: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
@bp.route('/zopk/news/scrape-content/stream', methods=['GET'])
@login_required
@role_required(SystemRole.ADMIN)
def admin_zopk_news_scrape_stream():
    """
    Stream scraping progress using Server-Sent Events.
    Query params:
    - limit: int (default 50)
    - force: bool (default false) — when true, re-scrape already-scraped
      articles and ignore the retry cap.
    """
    from zopk_content_scraper import ZOPKContentScraper, MAX_RETRY_ATTEMPTS
    limit = request.args.get('limit', 50, type=int)
    force = request.args.get('force', 'false').lower() == 'true'
    user_id = current_user.id  # Capture before generator (proxy loses context inside)

    def generate():
        import json
        import time as _time
        db = SessionLocal()
        try:
            scraper = ZOPKContentScraper(db, user_id=user_id)
            # Only approved/auto-approved articles are eligible for scraping.
            query = db.query(ZOPKNews).filter(
                ZOPKNews.status.in_(['approved', 'auto_approved'])
            )
            if not force:
                # Skip already-scraped items and those that exhausted retries.
                query = query.filter(
                    ZOPKNews.scrape_status.in_(['pending', 'failed']),
                    ZOPKNews.scrape_attempts < MAX_RETRY_ATTEMPTS
                )
            articles = query.order_by(ZOPKNews.published_at.desc()).limit(limit).all()
            total = len(articles)
            if total == 0:
                # Nothing to do — emit a terminal 'complete' event immediately.
                yield f"data: {json.dumps({'status': 'complete', 'message': 'Brak artykułów do scrapowania', 'total': 0, 'details': {'success': 0, 'failed': 0, 'skipped': 0}}, ensure_ascii=False)}\n\n"
                return
            yield f"data: {json.dumps({'current': 0, 'total': total, 'percent': 0, 'status': 'processing', 'message': f'Rozpoczynam scraping {total} artykułów...'}, ensure_ascii=False)}\n\n"
            start_time = _time.time()
            scraped = 0
            failed = 0
            skipped = 0
            # One SSE progress event per article, with running counters.
            for i, article in enumerate(articles):
                result = scraper.scrape_article(article.id)
                if result.success:
                    scraped += 1
                    status = 'success'
                    msg = f'{result.word_count} słów: {article.title[:50]}'
                elif result.status == 'skipped':
                    skipped += 1
                    status = 'skipped'
                    msg = f'⊘ Pominięto: {article.title[:50]}'
                else:
                    # Deliberate UX choice: scrape failures (e.g. HTTP 403,
                    # content too short) are reported as 'skipped' rather than
                    # errors, so `failed` stays 0 here. The key is kept in the
                    # payload for client compatibility.
                    skipped += 1
                    status = 'skipped'
                    msg = f'{(result.error or "Niedostępny")}: {article.title[:40]}'
                pct = round((i + 1) / total * 100, 1)
                yield f"data: {json.dumps({'current': i + 1, 'total': total, 'percent': pct, 'status': status, 'message': msg, 'details': {'success': scraped, 'scraped': scraped, 'failed': failed, 'skipped': skipped}}, ensure_ascii=False)}\n\n"
            processing_time = round(_time.time() - start_time, 2)
            yield f"data: {json.dumps({'current': total, 'total': total, 'percent': 100, 'status': 'complete', 'message': f'Zakończono: {scraped} pobrano, {failed} błędów, {skipped} pominięto', 'details': {'success': scraped, 'scraped': scraped, 'failed': failed, 'skipped': skipped, 'processing_time': processing_time}}, ensure_ascii=False)}\n\n"
        except Exception as e:
            logger.error(f"Error in scrape stream: {e}")
            yield f"data: {json.dumps({'status': 'error', 'message': str(e)}, ensure_ascii=False)}\n\n"
        finally:
            db.close()

    return Response(
        stream_with_context(generate()),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'X-Accel-Buffering': 'no'  # disable nginx response buffering
        }
    )