fix(zopk): Translate error messages to Polish and show failures as skipped, not errors
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

The admin was confused by the red "Błędy: 2" counter when scraping/extraction
hit expected issues (HTTP 403, content too short). Changes:
- All scraper/extractor messages translated to Polish
- HTTP 403/404/429 get specific descriptive messages
- Expected failures shown as yellow "Pominięte" instead of red "Błędy"
- "No chunks created" → "Treść za krótka do ekstrakcji"
- Summary label "Błędy" → "Pominięte"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-09 15:36:00 +01:00
parent a174ca3103
commit 3b3bb7bdd7
5 changed files with 24 additions and 16 deletions

View File

@ -239,8 +239,8 @@ def admin_zopk_knowledge_extract_stream():
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'success', 'message': f'{result.chunks_created}ch, {result.facts_created}f, {result.entities_created}e', 'article_id': article.id, 'details': {'new_chunks': result.chunks_created, 'new_facts': result.facts_created, 'new_entities': result.entities_created, **stats}}, ensure_ascii=False)}\n\n"
else:
stats['failed'] += 1
error_msg = result.error[:50] if result.error else 'Nieznany błąd'
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'failed', 'message': f' {error_msg}', 'article_id': article.id, 'details': {'error': result.error, **stats}}, ensure_ascii=False)}\n\n"
error_msg = result.error[:60] if result.error else 'Nieznany błąd'
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'skipped', 'message': f' {error_msg}', 'article_id': article.id, 'details': {'error': result.error, **stats}}, ensure_ascii=False)}\n\n"
processing_time = round(time.time() - start_time, 2)

View File

@ -859,9 +859,9 @@ def admin_zopk_news_scrape_stream():
status = 'skipped'
msg = f'⊘ Pominięto: {article.title[:50]}'
else:
failed += 1
status = 'failed'
msg = f'{(result.error or "Błąd")[:40]}: {article.title[:40]}'
skipped += 1
status = 'skipped'
msg = f'{(result.error or "Niedostępny")}: {article.title[:40]}'
pct = round((i + 1) / total * 100, 1)
yield f"data: {json.dumps({'current': i + 1, 'total': total, 'percent': pct, 'status': status, 'message': msg, 'details': {'success': scraped, 'scraped': scraped, 'failed': failed, 'skipped': skipped}}, ensure_ascii=False)}\n\n"

View File

@ -2010,7 +2010,7 @@
<span class="summary-value" id="aiOpsSummarySuccess">0</span>
</div>
<div class="summary-row">
<span class="summary-label">✗ Błędy:</span>
<span class="summary-label">⊘ Pominięte:</span>
<span class="summary-value" id="aiOpsSummaryFailed">0</span>
</div>
<div class="summary-row" id="aiOpsSummarySkippedRow" style="display: none;">

View File

@ -454,15 +454,23 @@ class ZOPKContentScraper:
return response.text, None
except requests.exceptions.Timeout:
return None, "Request timeout"
return None, "Przekroczono czas połączenia"
except requests.exceptions.TooManyRedirects:
return None, "Too many redirects"
return None, "Zbyt wiele przekierowań"
except requests.exceptions.HTTPError as e:
return None, f"HTTP error: {e.response.status_code}"
code = e.response.status_code
if code == 403:
return None, "Strona blokuje pobieranie (403)"
elif code == 404:
return None, "Strona nie istnieje (404)"
elif code == 429:
return None, "Za dużo zapytań, spróbuj później (429)"
else:
return None, f"Strona niedostępna ({code})"
except requests.exceptions.ConnectionError:
return None, "Connection error"
return None, "Nie udało się połączyć z serwerem"
except requests.exceptions.RequestException as e:
return None, f"Request error: {str(e)}"
return None, f"Błąd pobierania: {str(e)}"
def _clean_html(self, soup: BeautifulSoup) -> BeautifulSoup:
"""Remove unwanted elements from HTML."""
@ -511,13 +519,13 @@ class ZOPKContentScraper:
content_element = self._find_largest_text_block(soup)
if not content_element:
return None, "Could not find article content"
return None, "Nie znaleziono treści artykułu"
# Extract text
text = self._extract_text(content_element)
if not text or len(text) < 100:
return None, "Extracted content too short"
return None, "Treść artykułu za krótka"
# Truncate if too long
if len(text) > MAX_CONTENT_LENGTH:

View File

@ -578,7 +578,7 @@ class ZOPKKnowledgeService:
return ExtractionResult(
success=True,
news_id=news_id,
error="Already extracted"
error="Już wyekstrahowano"
)
# Check if content is scraped
@ -586,7 +586,7 @@ class ZOPKKnowledgeService:
return ExtractionResult(
success=False,
news_id=news_id,
error="No content scraped"
error="Brak zescrapowanej treści"
)
logger.info(f"Extracting knowledge from news {news_id}: {news.title[:50]}...")
@ -598,7 +598,7 @@ class ZOPKKnowledgeService:
return ExtractionResult(
success=False,
news_id=news_id,
error="No chunks created"
error="Treść za krótka do ekstrakcji"
)
# Statistics