fix(zopk): Polish error messages and show failures as skipped, not errors
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Admin was confused by red "Błędy: 2" when scraping/extraction had expected issues (403, content too short).

Changes:
- All scraper/extractor messages translated to Polish
- HTTP 403/404/429 get specific descriptive messages
- Expected failures shown as yellow "Pominięte" instead of red "Błędy"
- "No chunks created" → "Treść za krótka do ekstrakcji"
- Summary label "Błędy" → "Pominięte"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
a174ca3103
commit
3b3bb7bdd7
@ -239,8 +239,8 @@ def admin_zopk_knowledge_extract_stream():
|
||||
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'success', 'message': f'✓ {result.chunks_created}ch, {result.facts_created}f, {result.entities_created}e', 'article_id': article.id, 'details': {'new_chunks': result.chunks_created, 'new_facts': result.facts_created, 'new_entities': result.entities_created, **stats}}, ensure_ascii=False)}\n\n"
|
||||
else:
|
||||
stats['failed'] += 1
|
||||
error_msg = result.error[:50] if result.error else 'Nieznany błąd'
|
||||
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'failed', 'message': f'✗ {error_msg}', 'article_id': article.id, 'details': {'error': result.error, **stats}}, ensure_ascii=False)}\n\n"
|
||||
error_msg = result.error[:60] if result.error else 'Nieznany błąd'
|
||||
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'extracting', 'status': 'skipped', 'message': f'⊘ {error_msg}', 'article_id': article.id, 'details': {'error': result.error, **stats}}, ensure_ascii=False)}\n\n"
|
||||
|
||||
processing_time = round(time.time() - start_time, 2)
|
||||
|
||||
|
||||
@ -859,9 +859,9 @@ def admin_zopk_news_scrape_stream():
|
||||
status = 'skipped'
|
||||
msg = f'⊘ Pominięto: {article.title[:50]}'
|
||||
else:
|
||||
failed += 1
|
||||
status = 'failed'
|
||||
msg = f'✗ {(result.error or "Błąd")[:40]}: {article.title[:40]}'
|
||||
skipped += 1
|
||||
status = 'skipped'
|
||||
msg = f'⊘ {(result.error or "Niedostępny")}: {article.title[:40]}'
|
||||
|
||||
pct = round((i + 1) / total * 100, 1)
|
||||
yield f"data: {json.dumps({'current': i + 1, 'total': total, 'percent': pct, 'status': status, 'message': msg, 'details': {'success': scraped, 'scraped': scraped, 'failed': failed, 'skipped': skipped}}, ensure_ascii=False)}\n\n"
|
||||
|
||||
@ -2010,7 +2010,7 @@
|
||||
<span class="summary-value" id="aiOpsSummarySuccess">0</span>
|
||||
</div>
|
||||
<div class="summary-row">
|
||||
<span class="summary-label">✗ Błędy:</span>
|
||||
<span class="summary-label">⊘ Pominięte:</span>
|
||||
<span class="summary-value" id="aiOpsSummaryFailed">0</span>
|
||||
</div>
|
||||
<div class="summary-row" id="aiOpsSummarySkippedRow" style="display: none;">
|
||||
|
||||
@ -454,15 +454,23 @@ class ZOPKContentScraper:
|
||||
return response.text, None
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
return None, "Request timeout"
|
||||
return None, "Przekroczono czas połączenia"
|
||||
except requests.exceptions.TooManyRedirects:
|
||||
return None, "Too many redirects"
|
||||
return None, "Zbyt wiele przekierowań"
|
||||
except requests.exceptions.HTTPError as e:
|
||||
return None, f"HTTP error: {e.response.status_code}"
|
||||
code = e.response.status_code
|
||||
if code == 403:
|
||||
return None, "Strona blokuje pobieranie (403)"
|
||||
elif code == 404:
|
||||
return None, "Strona nie istnieje (404)"
|
||||
elif code == 429:
|
||||
return None, "Za dużo zapytań, spróbuj później (429)"
|
||||
else:
|
||||
return None, f"Strona niedostępna ({code})"
|
||||
except requests.exceptions.ConnectionError:
|
||||
return None, "Connection error"
|
||||
return None, "Nie udało się połączyć z serwerem"
|
||||
except requests.exceptions.RequestException as e:
|
||||
return None, f"Request error: {str(e)}"
|
||||
return None, f"Błąd pobierania: {str(e)}"
|
||||
|
||||
def _clean_html(self, soup: BeautifulSoup) -> BeautifulSoup:
|
||||
"""Remove unwanted elements from HTML."""
|
||||
@ -511,13 +519,13 @@ class ZOPKContentScraper:
|
||||
content_element = self._find_largest_text_block(soup)
|
||||
|
||||
if not content_element:
|
||||
return None, "Could not find article content"
|
||||
return None, "Nie znaleziono treści artykułu"
|
||||
|
||||
# Extract text
|
||||
text = self._extract_text(content_element)
|
||||
|
||||
if not text or len(text) < 100:
|
||||
return None, "Extracted content too short"
|
||||
return None, "Treść artykułu za krótka"
|
||||
|
||||
# Truncate if too long
|
||||
if len(text) > MAX_CONTENT_LENGTH:
|
||||
|
||||
@ -578,7 +578,7 @@ class ZOPKKnowledgeService:
|
||||
return ExtractionResult(
|
||||
success=True,
|
||||
news_id=news_id,
|
||||
error="Already extracted"
|
||||
error="Już wyekstrahowano"
|
||||
)
|
||||
|
||||
# Check if content is scraped
|
||||
@ -586,7 +586,7 @@ class ZOPKKnowledgeService:
|
||||
return ExtractionResult(
|
||||
success=False,
|
||||
news_id=news_id,
|
||||
error="No content scraped"
|
||||
error="Brak zescrapowanej treści"
|
||||
)
|
||||
|
||||
logger.info(f"Extracting knowledge from news {news_id}: {news.title[:50]}...")
|
||||
@ -598,7 +598,7 @@ class ZOPKKnowledgeService:
|
||||
return ExtractionResult(
|
||||
success=False,
|
||||
news_id=news_id,
|
||||
error="No chunks created"
|
||||
error="Treść za krótka do ekstrakcji"
|
||||
)
|
||||
|
||||
# Statistics
|
||||
|
||||
Loading…
Reference in New Issue
Block a user