fix(zopk): Translate remaining English messages and unify skip status
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Translate the remaining English scraper messages (unscrapeable domain, non-HTML content, extraction error) into Polish
- Report embedding failures in the progress stream with status "skipped" (yellow) instead of "failed" (red); the `failed` counter in the stats is still incremented

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-09 15:44:34 +01:00
parent 1c02d109d7
commit 3c1f920675
2 changed files with 7 additions and 7 deletions

View File

@ -337,11 +337,11 @@ def admin_zopk_embeddings_stream():
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'success', 'message': f'✓ 768 dim: {summary_short}', 'details': stats}, ensure_ascii=False)}\n\n"
else:
stats['failed'] += 1
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f' Brak odpowiedzi API', 'details': stats}, ensure_ascii=False)}\n\n"
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'skipped', 'message': f' Brak odpowiedzi API', 'details': stats}, ensure_ascii=False)}\n\n"
except Exception as e:
stats['failed'] += 1
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f' {str(e)[:40]}', 'details': {'error': str(e), **stats}}, ensure_ascii=False)}\n\n"
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'skipped', 'message': f' {str(e)[:40]}', 'details': {'error': str(e), **stats}}, ensure_ascii=False)}\n\n"
db.commit()
processing_time = round(time.time() - start_time, 2)

View File

@ -430,7 +430,7 @@ class ZOPKContentScraper:
# Check if domain should be skipped
if self._should_skip_domain(domain):
return None, f"Domain {domain} is not scrapeable (social media/paywall)"
return None, f"Domena {domain} nie do scrapowania (social media/paywall)"
# Apply rate limiting
self._wait_for_rate_limit(domain)
@ -446,7 +446,7 @@ class ZOPKContentScraper:
# Check content type
content_type = response.headers.get('Content-Type', '')
if 'text/html' not in content_type and 'application/xhtml' not in content_type:
return None, f"Not HTML content: {content_type}"
return None, f"Nie jest HTML: {content_type}"
# Detect encoding
response.encoding = response.apparent_encoding or 'utf-8'
@ -536,7 +536,7 @@ class ZOPKContentScraper:
except Exception as e:
logger.error(f"Error extracting content: {e}")
return None, f"Extraction error: {str(e)}"
return None, f"Błąd przetwarzania: {str(e)}"
def _find_largest_text_block(self, soup: BeautifulSoup) -> Optional[BeautifulSoup]:
"""Find the largest text block in the page (fallback method)."""
@ -665,11 +665,11 @@ class ZOPKContentScraper:
# Check if should skip
if self._should_skip_domain(domain):
news.scrape_status = 'skipped'
news.scrape_error = f"Domain {domain} not scrapeable"
news.scrape_error = f"Domena {domain} — pominięta"
self.db.commit()
return ScrapeResult(
success=False,
error=f"Domain {domain} not scrapeable",
error=f"Domena {domain} — pominięta",
status='skipped'
)