fix(zopk): Translate remaining English messages and unify skip status
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Remaining scraper messages: Domain/Not HTML/Extraction error → Polish
- Embedding failures shown as skipped (yellow) instead of failed (red)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1c02d109d7
commit
3c1f920675
@@ -337,11 +337,11 @@ def admin_zopk_embeddings_stream():
|
||||
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'success', 'message': f'✓ 768 dim: {summary_short}', 'details': stats}, ensure_ascii=False)}\n\n"
|
||||
else:
|
||||
stats['failed'] += 1
|
||||
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f'✗ Brak odpowiedzi API', 'details': stats}, ensure_ascii=False)}\n\n"
|
||||
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'skipped', 'message': f'⊘ Brak odpowiedzi API', 'details': stats}, ensure_ascii=False)}\n\n"
|
||||
|
||||
except Exception as e:
|
||||
stats['failed'] += 1
|
||||
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f'✗ {str(e)[:40]}', 'details': {'error': str(e), **stats}}, ensure_ascii=False)}\n\n"
|
||||
yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'skipped', 'message': f'⊘ {str(e)[:40]}', 'details': {'error': str(e), **stats}}, ensure_ascii=False)}\n\n"
|
||||
|
||||
db.commit()
|
||||
processing_time = round(time.time() - start_time, 2)
|
||||
|
||||
@@ -430,7 +430,7 @@ class ZOPKContentScraper:
|
||||
|
||||
# Check if domain should be skipped
|
||||
if self._should_skip_domain(domain):
|
||||
return None, f"Domain {domain} is not scrapeable (social media/paywall)"
|
||||
return None, f"Domena {domain} nie do scrapowania (social media/paywall)"
|
||||
|
||||
# Apply rate limiting
|
||||
self._wait_for_rate_limit(domain)
|
||||
@@ -446,7 +446,7 @@ class ZOPKContentScraper:
|
||||
# Check content type
|
||||
content_type = response.headers.get('Content-Type', '')
|
||||
if 'text/html' not in content_type and 'application/xhtml' not in content_type:
|
||||
return None, f"Not HTML content: {content_type}"
|
||||
return None, f"Nie jest HTML: {content_type}"
|
||||
|
||||
# Detect encoding
|
||||
response.encoding = response.apparent_encoding or 'utf-8'
|
||||
@@ -536,7 +536,7 @@ class ZOPKContentScraper:
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting content: {e}")
|
||||
return None, f"Extraction error: {str(e)}"
|
||||
return None, f"Błąd przetwarzania: {str(e)}"
|
||||
|
||||
def _find_largest_text_block(self, soup: BeautifulSoup) -> Optional[BeautifulSoup]:
|
||||
"""Find the largest text block in the page (fallback method)."""
|
||||
@@ -665,11 +665,11 @@ class ZOPKContentScraper:
|
||||
# Check if should skip
|
||||
if self._should_skip_domain(domain):
|
||||
news.scrape_status = 'skipped'
|
||||
news.scrape_error = f"Domain {domain} not scrapeable"
|
||||
news.scrape_error = f"Domena {domain} — pominięta"
|
||||
self.db.commit()
|
||||
return ScrapeResult(
|
||||
success=False,
|
||||
error=f"Domain {domain} not scrapeable",
|
||||
error=f"Domena {domain} — pominięta",
|
||||
status='skipped'
|
||||
)
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user