fix(zopk): Translate remaining English messages and unify skip status

- Remaining scraper messages: Domain/Not HTML/Extraction error → Polish
- Embedding failures shown as skipped (yellow) instead of failed (red)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-09 15:44:34 +01:00 · 2026-02-09 15:44:34 +01:00 · 3c1f920675
commit 3c1f920675
parent 1c02d109d7
2 changed files with 7 additions and 7 deletions
--- a/blueprints/admin/routes_zopk_knowledge.py
+++ b/blueprints/admin/routes_zopk_knowledge.py
@@ -337,11 +337,11 @@ def admin_zopk_embeddings_stream():
                        yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'success', 'message': f'✓ 768 dim: {summary_short}', 'details': stats}, ensure_ascii=False)}\n\n"
                    else:
                        stats['failed'] += 1
-                        yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f'✗ Brak odpowiedzi API', 'details': stats}, ensure_ascii=False)}\n\n"
+                        yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'skipped', 'message': f'⊘ Brak odpowiedzi API', 'details': stats}, ensure_ascii=False)}\n\n"

                except Exception as e:
                    stats['failed'] += 1
-                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'failed', 'message': f'✗ {str(e)[:40]}', 'details': {'error': str(e), **stats}}, ensure_ascii=False)}\n\n"
+                    yield f"data: {json.dumps({'current': idx, 'total': total, 'percent': round(idx/total*100, 1), 'stage': 'embedding', 'status': 'skipped', 'message': f'⊘ {str(e)[:40]}', 'details': {'error': str(e), **stats}}, ensure_ascii=False)}\n\n"

            db.commit()
            processing_time = round(time.time() - start_time, 2)
--- a/zopk_content_scraper.py
+++ b/zopk_content_scraper.py
@@ -430,7 +430,7 @@ class ZOPKContentScraper:

        # Check if domain should be skipped
        if self._should_skip_domain(domain):
-            return None, f"Domain {domain} is not scrapeable (social media/paywall)"
+            return None, f"Domena {domain} nie do scrapowania (social media/paywall)"

        # Apply rate limiting
        self._wait_for_rate_limit(domain)
@@ -446,7 +446,7 @@ class ZOPKContentScraper:
            # Check content type
            content_type = response.headers.get('Content-Type', '')
            if 'text/html' not in content_type and 'application/xhtml' not in content_type:
-                return None, f"Not HTML content: {content_type}"
+                return None, f"Nie jest HTML: {content_type}"

            # Detect encoding
            response.encoding = response.apparent_encoding or 'utf-8'
@@ -536,7 +536,7 @@ class ZOPKContentScraper:

        except Exception as e:
            logger.error(f"Error extracting content: {e}")
-            return None, f"Extraction error: {str(e)}"
+            return None, f"Błąd przetwarzania: {str(e)}"

    def _find_largest_text_block(self, soup: BeautifulSoup) -> Optional[BeautifulSoup]:
        """Find the largest text block in the page (fallback method)."""
@@ -665,11 +665,11 @@ class ZOPKContentScraper:
        # Check if should skip
        if self._should_skip_domain(domain):
            news.scrape_status = 'skipped'
-            news.scrape_error = f"Domain {domain} not scrapeable"
+            news.scrape_error = f"Domena {domain} — pominięta"
            self.db.commit()
            return ScrapeResult(
                success=False,
-                error=f"Domain {domain} not scrapeable",
+                error=f"Domena {domain} — pominięta",
                status='skipped'
            )