From 7cc5f033fefbbc0c19fe6fc7a16dda7a8ef60bd2 Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Sat, 17 Jan 2026 00:27:08 +0100 Subject: [PATCH] fix: Poprawiono nazwy kluczy w pipeline skrypcie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zmieniono 'processed' -> 'success' i 'generated' -> 'success' aby pasowały do wartości zwracanych przez batch_extract() i generate_chunk_embeddings(). Co-Authored-By: Claude Opus 4.5 --- scripts/zopk_knowledge_pipeline.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/zopk_knowledge_pipeline.py b/scripts/zopk_knowledge_pipeline.py index 24e8c68..4cbf81d 100755 --- a/scripts/zopk_knowledge_pipeline.py +++ b/scripts/zopk_knowledge_pipeline.py @@ -74,7 +74,8 @@ def run_extraction(db, limit: int = 50) -> dict: service = ZOPKKnowledgeService(db) stats = service.batch_extract(limit=limit) - logger.info(f"Extraction complete: {stats['processed']} processed, {stats['failed']} failed") + logger.info(f"Extraction complete: {stats['success']} success, {stats['failed']} failed") + logger.info(f"Created: {stats['chunks_created']} chunks, {stats['facts_created']} facts, {stats['entities_created']} entities") return stats @@ -88,7 +89,7 @@ def run_embeddings(db, limit: int = 100) -> dict: stats = generate_chunk_embeddings(db, limit=limit) - logger.info(f"Embeddings complete: {stats['generated']} generated, {stats['failed']} failed") + logger.info(f"Embeddings complete: {stats['success']} generated, {stats['failed']} failed") return stats @@ -130,8 +131,9 @@ def main(): logger.info("PIPELINE SUMMARY") logger.info("=" * 60) logger.info(f"Scraping: {scrape_stats['scraped']} success, {scrape_stats['failed']} failed") - logger.info(f"Extraction: {extract_stats.get('processed', 0)} success, {extract_stats.get('failed', 0)} failed") - logger.info(f"Embeddings: {embed_stats.get('generated', 0)} success, {embed_stats.get('failed', 0)} failed") + logger.info(f"Extraction: {extract_stats.get('success', 0)} success, {extract_stats.get('failed', 0)} failed") + logger.info(f" -> Chunks: {extract_stats.get('chunks_created', 0)}, Facts: {extract_stats.get('facts_created', 0)}, Entities: {extract_stats.get('entities_created', 0)}") + logger.info(f"Embeddings: {embed_stats.get('success', 0)} success, {embed_stats.get('failed', 0)} failed") logger.info(f"Duration: {duration:.1f} seconds") logger.info("=" * 60) logger.info("PIPELINE COMPLETED SUCCESSFULLY")