diff --git a/scripts/zopk_knowledge_pipeline.py b/scripts/zopk_knowledge_pipeline.py index 24e8c68..4cbf81d 100755 --- a/scripts/zopk_knowledge_pipeline.py +++ b/scripts/zopk_knowledge_pipeline.py @@ -74,7 +74,8 @@ def run_extraction(db, limit: int = 50) -> dict: service = ZOPKKnowledgeService(db) stats = service.batch_extract(limit=limit) - logger.info(f"Extraction complete: {stats['processed']} processed, {stats['failed']} failed") + logger.info(f"Extraction complete: {stats['success']} success, {stats['failed']} failed") + logger.info(f"Created: {stats['chunks_created']} chunks, {stats['facts_created']} facts, {stats['entities_created']} entities") return stats @@ -88,7 +89,7 @@ def run_embeddings(db, limit: int = 100) -> dict: stats = generate_chunk_embeddings(db, limit=limit) - logger.info(f"Embeddings complete: {stats['generated']} generated, {stats['failed']} failed") + logger.info(f"Embeddings complete: {stats['success']} generated, {stats['failed']} failed") return stats @@ -130,8 +131,9 @@ def main(): logger.info("PIPELINE SUMMARY") logger.info("=" * 60) logger.info(f"Scraping: {scrape_stats['scraped']} success, {scrape_stats['failed']} failed") - logger.info(f"Extraction: {extract_stats.get('processed', 0)} success, {extract_stats.get('failed', 0)} failed") - logger.info(f"Embeddings: {embed_stats.get('generated', 0)} success, {embed_stats.get('failed', 0)} failed") + logger.info(f"Extraction: {extract_stats.get('success', 0)} success, {extract_stats.get('failed', 0)} failed") + logger.info(f" -> Chunks: {extract_stats.get('chunks_created', 0)}, Facts: {extract_stats.get('facts_created', 0)}, Entities: {extract_stats.get('entities_created', 0)}") + logger.info(f"Embeddings: {embed_stats.get('success', 0)} success, {embed_stats.get('failed', 0)} failed") logger.info(f"Duration: {duration:.1f} seconds") logger.info("=" * 60) logger.info("PIPELINE COMPLETED SUCCESSFULLY")