nordabiz/database/migrations/015_zopk_full_content.sql
Maciej Pienczyn 1b4cd31c41 feat(zopk): Knowledge Base + NordaGPT integration (FAZY 0-3)
FAZA 0 - Web Scraping:
- Migracja 015: pola full_content, scrape_status w zopk_news
- zopk_content_scraper.py: scraper z rate limiting i selektorami

FAZA 1 - Knowledge Extraction:
- zopk_knowledge_service.py: chunking, facts, entities extraction
- Endpointy /admin/zopk/knowledge/extract

FAZA 2 - Embeddings:
- gemini_service.py: generate_embedding(), generate_embeddings_batch()
- Model text-embedding-004 (768 dimensions)

FAZA 3 - NordaGPT Integration:
- nordabiz_chat.py: _is_zopk_query(), _get_zopk_knowledge_context()
- System prompt z bazą wiedzy ZOPK
- Semantic search w kontekście chatu

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 20:15:30 +01:00

59 lines
2.8 KiB
SQL

-- Migration 015: Add full_content fields to zopk_news for knowledge base extraction
-- Date: 2026-01-16
-- Purpose: Store scraped article content for AI knowledge extraction
-- ============================================================
-- ADD NEW COLUMNS TO zopk_news
-- ============================================================
-- Extend zopk_news with the columns used for content scraping and knowledge
-- extraction. Every column is added in one ALTER TABLE statement; each clause
-- carries IF NOT EXISTS, so re-running the migration skips columns that are
-- already present.
-- NOTE(review): the two *_at columns are TIMESTAMP (without time zone) --
-- confirm that all writers store the same zone (e.g. UTC).
ALTER TABLE zopk_news
    -- Full article text scraped from the source URL
    ADD COLUMN IF NOT EXISTS full_content TEXT,

    -- Scraping metadata
    ADD COLUMN IF NOT EXISTS content_scraped_at TIMESTAMP,
    -- Documented status values: pending, scraped, failed, skipped
    -- (this migration adds no CHECK constraint to enforce them)
    ADD COLUMN IF NOT EXISTS scrape_status VARCHAR(20) DEFAULT 'pending',

    -- Scraping error tracking
    ADD COLUMN IF NOT EXISTS scrape_error TEXT,
    ADD COLUMN IF NOT EXISTS scrape_attempts INTEGER DEFAULT 0,

    -- Content metadata (extracted during scraping)
    ADD COLUMN IF NOT EXISTS content_word_count INTEGER,
    ADD COLUMN IF NOT EXISTS content_language VARCHAR(10) DEFAULT 'pl',

    -- Knowledge extraction status
    ADD COLUMN IF NOT EXISTS knowledge_extracted BOOLEAN DEFAULT FALSE,
    ADD COLUMN IF NOT EXISTS knowledge_extracted_at TIMESTAMP;
-- ============================================================
-- INDEXES FOR EFFICIENT QUERYING
-- ============================================================
-- Lookup by scrape_status: used to find articles that still need scraping.
CREATE INDEX IF NOT EXISTS idx_zopk_news_scrape_status
    ON zopk_news (scrape_status);

-- Lookup by knowledge_extracted: used to find articles ready for knowledge
-- extraction.
CREATE INDEX IF NOT EXISTS idx_zopk_news_knowledge_extracted
    ON zopk_news (knowledge_extracted);

-- Composite index for the scraping pipeline.
-- NOTE(review): the leading column, status, is not added by this migration;
-- it is expected to already exist on zopk_news.
CREATE INDEX IF NOT EXISTS idx_zopk_news_scrape_pipeline
    ON zopk_news (status, scrape_status, knowledge_extracted);
-- ============================================================
-- COMMENTS
-- ============================================================
-- Catalog comments for every column added by this migration, listed in the
-- order the columns are created. COMMENT ON replaces any existing comment,
-- so these statements are safe to re-run.
COMMENT ON COLUMN zopk_news.full_content IS 'Full article text scraped from source URL (without HTML, ads, navigation)';
COMMENT ON COLUMN zopk_news.content_scraped_at IS 'Timestamp of content scraping';
COMMENT ON COLUMN zopk_news.scrape_status IS 'pending=not scraped, scraped=success, failed=error, skipped=not scrapeable';
COMMENT ON COLUMN zopk_news.scrape_error IS 'Error message if scraping failed';
COMMENT ON COLUMN zopk_news.scrape_attempts IS 'Number of scraping attempts (for retry logic)';
COMMENT ON COLUMN zopk_news.content_word_count IS 'Word count of scraped content';
COMMENT ON COLUMN zopk_news.content_language IS 'Language of scraped content (default: pl)';
COMMENT ON COLUMN zopk_news.knowledge_extracted IS 'True if chunks/facts/entities extracted';
COMMENT ON COLUMN zopk_news.knowledge_extracted_at IS 'Timestamp of knowledge extraction';
-- ============================================================
-- GRANT PERMISSIONS
-- ============================================================
-- Give the application role every table-level privilege on zopk_news.
-- NOTE(review): ALL also covers TRUNCATE, REFERENCES and TRIGGER -- confirm
-- the application role needs more than SELECT/INSERT/UPDATE/DELETE.
GRANT ALL PRIVILEGES
    ON TABLE zopk_news
    TO nordabiz_app;