fix: Improve extraction prompt with JSON structure and fix fact parsing

- Updated prompt to specify expected JSON structure for facts and entities
- Fact parsing now reads the new 'text' field first and falls back to the legacy 'full_text' field
- Listed explicit type values for facts and entities

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-16 21:14:56 +01:00
parent 624114d443
commit 9421f2847f

View File

@ -94,12 +94,25 @@ RELATION_TYPES = [
# Ultra-simplified prompt to avoid Gemini safety filter issues
# Note: Complex JSON schemas with pipe characters were triggering filters
EXTRACTION_USER_PROMPT = """Przeanalizuj artykuł i zwróć JSON.
# Note: max_tokens parameter also triggers filters - don't use it!
EXTRACTION_USER_PROMPT = """Przeanalizuj artykuł i wyodrębnij informacje w formacie JSON.
ARTYKUŁ:
{chunk_text}
Zwróć JSON z facts, entities, summary."""
Zwróć JSON z następującą strukturą:
{{
"facts": [
{{"text": "pełny fakt", "type": "investment"}}
],
"entities": [
{{"name": "Nazwa", "type": "company"}}
],
"summary": "krótkie podsumowanie"
}}
Typy faktów: investment, decision, event, statistic, partnership, milestone
Typy encji: company, person, place, organization, project"""
# System prompt is now empty - the user prompt contains all necessary instructions
EXTRACTION_SYSTEM_PROMPT = ""
@ -429,6 +442,9 @@ class ZOPKKnowledgeService:
except (ValueError, TypeError):
date_value = None
# Support both the old format (full_text) and the new simplified format (text); 'text' takes precedence when present and non-empty
fact_text = fact.get('text') or fact.get('full_text', '')
db_fact = ZOPKKnowledgeFact(
source_chunk_id=chunk.id,
source_news_id=news.id,
@ -436,7 +452,7 @@ class ZOPKKnowledgeService:
subject=fact.get('subject'),
predicate=fact.get('predicate'),
object=fact.get('object'),
full_text=fact.get('full_text', ''),
full_text=fact_text,
numeric_value=numeric_value,
numeric_unit=fact.get('numeric_unit'),
date_value=date_value,