fix(zopk): Naprawa łączenia encji - brak kolumn subject_entity_id/object_entity_id

Problem: ZOPKKnowledgeFact nie ma kolumn subject_entity_id i object_entity_id.
Zamiast tego używa entities_involved (JSONB array).

Zmiany:
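- get_entity_merge_preview(): fakty liczone zapytaniem JSONB z operatorem zawierania @> na entities_involved
- merge_entities(): podmiana ID duplikatu na ID encji głównej w tablicy entities_involved (jsonb_agg + jsonb_set)
- klucze 'facts_subject' i 'facts_object' w 'transfers' zastąpione jednym kluczem 'facts'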

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-17 09:57:30 +01:00
parent b69882bbb2
commit 9ae72bb2fa

View File

@@ -1800,8 +1800,7 @@ def merge_entities(
transfers = { transfers = {
'mentions': 0, 'mentions': 0,
'facts_subject': 0, 'facts': 0,
'facts_object': 0,
'relations_source': 0, 'relations_source': 0,
'relations_target': 0 'relations_target': 0
} }
@@ -1815,21 +1814,27 @@ def merge_entities(
"""), {'primary_id': primary_id, 'duplicate_id': duplicate_id}) """), {'primary_id': primary_id, 'duplicate_id': duplicate_id})
transfers['mentions'] = result.rowcount transfers['mentions'] = result.rowcount
# 2. Transfer facts (subject) # 2. Transfer facts - update entities_involved JSONB
# Replace duplicate entity ID with primary ID in the JSONB array
result = db_session.execute(text(""" result = db_session.execute(text("""
UPDATE zopk_knowledge_facts UPDATE zopk_knowledge_facts
SET subject_entity_id = :primary_id SET entities_involved = (
WHERE subject_entity_id = :duplicate_id SELECT jsonb_agg(
"""), {'primary_id': primary_id, 'duplicate_id': duplicate_id}) CASE
transfers['facts_subject'] = result.rowcount WHEN (elem->>'id')::int = :duplicate_id
THEN jsonb_set(elem, '{id}', to_jsonb(:primary_id))
# 3. Transfer facts (object) ELSE elem
result = db_session.execute(text(""" END
UPDATE zopk_knowledge_facts )
SET object_entity_id = :primary_id FROM jsonb_array_elements(entities_involved::jsonb) AS elem
WHERE object_entity_id = :duplicate_id )
"""), {'primary_id': primary_id, 'duplicate_id': duplicate_id}) WHERE entities_involved::jsonb @> :entity_json::jsonb
transfers['facts_object'] = result.rowcount """), {
'primary_id': primary_id,
'duplicate_id': duplicate_id,
'entity_json': f'[{{"id": {duplicate_id}}}]'
})
transfers['facts'] = result.rowcount
# 4. Transfer relations (source) # 4. Transfer relations (source)
result = db_session.execute(text(""" result = db_session.execute(text("""
@@ -1907,13 +1912,12 @@ def get_entity_merge_preview(
ZOPKKnowledgeEntityMention.entity_id == duplicate_id ZOPKKnowledgeEntityMention.entity_id == duplicate_id
).scalar() or 0 ).scalar() or 0
facts_subject = db_session.query(func.count(ZOPKKnowledgeFact.id)).filter( # Facts use entities_involved (JSONB) not FK columns, so count via JSONB query
ZOPKKnowledgeFact.subject_entity_id == duplicate_id # Count facts where duplicate entity is in entities_involved array
).scalar() or 0 facts_with_entity = db_session.execute(text("""
SELECT COUNT(*) FROM zopk_knowledge_facts
facts_object = db_session.query(func.count(ZOPKKnowledgeFact.id)).filter( WHERE entities_involved::jsonb @> :entity_json::jsonb
ZOPKKnowledgeFact.object_entity_id == duplicate_id """), {'entity_json': f'[{{"id": {duplicate_id}}}]'}).scalar() or 0
).scalar() or 0
relations_source = db_session.query(func.count(ZOPKKnowledgeRelation.id)).filter( relations_source = db_session.query(func.count(ZOPKKnowledgeRelation.id)).filter(
ZOPKKnowledgeRelation.source_entity_id == duplicate_id ZOPKKnowledgeRelation.source_entity_id == duplicate_id
@@ -1940,11 +1944,10 @@ def get_entity_merge_preview(
}, },
'transfers': { 'transfers': {
'mentions': mentions, 'mentions': mentions,
'facts_subject': facts_subject, 'facts': facts_with_entity,
'facts_object': facts_object,
'relations_source': relations_source, 'relations_source': relations_source,
'relations_target': relations_target, 'relations_target': relations_target,
'total': mentions + facts_subject + facts_object + relations_source + relations_target 'total': mentions + facts_with_entity + relations_source + relations_target
}, },
'result': { 'result': {
'new_mentions_count': primary.mentions_count + duplicate.mentions_count 'new_mentions_count': primary.mentions_count + duplicate.mentions_count