feat(zopk): Implementacja łączenia duplikatów encji (Priorytet 4)
Nowe funkcje w zopk_knowledge_service.py: - find_duplicate_entities() - wyszukiwanie podobnych encji (pg_trgm) - merge_entities() - łączenie encji z transferem relacji - get_entity_merge_preview() - podgląd przed połączeniem Nowe endpointy w app.py: - GET /admin/zopk/knowledge/duplicates - panel zarządzania duplikatami - POST /api/zopk/knowledge/duplicates/preview - podgląd merge - POST /api/zopk/knowledge/duplicates/merge - wykonanie merge Nowy szablon: - templates/admin/zopk_knowledge_duplicates.html - UI z kartami encji Dodatkowo: - Aktualizacja CLAUDE.md z procedurą wdrażania - Skrypt scripts/run_migration.py do uruchamiania migracji SQL Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
6d1f75bce5
commit
143f5c674a
34
CLAUDE.md
34
CLAUDE.md
@ -200,6 +200,40 @@ ssh maciejpi@10.22.68.249 "cd /var/www/nordabiznes && sudo -u www-data git pull
|
||||
- **User:** www-data
|
||||
- **SSL verify:** disabled (`git -c http.sslVerify=false`)
|
||||
|
||||
### Procedura wdrażania (WAŻNE!)
|
||||
|
||||
**Pełna procedura wdrażania z migracjami SQL:**
|
||||
|
||||
```bash
|
||||
# 1. DEV: Push do obu repozytoriów
|
||||
git push origin master && git push inpi master
|
||||
|
||||
# 2. PROD: Pull zmiany
|
||||
ssh maciejpi@10.22.68.249 "cd /var/www/nordabiznes && sudo -u www-data git pull"
|
||||
|
||||
# 3. PROD: Uruchom migracje SQL (jeśli są)
|
||||
ssh maciejpi@10.22.68.249 "cd /var/www/nordabiznes && /var/www/nordabiznes/venv/bin/python3 scripts/run_migration.py database/migrations/XXX_nazwa.sql"
|
||||
|
||||
# 4. PROD: Restart serwisu
|
||||
ssh maciejpi@10.22.68.249 "sudo systemctl restart nordabiznes"
|
||||
|
||||
# 5. Weryfikacja
|
||||
curl -sI https://nordabiznes.pl/health | head -3
|
||||
```
|
||||
|
||||
**⚠️ UWAGI KRYTYCZNE:**
|
||||
|
||||
1. **Migracje SQL** - NIE używaj `psql` bezpośrednio (wymaga hasła). Użyj skryptu `scripts/run_migration.py`, który czyta DATABASE_URL z `.env`.
|
||||
|
||||
2. **Uprawnienia logów** - Serwis działa jako `maciejpi` (nie `www-data`). Jeśli pojawi się błąd `Permission denied: /var/log/nordabiznes/*`:
|
||||
```bash
|
||||
ssh maciejpi@10.22.68.249 "sudo chown -R maciejpi:maciejpi /var/log/nordabiznes/"
|
||||
```
|
||||
|
||||
3. **502 po restarcie** - Czasami występuje chwilowy 502. Poczekaj 3-5 sekund i sprawdź ponownie.
|
||||
|
||||
4. **Git pull** - Używaj `sudo -u www-data git pull` (www-data ma dostęp do kluczy SSH).
|
||||
|
||||
## Auto Claude - Konfiguracja i rozwiązywanie problemów
|
||||
|
||||
### Pliki stanu Auto Claude (WAŻNE!)
|
||||
|
||||
96
app.py
96
app.py
@ -11810,6 +11810,102 @@ def api_zopk_chunk_delete(chunk_id):
|
||||
db.close()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# ZOPK ENTITY DUPLICATE MANAGEMENT
|
||||
# ============================================================
|
||||
|
||||
@app.route('/admin/zopk/knowledge/duplicates')
@login_required
def admin_zopk_knowledge_duplicates():
    """Render the admin page listing candidate duplicate entity pairs.

    Query parameters:
        entity_type: optional entity type to restrict the search to; an
            empty or missing value means "all types".
        min_similarity: similarity threshold forwarded to
            ``find_duplicate_entities``. Defaults to 0.4; values that are
            not numbers, are NaN, or fall outside 0.0-1.0 are replaced by
            the default instead of causing a server error.

    Returns:
        The rendered ``admin/zopk_knowledge_duplicates.html`` template, or
        a redirect to the dashboard (with a flash message) for non-admins.
    """
    if not current_user.is_admin:
        flash('Brak uprawnień do tej strony.', 'error')
        return redirect(url_for('dashboard'))

    from zopk_knowledge_service import find_duplicate_entities
    from database import ZOPKKnowledgeEntity
    from sqlalchemy import distinct

    default_similarity = 0.4

    # Get filter parameters
    entity_type = request.args.get('entity_type', '')
    # With ``type=float`` the default is returned when conversion fails,
    # so a request like ?min_similarity=abc no longer raises ValueError.
    min_similarity = request.args.get(
        'min_similarity', default_similarity, type=float
    )
    # The chained comparison is False for NaN, so NaN is rejected here too.
    if not 0.0 <= min_similarity <= 1.0:
        min_similarity = default_similarity

    db = SessionLocal()
    try:
        # Find duplicates
        duplicates = find_duplicate_entities(
            db,
            entity_type=entity_type or None,
            min_similarity=min_similarity,
            limit=100,
        )

        # Get unique entity types for the filter dropdown. NULL types are
        # dropped because sorted() cannot order None against str.
        entity_types = [
            row[0]
            for row in db.query(distinct(ZOPKKnowledgeEntity.entity_type)).all()
            if row[0] is not None
        ]

        return render_template(
            'admin/zopk_knowledge_duplicates.html',
            duplicates=duplicates,
            entity_types=sorted(entity_types),
            selected_type=entity_type,
            min_similarity=min_similarity,
        )
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.route('/api/zopk/knowledge/duplicates/preview', methods=['POST'])
@login_required
def api_zopk_duplicates_preview():
    """Preview merge operation between two entities.

    Expects a JSON object with ``primary_id`` (entity to keep) and
    ``duplicate_id`` (entity to be merged into it).

    Returns:
        JSON ``{'success': True, 'preview': ...}`` on success;
        403 for non-admins, 400 when the IDs are missing, not integers or
        identical, 404 when the service reports an error for the pair.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from zopk_knowledge_service import get_entity_merge_preview

    # silent=True: a malformed or non-JSON body yields None instead of an
    # HTML error page; a non-object payload (e.g. a list) is treated as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    try:
        primary_id = int(data.get('primary_id') or 0)
        duplicate_id = int(data.get('duplicate_id') or 0)
    except (TypeError, ValueError):
        return jsonify({'success': False, 'error': 'Brak ID encji'}), 400

    if primary_id <= 0 or duplicate_id <= 0:
        return jsonify({'success': False, 'error': 'Brak ID encji'}), 400
    if primary_id == duplicate_id:
        return jsonify({
            'success': False,
            'error': 'Nie można połączyć encji z samą sobą'
        }), 400

    db = SessionLocal()
    try:
        preview = get_entity_merge_preview(db, primary_id, duplicate_id)
        if 'error' in preview:
            return jsonify({'success': False, 'error': preview['error']}), 404

        return jsonify({'success': True, 'preview': preview})
    finally:
        db.close()
|
||||
|
||||
|
||||
@app.route('/api/zopk/knowledge/duplicates/merge', methods=['POST'])
@login_required
def api_zopk_duplicates_merge():
    """Merge two entities - keep primary, delete duplicate.

    Expects a JSON object with ``primary_id``, ``duplicate_id`` and an
    optional ``new_name`` for the surviving entity.

    Returns:
        The result dict of ``merge_entities`` as JSON; 403 for non-admins,
        400 when the IDs are missing, not integers or identical.
    """
    if not current_user.is_admin:
        return jsonify({'success': False, 'error': 'Brak uprawnień'}), 403

    from zopk_knowledge_service import merge_entities

    # silent=True: a malformed or non-JSON body yields None instead of an
    # HTML error page; a non-object payload (e.g. a list) is treated as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    try:
        primary_id = int(data.get('primary_id') or 0)
        duplicate_id = int(data.get('duplicate_id') or 0)
    except (TypeError, ValueError):
        return jsonify({'success': False, 'error': 'Brak ID encji'}), 400

    if primary_id <= 0 or duplicate_id <= 0:
        return jsonify({'success': False, 'error': 'Brak ID encji'}), 400

    # NOTE(review): the visible part of merge_entities checks existence and
    # entity type but not identical IDs; merging an entity into itself would
    # end with the "duplicate" (i.e. the entity itself) being removed.
    if primary_id == duplicate_id:
        return jsonify({
            'success': False,
            'error': 'Nie można połączyć encji z samą sobą'
        }), 400

    # Only a non-blank string is a valid rename request.
    new_name = data.get('new_name')
    new_name = new_name.strip() or None if isinstance(new_name, str) else None

    db = SessionLocal()
    try:
        result = merge_entities(db, primary_id, duplicate_id, new_name)
        return jsonify(result)
    finally:
        db.close()
|
||||
|
||||
|
||||
# ============================================================
|
||||
# KRS AUDIT (Krajowy Rejestr Sądowy)
|
||||
# ============================================================
|
||||
|
||||
@ -324,6 +324,13 @@
|
||||
<div class="quick-link-desc">Firmy, osoby, miejsca, projekty</div>
|
||||
</div>
|
||||
</a>
|
||||
<a href="{{ url_for('admin_zopk_knowledge_duplicates') }}" class="quick-link" style="border-color: #f59e0b;">
|
||||
<div class="quick-link-icon">🔀</div>
|
||||
<div class="quick-link-text">
|
||||
<div class="quick-link-title">Duplikaty</div>
|
||||
<div class="quick-link-desc">Łączenie podobnych encji</div>
|
||||
</div>
|
||||
</a>
|
||||
<a href="{{ url_for('admin_zopk_news') }}" class="quick-link">
|
||||
<div class="quick-link-icon">📰</div>
|
||||
<div class="quick-link-text">
|
||||
|
||||
701
templates/admin/zopk_knowledge_duplicates.html
Normal file
701
templates/admin/zopk_knowledge_duplicates.html
Normal file
@ -0,0 +1,701 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Duplikaty Encji - ZOPK Baza Wiedzy{% endblock %}
|
||||
|
||||
{% block extra_css %}
|
||||
<style>
|
||||
.page-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: var(--spacing-xl);
|
||||
}
|
||||
|
||||
.page-header h1 {
|
||||
font-size: var(--font-size-2xl);
|
||||
color: var(--text-primary);
|
||||
}
|
||||
|
||||
.breadcrumb {
|
||||
display: flex;
|
||||
gap: var(--spacing-xs);
|
||||
color: var(--text-secondary);
|
||||
font-size: var(--font-size-sm);
|
||||
margin-bottom: var(--spacing-lg);
|
||||
}
|
||||
|
||||
.breadcrumb a {
|
||||
color: var(--primary);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
.breadcrumb a:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
.filters-bar {
|
||||
display: flex;
|
||||
gap: var(--spacing-md);
|
||||
align-items: center;
|
||||
margin-bottom: var(--spacing-xl);
|
||||
padding: var(--spacing-md);
|
||||
background: var(--surface);
|
||||
border-radius: var(--radius);
|
||||
box-shadow: var(--shadow);
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.filter-group {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: var(--spacing-xs);
|
||||
}
|
||||
|
||||
.filter-group label {
|
||||
font-size: var(--font-size-sm);
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.filter-group select,
|
||||
.filter-group input {
|
||||
padding: 6px 12px;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-sm);
|
||||
font-size: var(--font-size-sm);
|
||||
}
|
||||
|
||||
.duplicates-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: var(--spacing-lg);
|
||||
}
|
||||
|
||||
.duplicate-card {
|
||||
background: var(--surface);
|
||||
border-radius: var(--radius-lg);
|
||||
box-shadow: var(--shadow);
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.duplicate-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
padding: var(--spacing-md) var(--spacing-lg);
|
||||
background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%);
|
||||
border-bottom: 1px solid #fbbf24;
|
||||
}
|
||||
|
||||
.duplicate-type {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: var(--spacing-sm);
|
||||
font-weight: 600;
|
||||
color: #92400e;
|
||||
}
|
||||
|
||||
.similarity-badge {
|
||||
padding: 4px 10px;
|
||||
border-radius: var(--radius);
|
||||
font-size: var(--font-size-sm);
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.similarity-high {
|
||||
background: #dcfce7;
|
||||
color: #166534;
|
||||
}
|
||||
|
||||
.similarity-medium {
|
||||
background: #fef3c7;
|
||||
color: #92400e;
|
||||
}
|
||||
|
||||
.similarity-low {
|
||||
background: #fee2e2;
|
||||
color: #991b1b;
|
||||
}
|
||||
|
||||
.duplicate-body {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr auto 1fr;
|
||||
gap: var(--spacing-lg);
|
||||
padding: var(--spacing-lg);
|
||||
}
|
||||
|
||||
.entity-card {
|
||||
padding: var(--spacing-md);
|
||||
background: var(--background);
|
||||
border-radius: var(--radius);
|
||||
border: 2px solid transparent;
|
||||
cursor: pointer;
|
||||
transition: var(--transition);
|
||||
}
|
||||
|
||||
.entity-card:hover {
|
||||
border-color: var(--primary);
|
||||
}
|
||||
|
||||
.entity-card.selected {
|
||||
border-color: var(--primary);
|
||||
background: #f0fdf4;
|
||||
}
|
||||
|
||||
.entity-card.selected-duplicate {
|
||||
border-color: #ef4444;
|
||||
background: #fee2e2;
|
||||
}
|
||||
|
||||
.entity-name {
|
||||
font-size: var(--font-size-lg);
|
||||
font-weight: 600;
|
||||
color: var(--text-primary);
|
||||
margin-bottom: var(--spacing-xs);
|
||||
}
|
||||
|
||||
.entity-meta {
|
||||
display: flex;
|
||||
gap: var(--spacing-md);
|
||||
font-size: var(--font-size-sm);
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.entity-meta span {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 4px;
|
||||
}
|
||||
|
||||
.merge-arrow {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
gap: var(--spacing-sm);
|
||||
}
|
||||
|
||||
.merge-arrow svg {
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
.merge-arrow span {
|
||||
font-size: var(--font-size-xs);
|
||||
color: var(--text-muted);
|
||||
}
|
||||
|
||||
.duplicate-actions {
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
gap: var(--spacing-md);
|
||||
padding: var(--spacing-md) var(--spacing-lg);
|
||||
border-top: 1px solid var(--border);
|
||||
background: var(--background);
|
||||
}
|
||||
|
||||
.btn {
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
gap: var(--spacing-xs);
|
||||
padding: 8px 16px;
|
||||
border-radius: var(--radius);
|
||||
font-size: var(--font-size-sm);
|
||||
font-weight: 500;
|
||||
cursor: pointer;
|
||||
transition: var(--transition);
|
||||
text-decoration: none;
|
||||
border: none;
|
||||
}
|
||||
|
||||
.btn-primary {
|
||||
background: var(--primary);
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-primary:hover {
|
||||
background: var(--primary-dark);
|
||||
}
|
||||
|
||||
.btn-danger {
|
||||
background: #ef4444;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-danger:hover {
|
||||
background: #dc2626;
|
||||
}
|
||||
|
||||
.btn-secondary {
|
||||
background: var(--background);
|
||||
color: var(--text-primary);
|
||||
border: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.btn-secondary:hover {
|
||||
background: var(--surface);
|
||||
}
|
||||
|
||||
.empty-state {
|
||||
text-align: center;
|
||||
padding: var(--spacing-2xl);
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.empty-state svg {
|
||||
width: 64px;
|
||||
height: 64px;
|
||||
color: var(--text-muted);
|
||||
margin-bottom: var(--spacing-md);
|
||||
}
|
||||
|
||||
/* Modal */
|
||||
.modal-overlay {
|
||||
display: none;
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
background: rgba(0,0,0,0.5);
|
||||
z-index: 1000;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.modal-overlay.active {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.modal {
|
||||
background: var(--surface);
|
||||
border-radius: var(--radius-lg);
|
||||
max-width: 600px;
|
||||
width: 90%;
|
||||
max-height: 80vh;
|
||||
overflow-y: auto;
|
||||
box-shadow: var(--shadow-lg);
|
||||
}
|
||||
|
||||
.modal-header {
|
||||
padding: var(--spacing-lg);
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.modal-header h2 {
|
||||
font-size: var(--font-size-xl);
|
||||
}
|
||||
|
||||
.modal-body {
|
||||
padding: var(--spacing-lg);
|
||||
}
|
||||
|
||||
.modal-footer {
|
||||
padding: var(--spacing-lg);
|
||||
border-top: 1px solid var(--border);
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
gap: var(--spacing-md);
|
||||
}
|
||||
|
||||
.preview-section {
|
||||
margin-bottom: var(--spacing-lg);
|
||||
}
|
||||
|
||||
.preview-section h4 {
|
||||
font-size: var(--font-size-sm);
|
||||
color: var(--text-secondary);
|
||||
margin-bottom: var(--spacing-sm);
|
||||
}
|
||||
|
||||
.preview-entities {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr auto 1fr;
|
||||
gap: var(--spacing-md);
|
||||
align-items: center;
|
||||
margin-bottom: var(--spacing-lg);
|
||||
}
|
||||
|
||||
.preview-entity {
|
||||
padding: var(--spacing-md);
|
||||
background: var(--background);
|
||||
border-radius: var(--radius);
|
||||
}
|
||||
|
||||
.preview-entity.keep {
|
||||
border: 2px solid var(--primary);
|
||||
}
|
||||
|
||||
.preview-entity.delete {
|
||||
border: 2px solid #ef4444;
|
||||
opacity: 0.7;
|
||||
}
|
||||
|
||||
.preview-stats {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
|
||||
gap: var(--spacing-sm);
|
||||
}
|
||||
|
||||
.preview-stat {
|
||||
padding: var(--spacing-sm);
|
||||
background: var(--background);
|
||||
border-radius: var(--radius-sm);
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.preview-stat-value {
|
||||
font-size: var(--font-size-xl);
|
||||
font-weight: 700;
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
.preview-stat-label {
|
||||
font-size: var(--font-size-xs);
|
||||
color: var(--text-secondary);
|
||||
}
|
||||
|
||||
.name-input {
|
||||
width: 100%;
|
||||
padding: var(--spacing-sm);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
font-size: var(--font-size-base);
|
||||
margin-top: var(--spacing-xs);
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.duplicate-body {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.merge-arrow {
|
||||
transform: rotate(90deg);
|
||||
}
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="container">
|
||||
<div class="breadcrumb">
|
||||
<a href="{{ url_for('admin_zopk') }}">Panel ZOPK</a>
|
||||
<span>›</span>
|
||||
<a href="{{ url_for('admin_zopk_knowledge_dashboard') }}">Baza Wiedzy</a>
|
||||
<span>›</span>
|
||||
<span>Duplikaty Encji</span>
|
||||
</div>
|
||||
|
||||
<div class="page-header">
|
||||
<h1>🔀 Duplikaty Encji</h1>
|
||||
</div>
|
||||
|
||||
<div class="filters-bar">
|
||||
<form method="get" style="display: contents;">
|
||||
<div class="filter-group">
|
||||
<label for="entity_type">Typ encji:</label>
|
||||
<select name="entity_type" id="entity_type" onchange="this.form.submit()">
|
||||
<option value="">Wszystkie</option>
|
||||
{% for etype in entity_types %}
|
||||
<option value="{{ etype }}" {% if etype == selected_type %}selected{% endif %}>{{ etype }}</option>
|
||||
{% endfor %}
|
||||
</select>
|
||||
</div>
|
||||
<div class="filter-group">
|
||||
<label for="min_similarity">Min. podobieństwo:</label>
|
||||
<input type="range" name="min_similarity" id="min_similarity"
|
||||
min="0.3" max="0.9" step="0.1"
|
||||
value="{{ min_similarity }}"
|
||||
onchange="document.getElementById('sim_value').textContent = this.value; this.form.submit()">
|
||||
<span id="sim_value">{{ min_similarity }}</span>
|
||||
</div>
|
||||
</form>
|
||||
<div style="margin-left: auto;">
|
||||
Znaleziono: <strong>{{ duplicates|length }}</strong> par
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{% if duplicates %}
|
||||
<div class="duplicates-list">
|
||||
{% for dup in duplicates %}
|
||||
<div class="duplicate-card" data-pair-id="{{ loop.index }}">
|
||||
<div class="duplicate-header">
|
||||
<div class="duplicate-type">
|
||||
<span>{{ dup.entity1.entity_type }}</span>
|
||||
</div>
|
||||
<span class="similarity-badge {% if dup.similarity > 0.8 %}similarity-high{% elif dup.similarity > 0.6 %}similarity-medium{% else %}similarity-low{% endif %}">
|
||||
{{ (dup.similarity * 100)|round|int }}% podobieństwo
|
||||
{% if dup.match_type == 'substring' %}(substring){% endif %}
|
||||
</span>
|
||||
</div>
|
||||
<div class="duplicate-body">
|
||||
<div class="entity-card"
|
||||
onclick="selectEntity(this, {{ dup.entity1.id }}, 'primary')"
|
||||
data-id="{{ dup.entity1.id }}"
|
||||
data-name="{{ dup.entity1.name }}">
|
||||
<div class="entity-name">{{ dup.entity1.name }}</div>
|
||||
<div class="entity-meta">
|
||||
<span>📊 {{ dup.entity1.mentions_count }} wzmianek</span>
|
||||
{% if dup.entity1.is_verified %}
|
||||
<span>✅ Zweryfikowano</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="merge-arrow">
|
||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<line x1="5" y1="12" x2="19" y2="12"></line>
|
||||
<polyline points="12 5 19 12 12 19"></polyline>
|
||||
</svg>
|
||||
<span>połącz</span>
|
||||
</div>
|
||||
|
||||
<div class="entity-card"
|
||||
onclick="selectEntity(this, {{ dup.entity2.id }}, 'duplicate')"
|
||||
data-id="{{ dup.entity2.id }}"
|
||||
data-name="{{ dup.entity2.name }}">
|
||||
<div class="entity-name">{{ dup.entity2.name }}</div>
|
||||
<div class="entity-meta">
|
||||
<span>📊 {{ dup.entity2.mentions_count }} wzmianek</span>
|
||||
{% if dup.entity2.is_verified %}
|
||||
<span>✅ Zweryfikowano</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="duplicate-actions">
|
||||
<button class="btn btn-secondary" onclick="skipPair({{ loop.index }})">
|
||||
⏭️ Pomiń
|
||||
</button>
|
||||
<button class="btn btn-primary" onclick="openMergeModal({{ loop.index }}, {{ dup.entity1.id }}, {{ dup.entity2.id }})">
|
||||
🔀 Połącz encje
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="empty-state">
|
||||
<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<circle cx="12" cy="12" r="10"></circle>
|
||||
<path d="M16 16s-1.5-2-4-2-4 2-4 2"></path>
|
||||
<line x1="9" y1="9" x2="9.01" y2="9"></line>
|
||||
<line x1="15" y1="9" x2="15.01" y2="9"></line>
|
||||
</svg>
|
||||
<h3>Brak duplikatów do wyświetlenia</h3>
|
||||
<p>Spróbuj zmniejszyć próg podobieństwa lub wybierz inny typ encji.</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- Merge Preview Modal -->
|
||||
<div class="modal-overlay" id="mergeModal">
|
||||
<div class="modal">
|
||||
<div class="modal-header">
|
||||
<h2>🔀 Podgląd połączenia encji</h2>
|
||||
</div>
|
||||
<div class="modal-body" id="mergePreviewContent">
|
||||
<p>Ładowanie...</p>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button class="btn btn-secondary" onclick="closeMergeModal()">Anuluj</button>
|
||||
<button class="btn btn-danger" id="confirmMergeBtn" onclick="confirmMerge()">
|
||||
🔀 Połącz encje
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block extra_js %}
|
||||
let currentPrimaryId = null;
|
||||
let currentDuplicateId = null;
|
||||
let currentNewName = null;
|
||||
|
||||
/**
 * Mark the clicked entity card as the one to keep and every other card of
 * the same pair as the duplicate to be removed.
 *
 * Previously only role 'primary' applied any marking; clicking the card
 * wired with role 'duplicate' merely cleared the selection, so that entity
 * could never be chosen as the one to keep. The clicked card now always
 * becomes the kept entity.
 *
 * @param element the clicked .entity-card element
 * @param id      entity id passed by the inline handler (unused; the id is
 *                read from data-id by openMergeModal)
 * @param role    legacy argument kept so existing onclick handlers still work
 */
function selectEntity(element, id, role) {
    const card = element.closest('.duplicate-card');
    const entities = card.querySelectorAll('.entity-card');

    entities.forEach(e => {
        // Reset, then apply exactly one of the two states to every card.
        e.classList.remove('selected', 'selected-duplicate');
        e.classList.add(e === element ? 'selected' : 'selected-duplicate');
    });
}
|
||||
|
||||
/**
 * Visually dismiss a duplicate pair: the card is dimmed and stops reacting
 * to pointer input. Nothing is sent to the server.
 *
 * @param pairId value of the card's data-pair-id attribute
 */
function skipPair(pairId) {
    const pairCard = document.querySelector(`[data-pair-id="${pairId}"]`);
    const cardStyle = pairCard.style;
    cardStyle.opacity = '0.3';
    cardStyle.pointerEvents = 'none';
}
|
||||
|
||||
/**
 * Open the merge modal for one duplicate pair and request its preview.
 *
 * The entity to keep / to remove is taken from the cards carrying the
 * 'selected' / 'selected-duplicate' classes; without such classes the
 * template-supplied order is used (id1 is kept, id2 is removed).
 *
 * @param pairId value of the card's data-pair-id attribute
 * @param id1    id of the first entity of the pair (default primary)
 * @param id2    id of the second entity of the pair (default duplicate)
 */
function openMergeModal(pairId, id1, id2) {
    const pairCard = document.querySelector(`[data-pair-id="${pairId}"]`);
    const entityCards = pairCard.querySelectorAll('.entity-card');

    // Defaults first; an explicit selection on the cards overrides them.
    let keepId = id1;
    let dropId = id2;
    for (const entityCard of entityCards) {
        const classes = entityCard.classList;
        if (classes.contains('selected')) {
            keepId = parseInt(entityCard.dataset.id);
        }
        if (classes.contains('selected-duplicate')) {
            dropId = parseInt(entityCard.dataset.id);
        }
    }

    // Nothing selected yet: highlight the default choice, i.e. the first
    // card as kept and the second as duplicate (mention counts are not
    // consulted here).
    if (pairCard.querySelector('.selected') === null) {
        const [firstCard, secondCard] = [entityCards[0], entityCards[1]];
        firstCard.classList.add('selected');
        secondCard.classList.add('selected-duplicate');
    }

    // Remember the pair for confirmMerge().
    currentPrimaryId = keepId;
    currentDuplicateId = dropId;

    // Show modal and fetch preview
    const modal = document.getElementById('mergeModal');
    modal.classList.add('active');
    fetchMergePreview(keepId, dropId);
}
|
||||
|
||||
/**
 * Hide the merge modal and forget the currently selected pair of ids.
 */
function closeMergeModal() {
    const modal = document.getElementById('mergeModal');
    modal.classList.remove('active');
    currentPrimaryId = currentDuplicateId = null;
}
|
||||
|
||||
/**
 * Load the merge preview for a pair of entities and render it into the
 * modal body.
 *
 * All server-provided values are HTML-escaped before being placed into
 * innerHTML: entity names are free text, and an unescaped quote would also
 * cut off the value of the "new name" input. A response that arrives after
 * the modal was closed or reopened for another pair is discarded.
 *
 * @param primaryId   id of the entity to keep
 * @param duplicateId id of the entity to be merged and removed
 */
async function fetchMergePreview(primaryId, duplicateId) {
    const content = document.getElementById('mergePreviewContent');
    content.innerHTML = '<p>Ładowanie podglądu...</p>';

    // Minimal HTML escaping, safe for both element content and
    // double-quoted attribute values.
    const htmlEntities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    const esc = (value) => String(value).replace(/[&<>"']/g, (ch) => htmlEntities[ch]);

    // True when this response no longer belongs to the pair shown in the modal.
    const isStale = () => currentPrimaryId !== primaryId || currentDuplicateId !== duplicateId;

    try {
        const response = await fetch('/api/zopk/knowledge/duplicates/preview', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': '{{ csrf_token() }}'
            },
            body: JSON.stringify({
                primary_id: primaryId,
                duplicate_id: duplicateId
            })
        });

        const data = await response.json();
        if (isStale()) return;

        if (data.success) {
            const p = data.preview;
            // Remembered so confirmMerge() can tell whether the name was edited.
            currentNewName = p.primary.name;

            content.innerHTML = `
                <div class="preview-section">
                    <h4>Encje do połączenia</h4>
                    <div class="preview-entities">
                        <div class="preview-entity keep">
                            <strong>✅ Zachowaj</strong>
                            <div class="entity-name">${esc(p.primary.name)}</div>
                            <div class="entity-meta">
                                <span>📊 ${esc(p.primary.mentions_count)} wzmianek</span>
                            </div>
                        </div>
                        <svg width="32" height="32" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
                            <line x1="5" y1="12" x2="19" y2="12"></line>
                            <polyline points="12 5 19 12 12 19"></polyline>
                        </svg>
                        <div class="preview-entity delete">
                            <strong>🗑️ Usuń</strong>
                            <div class="entity-name">${esc(p.duplicate.name)}</div>
                            <div class="entity-meta">
                                <span>📊 ${esc(p.duplicate.mentions_count)} wzmianek</span>
                            </div>
                        </div>
                    </div>
                </div>

                <div class="preview-section">
                    <h4>Co zostanie przeniesione</h4>
                    <div class="preview-stats">
                        <div class="preview-stat">
                            <div class="preview-stat-value">${esc(p.transfers.mentions)}</div>
                            <div class="preview-stat-label">Wzmianki</div>
                        </div>
                        <div class="preview-stat">
                            <div class="preview-stat-value">${esc(p.transfers.facts_subject + p.transfers.facts_object)}</div>
                            <div class="preview-stat-label">Fakty</div>
                        </div>
                        <div class="preview-stat">
                            <div class="preview-stat-value">${esc(p.transfers.relations_source + p.transfers.relations_target)}</div>
                            <div class="preview-stat-label">Relacje</div>
                        </div>
                        <div class="preview-stat">
                            <div class="preview-stat-value">${esc(p.result.new_mentions_count)}</div>
                            <div class="preview-stat-label">Wynik wzmianek</div>
                        </div>
                    </div>
                </div>

                <div class="preview-section">
                    <h4>Nowa nazwa encji (opcjonalnie)</h4>
                    <input type="text" class="name-input" id="newNameInput"
                           value="${esc(p.primary.name)}"
                           placeholder="Pozostaw pustą aby zachować obecną nazwę">
                </div>
            `;
        } else {
            content.innerHTML = `<p style="color: #ef4444;">Błąd: ${esc(data.error)}</p>`;
        }
    } catch (error) {
        if (isStale()) return;
        content.innerHTML = `<p style="color: #ef4444;">Błąd połączenia: ${esc(error.message)}</p>`;
    }
}
|
||||
|
||||
/**
 * Send the merge request for the pair currently shown in the modal.
 *
 * The new name is sent only when the user actually changed it to a
 * non-blank value; surrounding whitespace is trimmed so a whitespace-only
 * input does not rename the entity. On success a summary is shown and the
 * page is reloaded; on failure the confirm button is re-enabled.
 */
async function confirmMerge() {
    // Nothing to merge when the modal holds no pair (e.g. already closed).
    if (currentPrimaryId === null || currentDuplicateId === null) return;

    const btn = document.getElementById('confirmMergeBtn');
    btn.disabled = true;
    btn.textContent = 'Łączenie...';

    const restoreButton = () => {
        btn.disabled = false;
        btn.textContent = '🔀 Połącz encje';
    };

    // The input is absent when the preview failed to render.
    const typedName = (document.getElementById('newNameInput')?.value ?? '').trim();
    const newName = typedName && typedName !== currentNewName ? typedName : null;

    try {
        const response = await fetch('/api/zopk/knowledge/duplicates/merge', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'X-CSRFToken': '{{ csrf_token() }}'
            },
            body: JSON.stringify({
                primary_id: currentPrimaryId,
                duplicate_id: currentDuplicateId,
                new_name: newName
            })
        });

        const data = await response.json();

        if (data.success) {
            // Missing counters are shown as 0 so that a successful merge is
            // never reported as a connection error.
            const t = data.transfers ?? {};
            const facts = (t.facts_subject ?? 0) + (t.facts_object ?? 0);
            const relations = (t.relations_source ?? 0) + (t.relations_target ?? 0);
            alert(`✅ Encje połączone!\n\nPrzeniesiono:\n- ${t.mentions ?? 0} wzmianek\n- ${facts} faktów\n- ${relations} relacji`);
            closeMergeModal();
            window.location.reload();
        } else {
            alert(`❌ Błąd: ${data.error}`);
            restoreButton();
        }
    } catch (error) {
        alert(`❌ Błąd połączenia: ${error.message}`);
        restoreButton();
    }
}
|
||||
{% endblock %}
|
||||
@ -1653,3 +1653,300 @@ def delete_chunk(db_session, chunk_id: int) -> bool:
|
||||
db_session.delete(chunk)
|
||||
db_session.commit()
|
||||
return True
|
||||
|
||||
|
||||
# ============================================================
|
||||
# DUPLICATE ENTITY DETECTION AND MERGING
|
||||
# ============================================================
|
||||
|
||||
def find_duplicate_entities(
    db_session,
    entity_type: Optional[str] = None,
    min_similarity: float = 0.5,
    limit: int = 100
) -> List[Dict]:
    """
    Find potential duplicate entities using fuzzy matching.

    Uses the PostgreSQL ``similarity()`` function (pg_trgm extension) on
    lower-cased names. A pair of entities of the same type is reported when
    its similarity exceeds ``min_similarity`` or when one name contains the
    other; substring matches are therefore returned regardless of the
    threshold.

    Args:
        db_session: SQLAlchemy session
        entity_type: Filter by entity type (company, person, etc.).
            Passed to the database as a bound parameter, never interpolated
            into the SQL text.
        min_similarity: Minimum similarity threshold (0.0-1.0)
        limit: Maximum number of pairs to return

    Returns:
        List of dicts with duplicate pairs:
        [
            {
                'entity1': {...},
                'entity2': {...},
                'similarity': 0.85,
                'match_type': 'fuzzy'  # or 'substring'
            }
        ]
    """
    from sqlalchemy import text

    params = {'min_sim': min_similarity, 'limit': limit}

    # Only this constant fragment is spliced into the SQL; the user-supplied
    # value itself travels as the :entity_type bind parameter (the previous
    # f-string interpolation of the value allowed SQL injection).
    type_filter = ""
    if entity_type:
        type_filter = "AND e1.entity_type = :entity_type"
        params['entity_type'] = entity_type

    query = text(f"""
        SELECT
            e1.id as id1, e1.name as name1, e1.entity_type as type1,
            e1.mentions_count as mentions1, e1.is_verified as verified1,
            e2.id as id2, e2.name as name2, e2.entity_type as type2,
            e2.mentions_count as mentions2, e2.is_verified as verified2,
            similarity(LOWER(e1.name), LOWER(e2.name)) as sim,
            CASE
                WHEN LOWER(e1.name) LIKE '%' || LOWER(e2.name) || '%'
                    OR LOWER(e2.name) LIKE '%' || LOWER(e1.name) || '%'
                THEN 'substring'
                ELSE 'fuzzy'
            END as match_type
        FROM zopk_knowledge_entities e1
        JOIN zopk_knowledge_entities e2
            ON e1.id < e2.id
            AND e1.entity_type = e2.entity_type
        WHERE (
            similarity(LOWER(e1.name), LOWER(e2.name)) > :min_sim
            OR LOWER(e1.name) LIKE '%' || LOWER(e2.name) || '%'
            OR LOWER(e2.name) LIKE '%' || LOWER(e1.name) || '%'
        )
        {type_filter}
        ORDER BY
            e1.entity_type,
            GREATEST(e1.mentions_count, e2.mentions_count) DESC,
            sim DESC
        LIMIT :limit
    """)

    result = db_session.execute(query, params)

    duplicates = []
    for row in result:
        duplicates.append({
            'entity1': {
                'id': row.id1,
                'name': row.name1,
                'entity_type': row.type1,
                'mentions_count': row.mentions1,
                'is_verified': row.verified1
            },
            'entity2': {
                'id': row.id2,
                'name': row.name2,
                'entity_type': row.type2,
                'mentions_count': row.mentions2,
                'is_verified': row.verified2
            },
            # A NULL/zero similarity from the database is reported as 0.0.
            'similarity': float(row.sim) if row.sim else 0.0,
            'match_type': row.match_type
        })

    return duplicates
|
||||
|
||||
|
||||
def merge_entities(
    db_session,
    primary_id: int,
    duplicate_id: int,
    new_name: Optional[str] = None
) -> Dict:
    """
    Merge two entities - keep primary, delete duplicate.

    Transfers all references from duplicate to primary:
    - Entity mentions
    - Facts (subject/object references)
    - Relations (source/target)

    Then updates the primary entity (mentions_count, optional canonical
    name, aliases extended with the duplicate's name and aliases), deletes
    the duplicate and commits. If anything raises, the session is rolled
    back and an error dict is returned instead of propagating the exception.

    Args:
        db_session: SQLAlchemy session
        primary_id: ID of entity to keep
        duplicate_id: ID of entity to merge and delete
        new_name: Optional new canonical name for primary

    Returns:
        Dict with merge results:
        {
            'success': True,
            'primary_id': 123,
            'deleted_id': 456,
            'new_mentions_count': 20,
            'transfers': {
                'mentions': 15,
                'facts_subject': 3,
                'facts_object': 2,
                'relations_source': 1,
                'relations_target': 0
            }
        }
        On failure: {'success': False, 'error': '<message>'}
    """
    # Merging an entity into itself would double its mentions_count and then
    # delete the only copy of it - refuse before touching the database.
    if primary_id == duplicate_id:
        return {'success': False, 'error': 'Cannot merge an entity with itself'}

    from sqlalchemy import text

    # Get both entities
    primary = db_session.query(ZOPKKnowledgeEntity).get(primary_id)
    duplicate = db_session.query(ZOPKKnowledgeEntity).get(duplicate_id)

    if not primary:
        return {'success': False, 'error': f'Primary entity {primary_id} not found'}
    if not duplicate:
        return {'success': False, 'error': f'Duplicate entity {duplicate_id} not found'}
    if primary is duplicate:
        # Both IDs resolved to the same row (e.g. 5 vs "5").
        return {'success': False, 'error': 'Cannot merge an entity with itself'}
    if primary.entity_type != duplicate.entity_type:
        return {'success': False, 'error': 'Cannot merge entities of different types'}

    # (result key, static SQL) pairs, executed in this order. The statements
    # are fixed strings; only the IDs are bound as parameters.
    transfer_statements = (
        ('mentions', """
            UPDATE zopk_knowledge_entity_mentions
            SET entity_id = :primary_id
            WHERE entity_id = :duplicate_id
        """),
        ('facts_subject', """
            UPDATE zopk_knowledge_facts
            SET subject_entity_id = :primary_id
            WHERE subject_entity_id = :duplicate_id
        """),
        ('facts_object', """
            UPDATE zopk_knowledge_facts
            SET object_entity_id = :primary_id
            WHERE object_entity_id = :duplicate_id
        """),
        ('relations_source', """
            UPDATE zopk_knowledge_relations
            SET source_entity_id = :primary_id
            WHERE source_entity_id = :duplicate_id
        """),
        ('relations_target', """
            UPDATE zopk_knowledge_relations
            SET target_entity_id = :primary_id
            WHERE target_entity_id = :duplicate_id
        """),
    )
    params = {'primary_id': primary_id, 'duplicate_id': duplicate_id}
    transfers = {key: 0 for key, _ in transfer_statements}

    try:
        # 1-5. Re-point every reference from the duplicate to the primary.
        # NOTE(review): a relation that linked primary <-> duplicate becomes
        # a self-referencing relation after this step - confirm that is
        # acceptable for the data model.
        for key, statement in transfer_statements:
            result = db_session.execute(text(statement), params)
            transfers[key] = result.rowcount

        # 6. Update primary entity (treat a NULL counter as 0)
        primary.mentions_count = (
            (primary.mentions_count or 0) + (duplicate.mentions_count or 0)
        )
        if new_name:
            primary.canonical_name = new_name

        # Merge aliases: the duplicate's name is always kept as an alias,
        # followed by the duplicate's own aliases, preserving order and
        # skipping entries already present. A NEW list is assigned because
        # in-place mutation of the existing list may not be detected by the
        # ORM unless the column uses a mutation-tracking type.
        merged_aliases = list(primary.aliases or [])
        for alias in [duplicate.name, *(duplicate.aliases or [])]:
            if alias and alias not in merged_aliases:
                merged_aliases.append(alias)
        if merged_aliases:
            primary.aliases = merged_aliases

        # Read the value before commit so reporting cannot fail after the
        # merge has already been persisted.
        new_mentions_count = primary.mentions_count

        # 7. Delete duplicate
        db_session.delete(duplicate)

        db_session.commit()

        return {
            'success': True,
            'primary_id': primary_id,
            'deleted_id': duplicate_id,
            'new_mentions_count': new_mentions_count,
            'transfers': transfers
        }

    except Exception as e:
        # Service boundary: undo the partial merge and report the failure.
        db_session.rollback()
        logger.error(f"Error merging entities: {e}", exc_info=True)
        return {'success': False, 'error': str(e)}
|
||||
|
||||
|
||||
def get_entity_merge_preview(
    db_session,
    primary_id: int,
    duplicate_id: int
) -> Dict:
    """
    Preview what would happen if two entities are merged.

    Read-only: counts the rows that reference the duplicate entity and
    reports them together with basic data of both entities. Pairs that
    merge_entities() would refuse (same entity, different entity types)
    are reported as an error so the preview matches the actual merge.

    Args:
        db_session: SQLAlchemy session
        primary_id: ID of entity that would be kept
        duplicate_id: ID of entity that would be merged and deleted

    Returns:
        On success a dict with keys 'primary', 'duplicate', 'transfers'
        (per-category counts plus 'total') and 'result'
        ('new_mentions_count'). On failure: {'error': '<message>'}.
    """
    # An entity cannot be merged into itself; no database access needed.
    if primary_id == duplicate_id:
        return {'error': 'Cannot merge an entity with itself'}

    from sqlalchemy import func

    primary = db_session.query(ZOPKKnowledgeEntity).get(primary_id)
    duplicate = db_session.query(ZOPKKnowledgeEntity).get(duplicate_id)

    if not primary or not duplicate:
        return {'error': 'Entity not found'}
    if primary is duplicate:
        # Both IDs resolved to the same row (e.g. 5 vs "5").
        return {'error': 'Cannot merge an entity with itself'}
    if primary.entity_type != duplicate.entity_type:
        return {'error': 'Cannot merge entities of different types'}

    def count_references(id_column, reference_column) -> int:
        """Count rows whose reference_column points at the duplicate."""
        return db_session.query(func.count(id_column)).filter(
            reference_column == duplicate_id
        ).scalar() or 0

    # Count items that would be transferred
    mentions = count_references(
        ZOPKKnowledgeEntityMention.id, ZOPKKnowledgeEntityMention.entity_id
    )
    facts_subject = count_references(
        ZOPKKnowledgeFact.id, ZOPKKnowledgeFact.subject_entity_id
    )
    facts_object = count_references(
        ZOPKKnowledgeFact.id, ZOPKKnowledgeFact.object_entity_id
    )
    relations_source = count_references(
        ZOPKKnowledgeRelation.id, ZOPKKnowledgeRelation.source_entity_id
    )
    relations_target = count_references(
        ZOPKKnowledgeRelation.id, ZOPKKnowledgeRelation.target_entity_id
    )

    return {
        'primary': {
            'id': primary.id,
            'name': primary.name,
            'entity_type': primary.entity_type,
            'mentions_count': primary.mentions_count,
            'aliases': primary.aliases or []
        },
        'duplicate': {
            'id': duplicate.id,
            'name': duplicate.name,
            'entity_type': duplicate.entity_type,
            'mentions_count': duplicate.mentions_count,
            'aliases': duplicate.aliases or []
        },
        'transfers': {
            'mentions': mentions,
            'facts_subject': facts_subject,
            'facts_object': facts_object,
            'relations_source': relations_source,
            'relations_target': relations_target,
            'total': mentions + facts_subject + facts_object + relations_source + relations_target
        },
        'result': {
            # Treat a NULL counter as 0 instead of raising TypeError
            'new_mentions_count': (primary.mentions_count or 0) + (duplicate.mentions_count or 0)
        }
    }
|
||||
|
||||
Loading…
Reference in New Issue
Block a user