feat(zopk): Timeline auto-populate + fix /zopk 500 + rozszerzony health check

- Dodano funkcje automatycznego uzupełniania Timeline z bazy wiedzy:
  - get_timeline_suggestions() - pobiera milestone facts jako sugestie
  - create_milestone_from_suggestion() - tworzy milestone z faktu
  - categorize_milestones_with_ai() - kategoryzacja AI (nuclear/offshore/etc)
  - Auto-detekcja kategorii, dat i statusu z tekstu polskiego

- API endpoints:
  - GET /api/zopk/timeline/suggestions - lista sugestii z bazy wiedzy
  - POST /api/zopk/timeline/suggestions/approve - zatwierdź sugestię

- Fix: /zopk zwracał 500 (nieistniejąca kolumna is_verified)
  - Dodano kolumnę is_verified do modelu ZOPKMilestone
  - Usunięto filtr is_verified z query (do migracji)

- Health check rozszerzony z 15 do 55+ endpointów:
  - Public pages, Raporty, ZOPK, Chat, IT Audit
  - API publiczne, Admin Core, Admin Audits, Admin ZOPK

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-01-17 17:32:54 +01:00
parent 6156277629
commit 40116d3321
3 changed files with 573 additions and 6 deletions

141
app.py
View File

@ -800,22 +800,84 @@ def health_full():
results = []
all_ok = True
# List of endpoints to check (path, name)
# List of ALL endpoints to check (path, name)
# Comprehensive list updated 2026-01-17
# (path, display name) pairs probed by the health check.
# Every path appears exactly once so no endpoint is requested or reported twice.
endpoints = [
    # ========== PUBLIC PAGES ==========
    ('/', 'Strona główna'),
    ('/login', 'Logowanie'),
    ('/register', 'Rejestracja'),
    ('/release-notes', 'Historia zmian'),
    ('/search?q=test', 'Wyszukiwarka'),
    ('/aktualnosci', 'Aktualności'),
    ('/forum', 'Forum'),
    ('/kalendarz', 'Kalendarz wydarzeń'),
    ('/tablica', 'Tablica ogłoszeń'),
    ('/nowi-czlonkowie', 'Nowi członkowie'),
    ('/mapa-polaczen', 'Mapa połączeń'),
    ('/forgot-password', 'Reset hasła'),
    # ========== REPORTS ==========
    ('/raporty', 'Raporty'),
    ('/raporty/staz-czlonkostwa', 'Raport: Staż członkostwa'),
    ('/raporty/social-media', 'Raport: Social Media'),
    ('/raporty/struktura-branzowa', 'Raport: Struktura branżowa'),
    # ========== ZOPK PUBLIC ==========
    ('/zopk', 'ZOPK: Strona główna'),
    ('/zopk/aktualnosci', 'ZOPK: Aktualności'),
    # ========== CHAT ==========
    ('/chat', 'NordaGPT Chat'),
    # ========== IT AUDIT ==========
    ('/it-audit/form', 'IT Audit: Formularz'),
    # ========== PUBLIC API ==========
    ('/api/companies', 'API: Lista firm'),
    ('/api/model-info', 'API: Model info'),
    ('/api/gbp/audit/health', 'API: GBP health'),
    # ========== ADMIN: CORE ==========
    ('/admin/security', 'Admin: Bezpieczeństwo'),
    ('/admin/seo', 'Admin: SEO'),
    ('/admin/social-media', 'Admin: Social Media'),
    ('/admin/analytics', 'Admin: Analityka'),
    ('/admin/status', 'Admin: Status systemu'),
    ('/admin/health', 'Admin: Health dashboard'),
    ('/admin/debug', 'Admin: Debug'),
    ('/admin/ai-usage', 'Admin: AI Usage'),
    ('/admin/chat-analytics', 'Admin: Chat analytics'),
    ('/admin/users', 'Admin: Użytkownicy'),
    ('/admin/recommendations', 'Admin: Rekomendacje'),
    ('/admin/fees', 'Admin: Składki'),
    # ========== ADMIN: AUDITS ==========
    # /admin/seo and /admin/social-media are already listed under ADMIN: CORE.
    ('/admin/gbp-audit', 'Admin: GBP Audit'),
    ('/admin/social-audit', 'Admin: Social Audit'),
    ('/admin/it-audit', 'Admin: IT Audit'),
    ('/admin/digital-maturity', 'Admin: Digital Maturity'),
    ('/admin/krs-audit', 'Admin: KRS Audit'),
    # ========== ADMIN: COMMUNITY ==========
    ('/admin/forum', 'Admin: Forum'),
    ('/admin/kalendarz', 'Admin: Kalendarz'),
    # ========== ADMIN: ZOPK ==========
    ('/admin/zopk', 'Admin: ZOPK Panel'),
    ('/admin/zopk/news', 'Admin: ZOPK News'),
    ('/admin/zopk/knowledge', 'Admin: ZOPK Knowledge'),
    ('/admin/zopk/knowledge/chunks', 'Admin: ZOPK Chunks'),
    ('/admin/zopk/knowledge/facts', 'Admin: ZOPK Facts'),
    ('/admin/zopk/knowledge/entities', 'Admin: ZOPK Entities'),
    ('/admin/zopk/knowledge/duplicates', 'Admin: ZOPK Duplikaty'),
    ('/admin/zopk/knowledge/fact-duplicates', 'Admin: ZOPK Fact Duplicates'),
    ('/admin/zopk/knowledge/graph', 'Admin: ZOPK Graf'),
    ('/admin/zopk/timeline', 'Admin: ZOPK Timeline'),
    # ========== ZOPK API ==========
    ('/api/zopk/milestones', 'API: ZOPK Milestones'),
    ('/api/zopk/knowledge/dashboard-stats', 'API: ZOPK Dashboard stats'),
]
# Dodaj losową firmę do sprawdzenia
@ -10010,9 +10072,10 @@ def zopk_index():
).order_by(ZOPKProject.sort_order, ZOPKProject.name).all()
# Get milestones for timeline (sorted by target_date)
milestones = db.query(ZOPKMilestone).filter(
ZOPKMilestone.is_verified == True # Only show verified milestones
).order_by(ZOPKMilestone.target_date.asc()).all()
# Show all milestones - is_verified column will be added in migration
milestones = db.query(ZOPKMilestone).order_by(
ZOPKMilestone.target_date.asc()
).all()
# Get active stakeholders
stakeholders = db.query(ZOPKStakeholder).filter(
@ -12373,6 +12436,72 @@ def api_zopk_milestone_delete(milestone_id):
finally:
db.close()
@app.route('/api/zopk/timeline/suggestions')
@login_required
def api_zopk_timeline_suggestions():
    """API - milestone suggestions derived from the knowledge base.

    Query parameters:
        limit: maximum number of suggestions (integer, defaults to 30).
        only_verified: 'true' to restrict to verified facts (defaults to 'false').
        use_ai: 'true' to additionally run the AI categorization step
            (defaults to 'false').

    Responds with 403 for non-admin users, with the service result as JSON
    otherwise, and with a 500 JSON error if anything raises along the way.
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    from zopk_knowledge_service import get_timeline_suggestions

    def _flag(name):
        # Boolean switches are passed as the literal strings 'true' / 'false'.
        return request.args.get(name, 'false').lower() == 'true'

    max_items = request.args.get('limit', 30, type=int)
    verified_only = _flag('only_verified')
    wants_ai = _flag('use_ai')

    session = SessionLocal()
    try:
        payload = get_timeline_suggestions(
            session, limit=max_items, only_verified=verified_only
        )
        # The AI pass only makes sense when the lookup succeeded and returned rows.
        if payload['success'] and wants_ai and payload.get('suggestions'):
            from zopk_knowledge_service import categorize_milestones_with_ai
            payload['suggestions'] = categorize_milestones_with_ai(
                session, payload['suggestions']
            )
        return jsonify(payload)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
    finally:
        session.close()
@app.route('/api/zopk/timeline/suggestions/approve', methods=['POST'])
@login_required
def api_zopk_timeline_suggestion_approve():
    """API - approve a suggestion and create a timeline milestone from it.

    Expects a JSON object with a required ``fact_id`` and optional ``title``,
    ``description``, ``category``, ``target_date``, ``status`` and
    ``source_url`` keys.

    Responds with 403 for non-admin users, 400 for a missing/invalid body or
    a missing ``fact_id``, the service result as JSON otherwise, and a 500
    JSON error if the service call raises.
    """
    if not current_user.is_admin:
        return jsonify({'error': 'Forbidden'}), 403

    from zopk_knowledge_service import create_milestone_from_suggestion

    # silent=True makes malformed or non-JSON bodies come back as None, so the
    # client always receives the JSON error below rather than a framework
    # error page.
    data = request.get_json(silent=True)
    if not data:
        return jsonify({'error': 'No data provided'}), 400
    # A JSON array or scalar has no .get(); reject it instead of crashing.
    if not isinstance(data, dict):
        return jsonify({'error': 'JSON object expected'}), 400

    fact_id = data.get('fact_id')
    if not fact_id:
        return jsonify({'error': 'fact_id is required'}), 400

    db = SessionLocal()
    try:
        result = create_milestone_from_suggestion(
            db_session=db,
            fact_id=fact_id,
            # `or` (not a .get default) so explicit nulls / empty strings
            # also fall back to the defaults.
            title=data.get('title') or 'Kamień milowy',
            description=data.get('description'),
            category=data.get('category') or 'other',
            target_date=data.get('target_date'),
            status=data.get('status') or 'planned',
            source_url=data.get('source_url')
        )
        return jsonify(result)
    except Exception as e:
        db.rollback()
        return jsonify({'success': False, 'error': str(e)}), 500
    finally:
        db.close()
# ============================================================
# KRS AUDIT (Krajowy Rejestr Sądowy)
# ============================================================

View File

@ -3032,6 +3032,7 @@ class ZOPKMilestone(Base):
icon = Column(String(50)) # emoji lub ikona
color = Column(String(20)) # kolor dla timeline
is_featured = Column(Boolean, default=False)
is_verified = Column(Boolean, default=True) # Czy zatwierdzony do wyświetlenia
created_at = Column(DateTime, default=datetime.now)
updated_at = Column(DateTime, default=datetime.now, onupdate=datetime.now)

View File

@ -2268,3 +2268,440 @@ def get_knowledge_dashboard_stats(db_session) -> Dict:
'news': {'total': news_total, 'with_extraction': news_with_extraction, 'pending': news_total - news_with_extraction},
'top_entities': [{'id': e.id, 'name': e.name, 'type': e.entity_type, 'mentions': e.mentions_count} for e in top_entities]
}
# ============================================================
# TIMELINE SUGGESTIONS (Auto-populate from Knowledge Base)
# ============================================================
def get_timeline_suggestions(
    db_session,
    limit: int = 50,
    only_verified: bool = True
) -> Dict:
    """
    Get milestone facts from the knowledge base that could become timeline milestones.

    Selects facts with ``fact_type = 'milestone'`` for which no timeline
    milestone exists that shares the same source news item and has a title
    similar to the fact text. Results are ranked verified-first, then by
    confidence score (highest first), and the limit is applied after ranking.

    NOTE(review): the query uses PostgreSQL-specific syntax (``DISTINCT ON``,
    ``NULLS LAST``) and a ``similarity()`` SQL function - presumably provided
    by the pg_trgm extension; confirm it is installed.

    Args:
        db_session: Database session
        limit: Max suggestions to return
        only_verified: Only include verified facts

    Returns:
        On success:
        {
            'success': True,
            'suggestions': [...],
            'total_milestone_facts': int,
            'already_in_timeline': int,   # milestone facts whose source news
                                          # item is referenced by a milestone
            'suggestions_count': int
        }
        On failure: {'success': False, 'error': str, 'suggestions': []}
    """
    from sqlalchemy import text

    def _format_date(value):
        """Return value as YYYY-MM-DD when it is date-like, else its str()."""
        return value.strftime('%Y-%m-%d') if hasattr(value, 'strftime') else str(value)

    def _format_iso(value):
        """Return value in ISO format when it is date-like, else its str()."""
        return value.isoformat() if hasattr(value, 'isoformat') else str(value)

    try:
        # Count total milestone facts
        total_query = text("""
            SELECT COUNT(*) FROM zopk_knowledge_facts
            WHERE fact_type = 'milestone'
        """)
        total_milestone_facts = db_session.execute(total_query).scalar() or 0

        # Count facts already linked to timeline
        linked_query = text("""
            SELECT COUNT(DISTINCT f.id)
            FROM zopk_knowledge_facts f
            JOIN zopk_milestones m ON m.source_news_id = f.source_news_id
            WHERE f.fact_type = 'milestone'
        """)
        already_linked = db_session.execute(linked_query).scalar() or 0

        # Fixed SQL fragment chosen from a boolean - never built from user input.
        verified_filter = "AND f.is_verified = TRUE" if only_verified else ""

        # De-duplicate per fact in the inner query, then rank in the outer one.
        # Ordering by f.id before LIMIT (as DISTINCT ON alone would require)
        # would return the oldest facts instead of the best ones.
        suggestions_query = text(f"""
            SELECT * FROM (
                SELECT DISTINCT ON (f.id)
                    f.id as fact_id,
                    f.full_text,
                    f.subject,
                    f.predicate,
                    f.object,
                    f.confidence_score,
                    f.numeric_value,
                    f.numeric_unit,
                    f.is_verified,
                    f.source_news_id,
                    n.title as news_title,
                    n.published_at as news_date,
                    n.url as news_url,
                    n.source_name
                FROM zopk_knowledge_facts f
                LEFT JOIN zopk_news n ON n.id = f.source_news_id
                WHERE f.fact_type = 'milestone'
                {verified_filter}
                AND NOT EXISTS (
                    SELECT 1 FROM zopk_milestones m
                    WHERE m.source_news_id = f.source_news_id
                    AND similarity(m.title, f.full_text) > 0.5
                )
                ORDER BY f.id
            ) candidates
            ORDER BY candidates.is_verified DESC NULLS LAST,
                     candidates.confidence_score DESC NULLS LAST,
                     candidates.fact_id
            LIMIT :limit
        """)
        results = db_session.execute(suggestions_query, {'limit': limit}).fetchall()

        suggestions = []
        for row in results:
            # Auto-detect category based on keywords
            category = _detect_milestone_category(row.full_text, row.subject)

            # Prefer a date mentioned in the text; fall back to the article date
            target_date = _extract_date_from_text(row.full_text)
            if not target_date and row.news_date:
                target_date = _format_date(row.news_date)

            # Only a missing score gets the neutral 0.5; a real 0.0 is kept.
            confidence = (
                float(row.confidence_score)
                if row.confidence_score is not None
                else 0.5
            )

            suggestions.append({
                'fact_id': row.fact_id,
                'full_text': row.full_text,
                'subject': row.subject,
                'predicate': row.predicate,
                'object': row.object,
                'confidence_score': confidence,
                'is_verified': row.is_verified,
                'source_news_id': row.source_news_id,
                'news_title': row.news_title,
                'news_date': _format_iso(row.news_date) if row.news_date else None,
                'news_url': row.news_url,
                'source_name': row.source_name,
                # Auto-suggested values for timeline
                'suggested_title': _generate_milestone_title(row.full_text, row.subject),
                'suggested_category': category,
                'suggested_date': target_date,
                'suggested_status': 'completed' if _is_past_event(row.full_text) else 'planned'
            })

        return {
            'success': True,
            'suggestions': suggestions,
            'total_milestone_facts': total_milestone_facts,
            'already_in_timeline': already_linked,
            'suggestions_count': len(suggestions)
        }
    except Exception as e:
        logger.error(f"Error getting timeline suggestions: {e}")
        return {
            'success': False,
            'error': str(e),
            'suggestions': []
        }
def _detect_milestone_category(text: str, subject: str = None) -> str:
"""
Auto-detect milestone category based on keywords.
Categories: nuclear, offshore, infrastructure, defense, other
"""
text_lower = (text or '').lower()
subject_lower = (subject or '').lower()
combined = f"{text_lower} {subject_lower}"
# Nuclear energy keywords
nuclear_keywords = [
'jądrowa', 'jądrowy', 'atomowa', 'atomowy', 'nuclear',
'lubiatowo', 'kopalino', 'pej', 'polskie elektrownie',
'reaktor', 'uran', 'westinghouse', 'ap1000'
]
if any(kw in combined for kw in nuclear_keywords):
return 'nuclear'
# Offshore wind keywords
offshore_keywords = [
'offshore', 'wiatrowa', 'wiatrowy', 'morska farma', 'farma wiatrowa',
'baltic power', 'baltica', 'orlen', 'northland', 'bałtyk',
'turbina', 'mw wiatr', 'gw wiatr'
]
if any(kw in combined for kw in offshore_keywords):
return 'offshore'
# Defense/military keywords
defense_keywords = [
'kongsberg', 'obronność', 'obronny', 'wojsko', 'wojskowy',
'mon ', 'ministerstwo obrony', 'zbrojeniowy', 'dron', 'amunicja',
'nsm', 'rakieta', 'samolot bojowy', 'okręt', 'bezpieczeństwo',
'nato', 'sojusz'
]
if any(kw in combined for kw in defense_keywords):
return 'defense'
# Infrastructure keywords
infra_keywords = [
's6', 's7', 'via pomerania', 'droga', 'autostrada', 'ekspresowa',
'kolej', 'pkp', 'port', 'terminal', 'lotnisko', 'most',
'infrastruktura', 'budowa', 'remont', 'przebudowa',
'wodociąg', 'kanalizacja', 'oczyszczalnia'
]
if any(kw in combined for kw in infra_keywords):
return 'infrastructure'
return 'other'
def _generate_milestone_title(full_text: str, subject: str = None) -> str:
"""
Generate a concise title for milestone from fact text.
Truncates to ~100 chars and tries to keep meaningful content.
"""
if not full_text:
return subject or "Kamień milowy"
# If text is short enough, use as is
if len(full_text) <= 100:
return full_text
# Try to find a natural break point
text = full_text[:150]
# Look for sentence end
for sep in ['. ', ', ', ' - ', ': ']:
if sep in text:
parts = text.split(sep)
if len(parts[0]) >= 30:
return parts[0] + ('.' if not parts[0].endswith('.') else '')
# Just truncate with ellipsis
return text[:97] + '...'
def _extract_date_from_text(text: str) -> str:
"""
Try to extract date from milestone text.
Returns ISO format date string or None.
"""
import re
from datetime import datetime
if not text:
return None
text_lower = text.lower()
# Patterns to match
patterns = [
# "w 2025 roku", "2025 r.", "rok 2025"
(r'\b(20[2-3]\d)\s*(rok|r\.?)\b', lambda m: f"{m.group(1)}-01-01"),
(r'\brok\s*(20[2-3]\d)\b', lambda m: f"{m.group(1)}-01-01"),
# "w marcu 2025", "marzec 2025"
(r'\b(stycz\w*|luty|lut\w*|marz\w*|kwie\w*|maj\w*|czerw\w*|lip\w*|sierp\w*|wrze\w*|paźdz\w*|listop\w*|grud\w*)\s*(20[2-3]\d)',
lambda m: _month_to_date(m.group(1), m.group(2))),
# "Q1 2025", "Q3 2026"
(r'\bQ([1-4])\s*(20[2-3]\d)', lambda m: f"{m.group(2)}-{int(m.group(1))*3-2:02d}-01"),
# "I kwartał 2025"
(r'\b(I|II|III|IV)\s*kwarta\w*\s*(20[2-3]\d)',
lambda m: f"{m.group(2)}-{{'I':1,'II':4,'III':7,'IV':10}[m.group(1)]:02d}-01"),
]
for pattern, formatter in patterns:
match = re.search(pattern, text_lower)
if match:
try:
return formatter(match)
except:
continue
return None
def _month_to_date(month_name: str, year: str) -> str:
"""Convert Polish month name to date string."""
months = {
'stycz': '01', 'luty': '02', 'lut': '02', 'marz': '03',
'kwie': '04', 'maj': '05', 'czerw': '06', 'lip': '07',
'sierp': '08', 'wrze': '09', 'paźdz': '10', 'listop': '11',
'grud': '12'
}
for prefix, num in months.items():
if month_name.startswith(prefix):
return f"{year}-{num}-01"
return f"{year}-01-01"
def _is_past_event(text: str) -> bool:
"""
Detect if milestone text describes a past event (completed)
or future event (planned).
"""
if not text:
return False
text_lower = text.lower()
# Past tense indicators (Polish)
past_indicators = [
'podpisano', 'podpisał', 'zakończono', 'oddano', 'otwarto',
'uruchomiono', 'rozpoczęto', 'ogłoszono', 'przyznano',
'uzyskano', 'otrzymał', 'zdobył', 'wygrał', 'został',
'odbył się', 'odbyła się', 'miało miejsce'
]
# Future tense indicators
future_indicators = [
'planowany', 'planowane', 'planowana', 'ma zostać',
'będzie', 'zostanie', 'ma być', 'powstanie',
'w przyszłości', 'do końca', 'w ciągu'
]
past_count = sum(1 for ind in past_indicators if ind in text_lower)
future_count = sum(1 for ind in future_indicators if ind in text_lower)
return past_count > future_count
def create_milestone_from_suggestion(
    db_session,
    fact_id: int,
    title: str,
    description: str = None,
    category: str = 'other',
    target_date: str = None,
    status: str = 'planned',
    source_url: str = None
) -> Dict:
    """
    Create a timeline milestone from a knowledge fact suggestion.

    The milestone inherits the fact's source news id; when no description is
    given the fact's full text is used. The session is committed on success
    and rolled back on any error.

    Args:
        db_session: Database session
        fact_id: Source fact ID
        title: Milestone title (falls back to "Kamień milowy" when empty)
        description: Optional description
        category: nuclear, offshore, infrastructure, defense, other
        target_date: Target date (YYYY-MM-DD; a trailing time part is ignored).
            An unparseable value is logged and the milestone is created
            without a date.
        status: planned, in_progress, completed, delayed
        source_url: Source article URL

    Returns:
        {'success': True, 'milestone_id': int, 'title': str, 'category': str}
        or {'success': False, 'error': str}
    """
    from database import ZOPKMilestone, ZOPKKnowledgeFact
    from datetime import datetime

    try:
        # Get the source fact
        fact = db_session.query(ZOPKKnowledgeFact).get(fact_id)
        if not fact:
            return {'success': False, 'error': f'Fact {fact_id} not found'}

        title = title or 'Kamień milowy'

        # Parse target date. Only the date part is used so ISO timestamps such
        # as "2025-03-01T00:00:00" or "2025-03-01 00:00:00" are accepted too.
        parsed_date = None
        if target_date:
            date_part = str(target_date).strip().replace('T', ' ').split(' ')[0]
            try:
                parsed_date = datetime.strptime(date_part, '%Y-%m-%d').date()
            except ValueError:
                # Best effort: keep creating the milestone, but leave a trace
                # instead of dropping the date silently.
                logger.warning(
                    f"Ignoring invalid target_date {target_date!r} for fact #{fact_id}"
                )

        # Create milestone
        milestone = ZOPKMilestone(
            title=title,
            description=description or fact.full_text,
            category=category,
            target_date=parsed_date,
            # A completed milestone is assumed to have happened on its target date.
            actual_date=parsed_date if status == 'completed' else None,
            status=status,
            source_url=source_url,
            source_news_id=fact.source_news_id,
            is_featured=False
        )
        db_session.add(milestone)
        db_session.commit()

        logger.info(f"Created milestone #{milestone.id} from fact #{fact_id}: {title}")
        return {
            'success': True,
            'milestone_id': milestone.id,
            'title': title,
            'category': category
        }
    except Exception as e:
        db_session.rollback()
        logger.error(f"Error creating milestone from fact {fact_id}: {e}")
        return {'success': False, 'error': str(e)}
def categorize_milestones_with_ai(
    db_session,
    suggestions: List[Dict],
    model_name: str = "gemini-2.0-flash-exp"
) -> List[Dict]:
    """
    Use Gemini AI to categorize and enhance milestone suggestions.

    Sends the first 20 suggestions (fact text cut to 300 characters each) to
    the model in one request and stores the answers on the matching
    suggestion dicts under 'ai_category', 'ai_title', 'ai_date' and
    'ai_status'. The dicts are modified in place.

    This is strictly best-effort: if the Gemini client is unavailable, the
    request fails, or the reply cannot be parsed, a warning is logged and the
    suggestions are returned without AI fields. Invalid AI values fall back
    to the heuristic 'suggested_*' values (or None for the date).

    Args:
        db_session: Database session (not used by this function).
        suggestions: Suggestion dicts as produced by get_timeline_suggestions.
        model_name: Gemini model to use.

    Returns:
        The same list object that was passed in.
    """
    import json
    from datetime import datetime

    if not suggestions:
        return suggestions

    allowed_categories = {'nuclear', 'offshore', 'infrastructure', 'defense', 'other'}
    allowed_statuses = {'completed', 'in_progress', 'planned'}
    batch_size = min(len(suggestions), 20)  # Limit to 20 for API

    def _valid_date_or_none(value):
        """Keep only real YYYY-MM-DD strings (the model may answer "null")."""
        if not isinstance(value, str):
            return None
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return None
        return value

    try:
        # Imported here so a missing client library degrades to "no AI
        # enhancement" instead of failing the whole request.
        import google.generativeai as genai

        # Prepare batch for AI processing
        facts_text = "\n".join(
            f"{i+1}. {(s.get('full_text') or '')[:300]}"
            for i, s in enumerate(suggestions[:batch_size])
        )
        prompt = f"""Przeanalizuj poniższe fakty o projekcie ZOPK (Zielony Okręg Przemysłowy Kaszubia) i dla każdego zwróć:
- category: jedna z [nuclear, offshore, infrastructure, defense, other]
- short_title: zwięzły tytuł (max 80 znaków)
- target_date: data w formacie YYYY-MM-DD (jeśli można wywnioskować)
- status: jeden z [completed, in_progress, planned]
Kategorie:
- nuclear: elektrownia jądrowa, atom, Lubiatowo-Kopalino
- offshore: farmy wiatrowe, offshore wind, Baltic Power, Baltica
- infrastructure: drogi S6, Via Pomerania, porty, koleje
- defense: Kongsberg, przemysł zbrojeniowy, obronność, MON
Fakty:
{facts_text}
Odpowiedz TYLKO jako JSON array:
[{{"id": 1, "category": "...", "short_title": "...", "target_date": "YYYY-MM-DD lub null", "status": "..."}}]"""

        model = genai.GenerativeModel(model_name)
        response = model.generate_content(prompt)

        # Parse response - strip an optional ```json ... ``` fence first
        response_text = response.text.strip()
        if response_text.startswith('```'):
            response_text = response_text.split('```')[1]
            if response_text.startswith('json'):
                response_text = response_text[4:]
        ai_results = json.loads(response_text)
        if not isinstance(ai_results, list):
            raise ValueError("AI response is not a JSON array")

        # Merge AI results with suggestions
        for result in ai_results:
            if not isinstance(result, dict):
                continue
            try:
                idx = int(result.get('id', 0)) - 1
            except (TypeError, ValueError):
                continue
            # Only ids of facts that were actually sent are trusted.
            if not 0 <= idx < batch_size:
                continue

            suggestion = suggestions[idx]
            category = result.get('category')
            status = result.get('status')
            suggestion['ai_category'] = (
                category
                if isinstance(category, str) and category in allowed_categories
                else suggestion.get('suggested_category')
            )
            suggestion['ai_title'] = result.get('short_title') or suggestion.get('suggested_title')
            suggestion['ai_date'] = _valid_date_or_none(result.get('target_date'))
            suggestion['ai_status'] = (
                status
                if isinstance(status, str) and status in allowed_statuses
                else suggestion.get('suggested_status')
            )
        return suggestions
    except Exception as e:
        logger.warning(f"AI categorization failed: {e}")
        return suggestions  # Return original suggestions without AI enhancement