#!/usr/bin/env python3
"""
Internal Health Logger
======================

Cron job (*/5 * * * *) - records the server state every 5 minutes.
Makes it possible to tell an ISP outage apart from a server outage.

Usage:
    */5 * * * * cd /var/www/nordabiznes && DATABASE_URL=$(grep DATABASE_URL .env | cut -d'=' -f2) /var/www/nordabiznes/venv/bin/python3 scripts/internal_health_logger.py
"""

import os
import re
import sys
import subprocess
import urllib.request
import urllib.error
from datetime import datetime, timedelta

# Make the project root importable when the script is run from scripts/.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from database import SessionLocal, InternalHealthLog

HEALTH_URL = 'http://localhost:5000/health'
RETENTION_DAYS = 90

# Idle field of top's CPU summary line. Handles "96.2 id", the glued form
# "ni,100.0 id" that top prints when the value needs all of its column width,
# the older "96.2%id" layout, and a decimal comma.
_CPU_IDLE_RE = re.compile(r'(\d+(?:[.,]\d+)?)\s*%?\s*id\b')


def _run_command(args, timeout):
    """Run an external tool and return its ``CompletedProcess``.

    The C locale is forced so that labels ("Mem:") and decimal separators
    in the output do not depend on the environment the job is started from.

    Raises whatever ``subprocess.run`` raises (missing binary, timeout);
    callers treat any failure as "metric unavailable".
    """
    env = dict(os.environ, LC_ALL='C', LANG='C')
    return subprocess.run(
        args,
        capture_output=True,
        text=True,
        timeout=timeout,
        env=env,
    )


def _parse_cpu_percent(top_output):
    """Return CPU usage (100 - idle) parsed from ``top -bn1`` output, or None."""
    for line in top_output.splitlines():
        if 'Cpu' not in line:
            continue
        match = _CPU_IDLE_RE.search(line)
        if match:
            idle = float(match.group(1).replace(',', '.'))
            return round(100.0 - idle, 2)
    return None


def check_app_health():
    """Return True when the Flask application answers /health with HTTP 200."""
    try:
        req = urllib.request.Request(HEALTH_URL, method='GET')
        with urllib.request.urlopen(req, timeout=5) as resp:
            return resp.status == 200
    except Exception:
        # Connection refused, timeout, HTTP error status: all mean "not healthy".
        return False


def check_db_health():
    """Return True when a trivial query succeeds against the database."""
    try:
        from sqlalchemy import text

        db = SessionLocal()
        try:
            db.execute(text('SELECT 1'))
        finally:
            # Always release the session, also when the query fails.
            db.close()
        return True
    except Exception:
        return False


def get_cpu_percent():
    """Return CPU usage in percent taken from ``top``, or None if unavailable."""
    try:
        result = _run_command(['top', '-bn1'], timeout=10)
        # Summary line format: %Cpu(s): 2.3 us, 0.5 sy, ... 96.2 id
        return _parse_cpu_percent(result.stdout)
    except Exception:
        return None


def get_ram_percent():
    """Return RAM usage in percent taken from ``free -m``, or None if unavailable.

    Usage is computed as ``1 - available / total`` using the "available"
    column (7th field of the "Mem:" row).
    """
    try:
        result = _run_command(['free', '-m'], timeout=5)
        for line in result.stdout.splitlines():
            if line.startswith('Mem:'):
                parts = line.split()
                total = float(parts[1])
                available = float(parts[6])  # "available" column
                return round((1 - available / total) * 100, 2)
        return None
    except Exception:
        # Missing column, zero total, missing binary, timeout.
        return None


def get_disk_percent():
    """Return usage of the / filesystem in percent from ``df``, or None."""
    try:
        result = _run_command(['df', '-h', '/'], timeout=5)
        lines = result.stdout.strip().split('\n')
        if len(lines) >= 2:
            # Row format: Filesystem Size Used Avail Use% Mounted
            for part in lines[1].split():
                if part.endswith('%'):
                    return float(part.rstrip('%'))
        return None
    except Exception:
        return None


def get_gunicorn_workers():
    """Return the number of processes matched by ``pgrep -c gunicorn``.

    Returns 0 when pgrep reports no match or cannot be run.
    """
    try:
        result = _run_command(['pgrep', '-c', 'gunicorn'], timeout=5)
        return int(result.stdout.strip()) if result.returncode == 0 else 0
    except Exception:
        return 0


def cleanup_old_logs(db):
    """Delete health log rows older than RETENTION_DAYS.

    Args:
        db: open database session; committed only when rows were deleted.
    """
    cutoff = datetime.now() - timedelta(days=RETENTION_DAYS)
    deleted = db.query(InternalHealthLog).filter(
        InternalHealthLog.checked_at < cutoff
    ).delete()
    if deleted:
        db.commit()
        print(f"Usunięto {deleted} starych logów health (>{RETENTION_DAYS} dni)")


def main():
    """Collect one health sample, store it, and print a one-line summary.

    Errors are reported on stderr and the transaction is rolled back; the
    session is always closed.
    """
    db = SessionLocal()
    try:
        log = InternalHealthLog(
            checked_at=datetime.now(),
            app_ok=check_app_health(),
            db_ok=check_db_health(),
            cpu_percent=get_cpu_percent(),
            ram_percent=get_ram_percent(),
            disk_percent=get_disk_percent(),
            gunicorn_workers=get_gunicorn_workers()
        )
        db.add(log)
        db.commit()

        # Periodic cleanup: only on the run that falls in the 03:00-03:04
        # window, i.e. once a day with the */5 cron schedule.
        now = datetime.now()
        if now.hour == 3 and now.minute < 5:
            cleanup_old_logs(db)

        print(f"[{log.checked_at}] app={log.app_ok} db={log.db_ok} "
              f"cpu={log.cpu_percent}% ram={log.ram_percent}% disk={log.disk_percent}% "
              f"workers={log.gunicorn_workers}")
    except Exception as e:
        print(f"ERROR: {e}", file=sys.stderr)
        db.rollback()
    finally:
        db.close()


if __name__ == '__main__':
    main()