nordabiz/verify_architecture_accuracy.py
Maciej Pienczyn 8ee5945ccd fix: Handle NULL views_count in forum and classifieds
- Forum topics and classifieds now handle NULL views_count gracefully
- Prevents TypeError when incrementing view counter
2026-01-11 06:03:13 +01:00

377 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Architecture Documentation Accuracy Verification Script
Cross-checks architecture documentation against actual codebase to verify accuracy.
"""
import os
import re
import json
from pathlib import Path
from typing import List, Dict, Any, Tuple
class ArchitectureVerifier:
    """Cross-checks architecture documentation claims against the actual codebase.

    Accumulates three kinds of findings — verified items, warnings, and hard
    issues — via the ``add_*`` methods, and renders them into a markdown
    report with :meth:`generate_report`.
    """

    def __init__(self):
        # Findings accumulated by the verify_* methods.
        self.issues: List[Dict[str, str]] = []
        self.warnings: List[Dict[str, str]] = []
        self.verified: List[Dict[str, str]] = []
        # All relative paths are resolved against the current working directory,
        # so the script must be run from the repository root.
        self.base_path = Path('.')

    def add_issue(self, category: str, message: str, severity: str = 'ERROR'):
        """Record a hard discrepancy between documentation and code."""
        self.issues.append({
            'category': category,
            'message': message,
            'severity': severity
        })

    def add_warning(self, category: str, message: str):
        """Record a non-fatal documentation gap."""
        self.warnings.append({
            'category': category,
            'message': message
        })

    def add_verified(self, category: str, message: str):
        """Record a documentation claim confirmed against the codebase."""
        self.verified.append({
            'category': category,
            'message': message
        })

    def check_file_exists(self, filepath: str, doc_reference: str) -> bool:
        """Check if a file mentioned in docs actually exists.

        Records a verified item or an issue and returns True iff the file
        (or directory) exists under ``base_path``.
        """
        full_path = self.base_path / filepath
        if full_path.exists():
            self.add_verified('File Existence', f'{filepath} exists (referenced in {doc_reference})')
            return True
        else:
            self.add_issue('File Existence', f'{filepath} NOT FOUND (referenced in {doc_reference})')
            return False

    def verify_core_files(self):
        """Verify core application files exist."""
        print("📁 Verifying core application files...")
        core_files = [
            ('app.py', 'Flask Components'),
            ('database.py', 'Database Schema'),
            ('gemini_service.py', 'External Integrations'),
            ('nordabiz_chat.py', 'AI Chat Flow'),
            ('search_service.py', 'Search Flow'),
            ('email_service.py', 'External Integrations'),
            ('krs_api_service.py', 'External Integrations'),
            ('gbp_audit_service.py', 'External Integrations'),
            ('it_audit_service.py', 'External Integrations'),
        ]
        for filepath, doc_ref in core_files:
            self.check_file_exists(filepath, doc_ref)

    def verify_database_models(self):
        """Verify database models match documentation."""
        print("🗄️ Verifying database models...")
        if not self.check_file_exists('database.py', 'Database Schema'):
            return

        # Read database.py and extract model classes.
        with open('database.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # Find all class definitions that inherit from db.Model.
        model_pattern = r'class\s+(\w+)\(.*?db\.Model.*?\):'
        models_found = re.findall(model_pattern, content)

        # Documented models from 05-database-schema.md.
        documented_models = [
            'User', 'Company', 'CompanyService', 'CompanyCompetency',
            'CompanyContact', 'CompanySocialMedia', 'CompanyNews',
            'CompanyWebsiteAnalysis', 'CompanyDigitalMaturityAssessment',
            'AIChatConversation', 'AIChatMessage', 'AIAPICost',
            'ForumPost', 'ForumComment', 'Event', 'EventAttendance',
            'Message', 'Conversation', 'ConversationParticipant',
            'Classified', 'Recommendation', 'MembershipFee',
            'UserNotification', 'NewsModeration'
        ]

        # Documented models that are missing from the code.
        for model in documented_models:
            if model in models_found:
                self.add_verified('Database Model', f'Model {model} exists in database.py')
            else:
                self.add_warning('Database Model', f'Model {model} documented but not found in database.py')

        # Models in the code that the docs never mention ('Service' and
        # 'Competency' are deliberately excluded from this check).
        for model in models_found:
            if model not in documented_models and model not in ['Service', 'Competency']:
                self.add_warning('Database Model', f'Model {model} exists in code but not documented')

        print(f" Found {len(models_found)} models in database.py")
        print(f" Documented: {len(documented_models)} models")

    def verify_api_endpoints(self):
        """Verify API endpoints match documentation."""
        print("🌐 Verifying API endpoints...")
        if not self.check_file_exists('app.py', 'API Endpoints'):
            return

        with open('app.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # Find all route decorators (path plus optional methods list).
        route_pattern = r'@app\.route\([\'"]([^\'"]+)[\'"](?:,\s*methods=\[([^\]]+)\])?'
        routes_found = re.findall(route_pattern, content)
        print(f" Found {len(routes_found)} route definitions in app.py")

        # Sample critical endpoints to verify.
        critical_endpoints = [
            ('/', 'Homepage'),
            ('/search', 'Company Search'),
            ('/company/<slug>', 'Company Profile'),
            ('/login', 'Authentication'),
            ('/register', 'Authentication'),
            ('/api/chat/<int:conversation_id>/message', 'AI Chat'),
            ('/admin/seo', 'SEO Audit'),
            ('/health', 'Health Check'),
        ]
        for endpoint, description in critical_endpoints:
            # Normalize endpoint pattern for comparison by turning known URL
            # converters into regex wildcards.
            # NOTE(review): re.match anchors only at the start, so this is a
            # prefix match — '/' will match any route, and '/search' would also
            # match '/search/advanced'. Possibly intended; confirm before
            # tightening to re.fullmatch.
            endpoint_normalized = endpoint.replace('<slug>', '.*').replace('<int:conversation_id>', '.*')
            found = any(re.match(endpoint_normalized, route[0]) for route in routes_found)
            if found:
                self.add_verified('API Endpoint', f'{endpoint} ({description}) exists')
            else:
                self.add_issue('API Endpoint', f'{endpoint} ({description}) NOT FOUND')

    def verify_external_api_configs(self):
        """Verify external API configurations."""
        print("🔌 Verifying external API integrations...")
        # Service file -> human-readable API name.
        api_services = {
            'gemini_service.py': 'Google Gemini AI',
            'krs_api_service.py': 'KRS Open API',
            'gbp_audit_service.py': 'Google Places API',
            'email_service.py': 'Microsoft Graph API',
        }
        for filepath, api_name in api_services.items():
            if self.check_file_exists(filepath, 'External Integrations'):
                # Heuristic: look for API key/config markers in the source.
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
                if 'API_KEY' in content or 'api_key' in content or 'GOOGLE' in content or 'GEMINI' in content:
                    self.add_verified('API Integration', f'{api_name} has API configuration')
                else:
                    self.add_warning('API Integration', f'{api_name} service exists but no API key found')

    def verify_infrastructure_docs(self):
        """Verify infrastructure details match documentation."""
        print("🏗️ Verifying infrastructure documentation...")
        deployment_doc = self.base_path / 'docs/architecture/03-deployment-architecture.md'
        if deployment_doc.exists():
            with open(deployment_doc, 'r', encoding='utf-8') as f:
                content = f.read()
            # Verify critical configurations are mentioned (plain substring search).
            critical_items = [
                ('10.22.68.249', 'NORDABIZ-01 IP address'),
                ('10.22.68.250', 'R11-REVPROXY-01 IP address'),
                ('port 5000', 'Flask/Gunicorn port'),
                ('port 5432', 'PostgreSQL port'),
                ('NPM', 'Nginx Proxy Manager'),
            ]
            for item, description in critical_items:
                if item in content:
                    self.add_verified('Infrastructure', f'{description} documented')
                else:
                    self.add_warning('Infrastructure', f'{description} NOT found in deployment docs')
        else:
            self.add_issue('Infrastructure', 'Deployment architecture document not found')

    def verify_security_features(self):
        """Verify security features documented match implementation."""
        print("🔒 Verifying security features...")
        if not self.check_file_exists('app.py', 'Security Architecture'):
            return

        with open('app.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # (package, substring marker to search for, feature name)
        security_features = [
            ('Flask-Login', 'login_required', 'Authentication'),
            ('Flask-WTF', 'csrf', 'CSRF Protection'),
            ('Flask-Limiter', 'limiter', 'Rate Limiting'),
            ('werkzeug.security', 'generate_password_hash', 'Password Hashing'),
        ]
        for package, marker, feature in security_features:
            if marker in content:
                self.add_verified('Security', f'{feature} ({package}) implemented')
            else:
                self.add_warning('Security', f'{feature} ({package}) not found in app.py')

    def verify_data_flows(self):
        """Verify data flow documentation accuracy."""
        print("🔄 Verifying data flow documentation...")
        flow_docs = [
            'docs/architecture/flows/01-authentication-flow.md',
            'docs/architecture/flows/02-search-flow.md',
            'docs/architecture/flows/03-ai-chat-flow.md',
            'docs/architecture/flows/04-seo-audit-flow.md',
            'docs/architecture/flows/05-news-monitoring-flow.md',
            'docs/architecture/flows/06-http-request-flow.md',
        ]
        for doc in flow_docs:
            if (self.base_path / doc).exists():
                self.add_verified('Data Flow', f'{Path(doc).name} exists')
            else:
                self.add_issue('Data Flow', f'{Path(doc).name} NOT FOUND')

    def verify_scripts_directory(self):
        """Verify scripts mentioned in documentation exist."""
        print("📜 Verifying scripts directory...")
        expected_scripts = [
            'scripts/seo_audit.py',
            'scripts/social_media_audit.py',
        ]
        for script in expected_scripts:
            self.check_file_exists(script, 'SEO Audit Flow / Background Scripts')

    @staticmethod
    def _group_by_category(items: List[Dict[str, str]]) -> Dict[str, List[str]]:
        """Group finding messages by their 'category' key (insertion order kept)."""
        grouped: Dict[str, List[str]] = {}
        for item in items:
            grouped.setdefault(item['category'], []).append(item['message'])
        return grouped

    def generate_report(self) -> str:
        """Generate the markdown verification report and return it as a string."""
        from datetime import datetime  # local import: only used for the header timestamp

        report = []
        report.append("# Architecture Documentation Accuracy Verification Report\n")
        # Fix: the original emitted Path.cwd() here, labelling the working
        # directory as "Generated:"; write an actual generation timestamp.
        report.append(f"Generated: {datetime.now().isoformat(timespec='seconds')}\n")
        report.append("\n## Executive Summary\n")
        report.append(f"- ✅ **Verified Items:** {len(self.verified)}\n")
        report.append(f"- ⚠️ **Warnings:** {len(self.warnings)}\n")
        report.append(f"- ❌ **Issues:** {len(self.issues)}\n")

        if len(self.issues) == 0:
            report.append("\n✅ **RESULT: PASS** - Documentation accurately reflects codebase\n")
        else:
            report.append("\n⚠️ **RESULT: ISSUES FOUND** - See details below\n")

        # Verified items (capped at 10 messages per category to keep report short).
        if self.verified:
            report.append("\n## ✅ Verified Items\n")
            for cat, messages in sorted(self._group_by_category(self.verified).items()):
                report.append(f"\n### {cat} ({len(messages)} items)\n")
                for msg in messages[:10]:  # Limit to first 10 per category
                    report.append(f"- {msg}\n")
                if len(messages) > 10:
                    report.append(f"- ... and {len(messages) - 10} more\n")

        # Warnings
        if self.warnings:
            report.append("\n## ⚠️ Warnings\n")
            for cat, messages in sorted(self._group_by_category(self.warnings).items()):
                report.append(f"\n### {cat}\n")
                for msg in messages:
                    report.append(f"- {msg}\n")

        # Issues
        if self.issues:
            report.append("\n## ❌ Issues Found\n")
            for cat, messages in sorted(self._group_by_category(self.issues).items()):
                report.append(f"\n### {cat}\n")
                for msg in messages:
                    report.append(f"- {msg}\n")

        # Recommendations
        report.append("\n## 📋 Recommendations\n")
        if len(self.issues) == 0 and len(self.warnings) == 0:
            report.append("- Documentation is accurate and up-to-date\n")
            report.append("- No action required\n")
        else:
            if self.warnings:
                report.append("- Review warnings to ensure documentation completeness\n")
            if self.issues:
                report.append("- **CRITICAL:** Address issues found - documentation may be inaccurate\n")
                report.append("- Update documentation or fix code references\n")

        report.append("\n## Next Steps\n")
        report.append("1. Review all warnings and issues above\n")
        report.append("2. Update documentation or code as needed\n")
        report.append("3. Re-run this verification script\n")
        report.append("4. Proceed to subtask 8.3: Create maintenance checklist\n")
        return ''.join(report)

    def run_all_checks(self):
        """Run all verification checks and return the generated report string."""
        print("\n" + "="*60)
        print("Architecture Documentation Accuracy Verification")
        print("="*60 + "\n")

        self.verify_core_files()
        self.verify_database_models()
        self.verify_api_endpoints()
        self.verify_external_api_configs()
        self.verify_infrastructure_docs()
        self.verify_security_features()
        self.verify_data_flows()
        self.verify_scripts_directory()

        print("\n" + "="*60)
        print("Verification Complete")
        print("="*60 + "\n")
        print(f"✅ Verified: {len(self.verified)}")
        print(f"⚠️ Warnings: {len(self.warnings)}")
        print(f"❌ Issues: {len(self.issues)}")

        return self.generate_report()
def main():
    """Run every verification check, save the report, and echo it to stdout."""
    report = ArchitectureVerifier().run_all_checks()

    # Persist the markdown report next to the script's working directory.
    report_file = 'ARCHITECTURE_VERIFICATION_REPORT.md'
    Path(report_file).write_text(report, encoding='utf-8')
    print(f"\n📄 Report saved to: {report_file}")

    # Mirror the full report on stdout for interactive runs.
    print("\n" + report)


if __name__ == '__main__':
    main()