nordabiz/verify_architecture_accuracy.py
Maciej Pienczyn 8ee5945ccd fix: Handle NULL views_count in forum and classifieds
- Forum topics and classifieds now handle NULL views_count gracefully
- Prevents TypeError when incrementing view counter
2026-01-11 06:03:13 +01:00

377 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Architecture Documentation Accuracy Verification Script
Cross-checks architecture documentation against actual codebase to verify accuracy.
"""
import os
import re
import json
from pathlib import Path
from typing import List, Dict, Any, Tuple
class ArchitectureVerifier:
    """Cross-checks architecture documentation claims against the actual codebase.

    Accumulates three kinds of findings — verified items, warnings, and hard
    issues — via the ``add_*`` methods, and renders them into a markdown
    report with :meth:`generate_report`.
    """

    def __init__(self):
        # Findings accumulated by the verify_* methods.
        self.issues: List[Dict[str, str]] = []
        self.warnings: List[Dict[str, str]] = []
        self.verified: List[Dict[str, str]] = []
        # All relative paths are resolved against the current working directory,
        # so the script must be run from the repository root.
        self.base_path = Path('.')

    def add_issue(self, category: str, message: str, severity: str = 'ERROR'):
        """Record a hard discrepancy between documentation and code."""
        self.issues.append({
            'category': category,
            'message': message,
            'severity': severity
        })

    def add_warning(self, category: str, message: str):
        """Record a non-fatal documentation gap."""
        self.warnings.append({
            'category': category,
            'message': message
        })

    def add_verified(self, category: str, message: str):
        """Record a documentation claim confirmed against the codebase."""
        self.verified.append({
            'category': category,
            'message': message
        })

    def check_file_exists(self, filepath: str, doc_reference: str) -> bool:
        """Check if a file mentioned in docs actually exists.

        Records a verified item or an issue and returns True iff the file
        (or directory) exists under ``base_path``.
        """
        full_path = self.base_path / filepath
        if full_path.exists():
            self.add_verified('File Existence', f'{filepath} exists (referenced in {doc_reference})')
            return True
        else:
            self.add_issue('File Existence', f'{filepath} NOT FOUND (referenced in {doc_reference})')
            return False

    def verify_core_files(self):
        """Verify core application files exist."""
        print("📁 Verifying core application files...")
        core_files = [
            ('app.py', 'Flask Components'),
            ('database.py', 'Database Schema'),
            ('gemini_service.py', 'External Integrations'),
            ('nordabiz_chat.py', 'AI Chat Flow'),
            ('search_service.py', 'Search Flow'),
            ('email_service.py', 'External Integrations'),
            ('krs_api_service.py', 'External Integrations'),
            ('gbp_audit_service.py', 'External Integrations'),
            ('it_audit_service.py', 'External Integrations'),
        ]
        for filepath, doc_ref in core_files:
            self.check_file_exists(filepath, doc_ref)

    def verify_database_models(self):
        """Verify database models match documentation."""
        print("🗄️ Verifying database models...")
        if not self.check_file_exists('database.py', 'Database Schema'):
            return

        # Read database.py and extract model classes.
        with open('database.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # Find all class definitions that inherit from db.Model.
        model_pattern = r'class\s+(\w+)\(.*?db\.Model.*?\):'
        models_found = re.findall(model_pattern, content)

        # Documented models from 05-database-schema.md.
        documented_models = [
            'User', 'Company', 'CompanyService', 'CompanyCompetency',
            'CompanyContact', 'CompanySocialMedia', 'CompanyNews',
            'CompanyWebsiteAnalysis', 'CompanyDigitalMaturityAssessment',
            'AIChatConversation', 'AIChatMessage', 'AIAPICost',
            'ForumPost', 'ForumComment', 'Event', 'EventAttendance',
            'Message', 'Conversation', 'ConversationParticipant',
            'Classified', 'Recommendation', 'MembershipFee',
            'UserNotification', 'NewsModeration'
        ]

        # Documented models that are missing from the code.
        for model in documented_models:
            if model in models_found:
                self.add_verified('Database Model', f'Model {model} exists in database.py')
            else:
                self.add_warning('Database Model', f'Model {model} documented but not found in database.py')

        # Models in the code that the docs never mention ('Service' and
        # 'Competency' are deliberately excluded from this check).
        for model in models_found:
            if model not in documented_models and model not in ['Service', 'Competency']:
                self.add_warning('Database Model', f'Model {model} exists in code but not documented')

        print(f" Found {len(models_found)} models in database.py")
        print(f" Documented: {len(documented_models)} models")

    def verify_api_endpoints(self):
        """Verify API endpoints match documentation."""
        print("🌐 Verifying API endpoints...")
        if not self.check_file_exists('app.py', 'API Endpoints'):
            return

        with open('app.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # Find all route decorators (path plus optional methods list).
        route_pattern = r'@app\.route\([\'"]([^\'"]+)[\'"](?:,\s*methods=\[([^\]]+)\])?'
        routes_found = re.findall(route_pattern, content)
        print(f" Found {len(routes_found)} route definitions in app.py")

        # Sample critical endpoints to verify.
        critical_endpoints = [
            ('/', 'Homepage'),
            ('/search', 'Company Search'),
            ('/company/<slug>', 'Company Profile'),
            ('/login', 'Authentication'),
            ('/register', 'Authentication'),
            ('/api/chat/<int:conversation_id>/message', 'AI Chat'),
            ('/admin/seo', 'SEO Audit'),
            ('/health', 'Health Check'),
        ]
        for endpoint, description in critical_endpoints:
            # Normalize endpoint pattern for comparison by turning known URL
            # converters into regex wildcards.
            # NOTE(review): re.match anchors only at the start, so this is a
            # prefix match — '/' will match any route, and '/search' would also
            # match '/search/advanced'. Possibly intended; confirm before
            # tightening to re.fullmatch.
            endpoint_normalized = endpoint.replace('<slug>', '.*').replace('<int:conversation_id>', '.*')
            found = any(re.match(endpoint_normalized, route[0]) for route in routes_found)
            if found:
                self.add_verified('API Endpoint', f'{endpoint} ({description}) exists')
            else:
                self.add_issue('API Endpoint', f'{endpoint} ({description}) NOT FOUND')

    def verify_external_api_configs(self):
        """Verify external API configurations."""
        print("🔌 Verifying external API integrations...")
        # Service file -> human-readable API name.
        api_services = {
            'gemini_service.py': 'Google Gemini AI',
            'krs_api_service.py': 'KRS Open API',
            'gbp_audit_service.py': 'Google Places API',
            'email_service.py': 'Microsoft Graph API',
        }
        for filepath, api_name in api_services.items():
            if self.check_file_exists(filepath, 'External Integrations'):
                # Heuristic: look for API key/config markers in the source.
                with open(filepath, 'r', encoding='utf-8') as f:
                    content = f.read()
                if 'API_KEY' in content or 'api_key' in content or 'GOOGLE' in content or 'GEMINI' in content:
                    self.add_verified('API Integration', f'{api_name} has API configuration')
                else:
                    self.add_warning('API Integration', f'{api_name} service exists but no API key found')

    def verify_infrastructure_docs(self):
        """Verify infrastructure details match documentation."""
        print("🏗️ Verifying infrastructure documentation...")
        deployment_doc = self.base_path / 'docs/architecture/03-deployment-architecture.md'
        if deployment_doc.exists():
            with open(deployment_doc, 'r', encoding='utf-8') as f:
                content = f.read()
            # Verify critical configurations are mentioned (plain substring search).
            critical_items = [
                ('10.22.68.249', 'NORDABIZ-01 IP address'),
                ('10.22.68.250', 'R11-REVPROXY-01 IP address'),
                ('port 5000', 'Flask/Gunicorn port'),
                ('port 5432', 'PostgreSQL port'),
                ('NPM', 'Nginx Proxy Manager'),
            ]
            for item, description in critical_items:
                if item in content:
                    self.add_verified('Infrastructure', f'{description} documented')
                else:
                    self.add_warning('Infrastructure', f'{description} NOT found in deployment docs')
        else:
            self.add_issue('Infrastructure', 'Deployment architecture document not found')

    def verify_security_features(self):
        """Verify security features documented match implementation."""
        print("🔒 Verifying security features...")
        if not self.check_file_exists('app.py', 'Security Architecture'):
            return

        with open('app.py', 'r', encoding='utf-8') as f:
            content = f.read()

        # (package, substring marker to search for, feature name)
        security_features = [
            ('Flask-Login', 'login_required', 'Authentication'),
            ('Flask-WTF', 'csrf', 'CSRF Protection'),
            ('Flask-Limiter', 'limiter', 'Rate Limiting'),
            ('werkzeug.security', 'generate_password_hash', 'Password Hashing'),
        ]
        for package, marker, feature in security_features:
            if marker in content:
                self.add_verified('Security', f'{feature} ({package}) implemented')
            else:
                self.add_warning('Security', f'{feature} ({package}) not found in app.py')

    def verify_data_flows(self):
        """Verify data flow documentation accuracy."""
        print("🔄 Verifying data flow documentation...")
        flow_docs = [
            'docs/architecture/flows/01-authentication-flow.md',
            'docs/architecture/flows/02-search-flow.md',
            'docs/architecture/flows/03-ai-chat-flow.md',
            'docs/architecture/flows/04-seo-audit-flow.md',
            'docs/architecture/flows/05-news-monitoring-flow.md',
            'docs/architecture/flows/06-http-request-flow.md',
        ]
        for doc in flow_docs:
            if (self.base_path / doc).exists():
                self.add_verified('Data Flow', f'{Path(doc).name} exists')
            else:
                self.add_issue('Data Flow', f'{Path(doc).name} NOT FOUND')

    def verify_scripts_directory(self):
        """Verify scripts mentioned in documentation exist."""
        print("📜 Verifying scripts directory...")
        expected_scripts = [
            'scripts/seo_audit.py',
            'scripts/social_media_audit.py',
        ]
        for script in expected_scripts:
            self.check_file_exists(script, 'SEO Audit Flow / Background Scripts')

    @staticmethod
    def _group_by_category(items: List[Dict[str, str]]) -> Dict[str, List[str]]:
        """Group finding messages by their 'category' key (insertion order kept)."""
        grouped: Dict[str, List[str]] = {}
        for item in items:
            grouped.setdefault(item['category'], []).append(item['message'])
        return grouped

    def generate_report(self) -> str:
        """Generate the markdown verification report and return it as a string."""
        from datetime import datetime  # local import: only used for the header timestamp

        report = []
        report.append("# Architecture Documentation Accuracy Verification Report\n")
        # Fix: the original emitted Path.cwd() here, labelling the working
        # directory as "Generated:"; write an actual generation timestamp.
        report.append(f"Generated: {datetime.now().isoformat(timespec='seconds')}\n")
        report.append("\n## Executive Summary\n")
        report.append(f"- ✅ **Verified Items:** {len(self.verified)}\n")
        report.append(f"- ⚠️ **Warnings:** {len(self.warnings)}\n")
        report.append(f"- ❌ **Issues:** {len(self.issues)}\n")

        if len(self.issues) == 0:
            report.append("\n✅ **RESULT: PASS** - Documentation accurately reflects codebase\n")
        else:
            report.append("\n⚠️ **RESULT: ISSUES FOUND** - See details below\n")

        # Verified items (capped at 10 messages per category to keep report short).
        if self.verified:
            report.append("\n## ✅ Verified Items\n")
            for cat, messages in sorted(self._group_by_category(self.verified).items()):
                report.append(f"\n### {cat} ({len(messages)} items)\n")
                for msg in messages[:10]:  # Limit to first 10 per category
                    report.append(f"- {msg}\n")
                if len(messages) > 10:
                    report.append(f"- ... and {len(messages) - 10} more\n")

        # Warnings
        if self.warnings:
            report.append("\n## ⚠️ Warnings\n")
            for cat, messages in sorted(self._group_by_category(self.warnings).items()):
                report.append(f"\n### {cat}\n")
                for msg in messages:
                    report.append(f"- {msg}\n")

        # Issues
        if self.issues:
            report.append("\n## ❌ Issues Found\n")
            for cat, messages in sorted(self._group_by_category(self.issues).items()):
                report.append(f"\n### {cat}\n")
                for msg in messages:
                    report.append(f"- {msg}\n")

        # Recommendations
        report.append("\n## 📋 Recommendations\n")
        if len(self.issues) == 0 and len(self.warnings) == 0:
            report.append("- Documentation is accurate and up-to-date\n")
            report.append("- No action required\n")
        else:
            if self.warnings:
                report.append("- Review warnings to ensure documentation completeness\n")
            if self.issues:
                report.append("- **CRITICAL:** Address issues found - documentation may be inaccurate\n")
                report.append("- Update documentation or fix code references\n")

        report.append("\n## Next Steps\n")
        report.append("1. Review all warnings and issues above\n")
        report.append("2. Update documentation or code as needed\n")
        report.append("3. Re-run this verification script\n")
        report.append("4. Proceed to subtask 8.3: Create maintenance checklist\n")
        return ''.join(report)

    def run_all_checks(self):
        """Run all verification checks and return the generated report string."""
        print("\n" + "="*60)
        print("Architecture Documentation Accuracy Verification")
        print("="*60 + "\n")

        self.verify_core_files()
        self.verify_database_models()
        self.verify_api_endpoints()
        self.verify_external_api_configs()
        self.verify_infrastructure_docs()
        self.verify_security_features()
        self.verify_data_flows()
        self.verify_scripts_directory()

        print("\n" + "="*60)
        print("Verification Complete")
        print("="*60 + "\n")
        print(f"✅ Verified: {len(self.verified)}")
        print(f"⚠️ Warnings: {len(self.warnings)}")
        print(f"❌ Issues: {len(self.issues)}")

        return self.generate_report()
def main():
    """Run every verification check, save the report, and echo it to stdout."""
    report = ArchitectureVerifier().run_all_checks()

    # Persist the markdown report next to the script's working directory.
    report_file = 'ARCHITECTURE_VERIFICATION_REPORT.md'
    Path(report_file).write_text(report, encoding='utf-8')
    print(f"\n📄 Report saved to: {report_file}")

    # Mirror the full report on stdout for interactive runs.
    print("\n" + report)


if __name__ == '__main__':
    main()