diff --git a/audit_ai_service.py b/audit_ai_service.py index c2f3ece..b7c072e 100644 --- a/audit_ai_service.py +++ b/audit_ai_service.py @@ -30,6 +30,7 @@ from database import ( ) from youtube_service import YouTubeService from crux_service import CrUXService +from benchmark_service import BenchmarkService logger = logging.getLogger(__name__) @@ -270,6 +271,9 @@ def _collect_gbp_data(db, company) -> dict: else: photo_status = f"{photo_count} zdjęć (dobra ilość)" + # AI-enhanced review sentiment (if available) + review_sentiment = audit.review_sentiment + return { 'company_name': company.name, 'company_category': company.category.name if company.category else None, @@ -293,6 +297,7 @@ def _collect_gbp_data(db, company) -> dict: 'reviews_without_response': audit.reviews_without_response, 'review_response_rate': float(audit.review_response_rate) if audit.review_response_rate else None, 'review_keywords': audit.review_keywords, # Top keywords from reviews (already collected) + 'review_sentiment': review_sentiment, # AI-enhanced sentiment analysis # Activity 'has_posts': audit.has_posts, 'posts_count_30d': audit.posts_count_30d, @@ -378,6 +383,20 @@ def _collect_social_data(db, company) -> dict: def _build_seo_prompt(data: dict) -> str: """Build Gemini prompt for SEO audit analysis.""" + # Benchmark comparison + benchmark_section = "" + benchmarks = data.get('_benchmarks') + if benchmarks and benchmarks.get('seo'): + bm = benchmarks['seo'] + cat = benchmarks.get('category_name', '?') + benchmark_section = f""" + +BENCHMARK (średnia w kategorii "{cat}", {benchmarks.get('category_company_count', '?')} firm): +- Średni wynik SEO: {bm.get('avg_seo_score', '?')}/100 +- Średnia wydajność: {bm.get('avg_performance_score', '?')}/100 +- Średni czas ładowania: {bm.get('avg_load_time_ms', '?')} ms +Porównaj wyniki tej firmy ze średnią kategorii w analizie.""" + return f"""Jesteś ekspertem SEO analizującym stronę internetową lokalnej firmy w Polsce. 
DANE FIRMY: @@ -461,6 +480,7 @@ Treść: Formaty obrazów: - Nowoczesne (WebP/AVIF/SVG): {data.get('modern_format_ratio', '?')}% ({data.get('webp_count', 0)} WebP) - Legacy (JPEG/PNG): {data.get('legacy_image_count', '?')} obrazów +{benchmark_section} ZADANIE: Przygotuj analizę w formacie JSON z dwoma kluczami: @@ -485,11 +505,37 @@ Odpowiedz WYŁĄCZNIE poprawnym JSON-em, bez markdown, bez komentarzy.""" def _build_gbp_prompt(data: dict) -> str: """Build Gemini prompt for GBP audit analysis.""" + # Benchmark comparison + gbp_benchmark_section = "" + benchmarks = data.get('_benchmarks') + if benchmarks and benchmarks.get('gbp'): + bm = benchmarks['gbp'] + cat = benchmarks.get('category_name', '?') + gbp_benchmark_section = f""" + +BENCHMARK (średnia w kategorii "{cat}", {benchmarks.get('category_company_count', '?')} firm): +- Średnia kompletność GBP: {bm.get('avg_completeness_score', '?')}/100 +- Średnia ocena: {bm.get('avg_rating', '?')}/5 +- Średnia liczba opinii: {bm.get('avg_review_count', '?')} +- Średnia liczba zdjęć: {bm.get('avg_photo_count', '?')} +Porównaj wyniki tej firmy ze średnią kategorii w analizie.""" + # Build review keywords line (if available) review_keywords_line = "" if data.get('review_keywords'): review_keywords_line = f"\n- Top słowa kluczowe z opinii: {', '.join(data.get('review_keywords', []))}" + # Build AI sentiment section + sentiment_section = "" + sentiment = data.get('review_sentiment') + if sentiment and isinstance(sentiment, dict): + pos = sentiment.get('positive', 0) + neu = sentiment.get('neutral', 0) + neg = sentiment.get('negative', 0) + total = pos + neu + neg + if total > 0: + sentiment_section = f"\n- Sentyment opinii: {pos} pozytywnych, {neu} neutralnych, {neg} negatywnych" + # Build description keywords section description_keywords_section = "\nSłowa kluczowe w opisie:\n" if data.get('description_keywords'): @@ -570,7 +616,7 @@ Opinie: - Średnia ocena: {data.get('average_rating', 'brak')} - Z odpowiedzią: 
{data.get('reviews_with_response', 0)} - Bez odpowiedzi: {data.get('reviews_without_response', 0)} -- Wskaźnik odpowiedzi: {data.get('review_response_rate', 'brak')}%{review_keywords_line} +- Wskaźnik odpowiedzi: {data.get('review_response_rate', 'brak')}%{review_keywords_line}{sentiment_section} Aktywność (UWAGA: te pola wymagają autoryzacji OAuth i są obecnie niedostępne): - Posty: {('✓ (' + str(data.get('posts_count_30d', 0)) + ' w 30 dni)') if data.get('has_posts') else '[dane niedostępne bez autoryzacji OAuth]'} @@ -581,6 +627,7 @@ NAP: - Spójność NAP: {'✓' if data.get('nap_consistent') else '✗'} - Problemy NAP: {data.get('nap_issues', 'brak')}{attributes_section} {description_keywords_section} +{gbp_benchmark_section} ZADANIE: Przygotuj analizę w formacie JSON z dwoma kluczami: @@ -605,6 +652,20 @@ Odpowiedz WYŁĄCZNIE poprawnym JSON-em, bez markdown, bez komentarzy.""" def _build_social_prompt(data: dict) -> str: """Build Gemini prompt for social media audit analysis.""" + # Benchmark comparison + social_benchmark_section = "" + benchmarks = data.get('_benchmarks') + if benchmarks and benchmarks.get('social'): + bm = benchmarks['social'] + cat = benchmarks.get('category_name', '?') + social_benchmark_section = f""" + +BENCHMARK (średnia w kategorii "{cat}", {benchmarks.get('category_company_count', '?')} firm): +- Średnia liczba platform: {bm.get('avg_platform_count', '?')} +- Średnia liczba obserwujących: {bm.get('avg_followers', '?')} +- Średnia kompletność profili: {bm.get('avg_completeness', '?')}% +Porównaj wyniki tej firmy ze średnią kategorii w analizie.""" + profiles_info = "" engagement_rates = [] page_names = [] @@ -657,6 +718,7 @@ Szczegóły profili:{profiles_info or ' brak profili'} DODATKOWE METRYKI: - Średni engagement rate: {avg_engagement}% (szacunkowy, bez API) - Spójność nazwy: {'TAK' if consistent else 'NIE — różne nazwy na platformach'} +{social_benchmark_section} ZADANIE: Przygotuj analizę w formacie JSON z dwoma kluczami: @@ -899,8 
class BenchmarkService:
    """Calculate per-category average audit metrics (GBP, SEO, social).

    The averages are used as comparison context for a single company's audit.
    Results are memoised per category for the lifetime of this instance.
    """

    def __init__(self, db: Session):
        """Store the SQLAlchemy session and start with an empty cache.

        Args:
            db: Session used for every query issued by this service.
        """
        self.db = db
        self._cache = {}  # category_id -> averages dict, per service instance

    @staticmethod
    def _rounded(value, ndigits=None):
        """Return *value* as a rounded float, treating NULL/None as 0.

        With ``ndigits=None`` the result is an int (plain ``round(x)``),
        which is what the millisecond and follower averages use.
        """
        return round(float(value or 0), ndigits)

    def get_category_averages(self, category_id: int) -> Dict:
        """Calculate average audit scores for a category.

        Args:
            category_id: Primary key of the category to aggregate.

        Returns:
            Dict with ``company_count`` and the ``gbp``, ``seo`` and
            ``social`` average dicts, or an empty dict when the category
            has no companies.
        """
        if category_id in self._cache:
            return self._cache[category_id]

        # Get company IDs in this category
        company_ids = [
            row.id
            for row in self.db.query(Company.id).filter(
                Company.category_id == category_id
            ).all()
        ]

        # Empty categories are not cached, so they are re-checked on each call.
        if not company_ids:
            return {}

        result = {
            'company_count': len(company_ids),
            'gbp': self._avg_gbp(company_ids),
            'seo': self._avg_seo(company_ids),
            'social': self._avg_social(company_ids),
        }

        self._cache[category_id] = result
        return result

    def _avg_gbp(self, company_ids: list) -> Dict:
        """Calculate average GBP metrics over each company's latest audit.

        Only one audit row per company is aggregated, so a company that has
        been audited many times does not outweigh the others and
        ``audited_count`` is the number of audited companies.

        Returns:
            Dict of rounded averages plus ``audited_count``, or an empty
            dict when none of the companies has an audit.
        """
        # Latest audit per company.
        # NOTE(review): "latest" is taken as the highest GBPAudit.id per
        # company, i.e. ids are assumed to grow with time - confirm, or
        # switch to the audit timestamp column if the model has one.
        latest_audit_ids = self.db.query(
            func.max(GBPAudit.id)
        ).filter(
            GBPAudit.company_id.in_(company_ids)
        ).group_by(GBPAudit.company_id)

        audits = self.db.query(
            func.avg(GBPAudit.completeness_score).label('avg_score'),
            func.avg(GBPAudit.average_rating).label('avg_rating'),
            func.avg(GBPAudit.review_count).label('avg_reviews'),
            func.avg(GBPAudit.photo_count).label('avg_photos'),
            func.count(GBPAudit.id).label('total'),
        ).filter(
            GBPAudit.id.in_(latest_audit_ids)
        ).first()

        if not audits or not audits.total:
            return {}

        return {
            'avg_completeness_score': self._rounded(audits.avg_score, 1),
            'avg_rating': self._rounded(audits.avg_rating, 2),
            'avg_review_count': self._rounded(audits.avg_reviews, 1),
            'avg_photo_count': self._rounded(audits.avg_photos, 1),
            'audited_count': audits.total,
        }

    def _avg_seo(self, company_ids: list) -> Dict:
        """Calculate average SEO metrics for given companies.

        Only analyses that have a PageSpeed SEO score are aggregated.

        Returns:
            Dict of rounded averages plus ``audited_count``, or an empty
            dict when no such analysis exists.
        """
        # NOTE(review): every matching analysis row is averaged; if a company
        # can have several CompanyWebsiteAnalysis rows this over-weights it -
        # confirm against the model.
        analyses = self.db.query(
            func.avg(CompanyWebsiteAnalysis.pagespeed_seo_score).label('avg_seo'),
            func.avg(CompanyWebsiteAnalysis.pagespeed_performance_score).label('avg_perf'),
            func.avg(CompanyWebsiteAnalysis.pagespeed_accessibility_score).label('avg_acc'),
            func.avg(CompanyWebsiteAnalysis.load_time_ms).label('avg_load'),
            func.count(CompanyWebsiteAnalysis.id).label('total'),
        ).filter(
            CompanyWebsiteAnalysis.company_id.in_(company_ids),
            CompanyWebsiteAnalysis.pagespeed_seo_score.isnot(None),
        ).first()

        if not analyses or not analyses.total:
            return {}

        return {
            'avg_seo_score': self._rounded(analyses.avg_seo, 1),
            'avg_performance_score': self._rounded(analyses.avg_perf, 1),
            'avg_accessibility_score': self._rounded(analyses.avg_acc, 1),
            'avg_load_time_ms': self._rounded(analyses.avg_load),
            'audited_count': analyses.total,
        }

    def _avg_social(self, company_ids: list) -> Dict:
        """Calculate average social media metrics for given companies.

        Followers and completeness are averaged per valid profile; the
        platform count is averaged per company that has at least one valid
        profile.

        Returns:
            Dict of rounded averages plus ``companies_with_social``, or an
            empty dict when no company has a valid profile.
        """
        # `== True` is intentional: it builds a SQL expression, not a
        # Python comparison.
        valid_profiles = (
            CompanySocialMedia.company_id.in_(company_ids),
            CompanySocialMedia.is_valid == True,  # noqa: E712
        )

        profiles = self.db.query(
            func.count(func.distinct(CompanySocialMedia.company_id)).label('companies_with_social'),
            func.avg(CompanySocialMedia.followers_count).label('avg_followers'),
            func.avg(CompanySocialMedia.profile_completeness_score).label('avg_completeness'),
        ).filter(*valid_profiles).first()

        # Bail out before the second query when there is nothing to average.
        if not profiles or not profiles.companies_with_social:
            return {}

        # Number of valid profiles per company
        platform_counts = self.db.query(
            CompanySocialMedia.company_id,
            func.count(CompanySocialMedia.id).label('platforms'),
        ).filter(*valid_profiles).group_by(CompanySocialMedia.company_id).all()

        avg_platforms = 0
        if platform_counts:
            avg_platforms = round(
                sum(row.platforms for row in platform_counts) / len(platform_counts), 1
            )

        return {
            'avg_followers': self._rounded(profiles.avg_followers),
            'avg_completeness': self._rounded(profiles.avg_completeness, 1),
            'avg_platform_count': avg_platforms,
            'companies_with_social': profiles.companies_with_social,
        }

    def get_benchmarks(self, company_id: int) -> Optional[Dict]:
        """Get the category averages that apply to a specific company.

        Args:
            company_id: Primary key of the company being audited.

        Returns:
            Dict with ``category_name``, ``category_company_count`` and the
            ``gbp`` / ``seo`` / ``social`` average dicts of the company's
            category, or None when the company does not exist, has no
            category, or the category yields no averages.
        """
        company = self.db.query(Company).filter(Company.id == company_id).first()
        if not company or not company.category_id:
            return None

        averages = self.get_category_averages(company.category_id)
        if not averages:
            return None

        category = self.db.query(Category).filter(
            Category.id == company.category_id
        ).first()

        return {
            'category_name': category.name if category else 'Unknown',
            'category_company_count': averages.get('company_count', 0),
            'gbp': averages.get('gbp', {}),
            'seo': averages.get('seo', {}),
            'social': averages.get('social', {}),
        }
def analyze_review_sentiment_ai(self, reviews_data: list) -> dict:
    """Analyze review sentiment using Gemini AI.

    Best-effort: every failure (no usable reviews, Gemini error, malformed
    reply) yields None instead of raising.

    Args:
        reviews_data: List of review dicts with 'text', 'rating', 'author'.
            'text' may be a plain string or a nested dict holding the
            string under its own 'text' key. None or an empty list is
            accepted.

    Returns:
        None when the analysis is unavailable, otherwise the parsed JSON
        object returned by the model, requested in this shape:
        {
            'themes': [{'theme': str, 'sentiment': str, 'count': int}],
            'strengths': [str],          # What customers love
            'weaknesses': [str],         # What needs improvement
            'overall_sentiment': str,    # positive/mixed/negative
            'sentiment_score': float,    # -1.0 to 1.0
            'summary': str,              # 1-2 sentence summary
        }
    """
    import json

    max_reviews = 10        # stay within token limits
    max_review_chars = 300  # per-review excerpt length

    # Collect only reviews that really carry text, so empty ones do not use
    # up one of the prompt slots.
    numbered_reviews = []
    for review in reviews_data or []:
        if not isinstance(review, dict):
            continue
        raw_text = review.get('text')
        if isinstance(raw_text, dict):
            raw_text = raw_text.get('text')
        review_text = str(raw_text).strip() if raw_text else ''
        if not review_text:
            continue
        rating = review.get('rating', '?')
        numbered_reviews.append(
            f"\n{len(numbered_reviews) + 1}. [Ocena: {rating}/5] "
            f"{review_text[:max_review_chars]}"
        )
        if len(numbered_reviews) >= max_reviews:
            break

    if not numbered_reviews:
        return None

    reviews_text = "".join(numbered_reviews)

    prompt = f"""Przeanalizuj poniższe opinie Google dla lokalnej firmy w Polsce.

OPINIE:{reviews_text}

Odpowiedz WYŁĄCZNIE poprawnym JSON-em (bez markdown, bez komentarzy):
{{
  "themes": [
    {{"theme": "nazwa tematu", "sentiment": "positive/negative/neutral", "count": N}}
  ],
  "strengths": ["co klienci chwalą - max 3 punkty"],
  "weaknesses": ["co wymaga poprawy - max 3 punkty"],
  "overall_sentiment": "positive/mixed/negative",
  "sentiment_score": 0.0,
  "summary": "1-2 zdania podsumowania po polsku"
}}

Gdzie sentiment_score: -1.0 (bardzo negatywny) do 1.0 (bardzo pozytywny).
Skup się na TREŚCI opinii, nie tylko na ocenach."""

    # Deliberately broad: import, network and parsing failures must all
    # degrade to "no AI sentiment" rather than break the audit.
    try:
        from gemini_service import generate_text

        response = generate_text(prompt, temperature=0.3)
        if not response:
            return None

        # Take the outermost {...} span; this copes with markdown fences
        # (with or without a language tag) and stray prose around the JSON.
        payload = response.strip()
        start = payload.find('{')
        end = payload.rfind('}')
        if start == -1 or end < start:
            raise ValueError("no JSON object in response")

        result = json.loads(payload[start:end + 1])
        if not isinstance(result, dict):
            raise ValueError("response is not a JSON object")
    except Exception as e:
        logger.warning(f"AI sentiment analysis failed: {e}")
        return None

    # Keep the score inside the documented range if the model overshoots.
    score = result.get('sentiment_score')
    if isinstance(score, (int, float)) and not isinstance(score, bool):
        result['sentiment_score'] = max(-1.0, min(1.0, float(score)))

    return result