diff --git a/audit_ai_service.py b/audit_ai_service.py index 2bc7aa7..263dd15 100644 --- a/audit_ai_service.py +++ b/audit_ai_service.py @@ -140,8 +140,17 @@ def _collect_seo_data(db, company) -> dict: 'nap_on_website': analysis.nap_on_website, # Core Web Vitals 'lcp_ms': analysis.largest_contentful_paint_ms, - 'fid_ms': analysis.first_input_delay_ms, + 'inp_ms': getattr(analysis, 'interaction_to_next_paint_ms', None), # Replaced FID in March 2024 'cls': float(analysis.cumulative_layout_shift) if analysis.cumulative_layout_shift else None, + # Additional performance metrics (10 missing metrics) + 'fcp_ms': getattr(analysis, 'first_contentful_paint_ms', None), + 'ttfb_ms': getattr(analysis, 'time_to_first_byte_ms', None), + 'tbt_ms': getattr(analysis, 'total_blocking_time_ms', None), + 'speed_index': getattr(analysis, 'speed_index_ms', None), + 'meta_title_length': len(analysis.meta_title or ''), + 'meta_description_length': len(analysis.meta_description or ''), + 'html_lang': analysis.html_lang, + 'local_business_schema_fields': analysis.local_business_schema_fields, # Content 'content_freshness_score': analysis.content_freshness_score, 'word_count_homepage': analysis.word_count_homepage, @@ -201,6 +210,7 @@ def _collect_gbp_data(db, company) -> dict: 'reviews_with_response': audit.reviews_with_response, 'reviews_without_response': audit.reviews_without_response, 'review_response_rate': float(audit.review_response_rate) if audit.review_response_rate else None, + 'review_keywords': audit.review_keywords, # Top keywords from reviews (already collected) # Activity 'has_posts': audit.has_posts, 'posts_count_30d': audit.posts_count_30d, @@ -214,6 +224,8 @@ def _collect_gbp_data(db, company) -> dict: # NAP 'nap_consistent': audit.nap_consistent, 'nap_issues': audit.nap_issues, + # Keywords + 'description_keywords': audit.description_keywords, # Already collected during audit } @@ -238,6 +250,7 @@ def _collect_social_data(db, company) -> dict: 'posting_frequency_score': p.posting_frequency_score, 'engagement_rate': float(p.engagement_rate) if p.engagement_rate else None, 'profile_completeness_score': p.profile_completeness_score, + 'page_name': getattr(p, 'page_name', None), } present = [p for p in all_platforms if p in profiles_dict] @@ -278,12 +291,19 @@ WYNIKI AUDYTU SEO: Core Web Vitals: - LCP: {data.get('lcp_ms', 'brak')} ms -- FID: {data.get('fid_ms', 'brak')} ms +- INP: {data.get('inp_ms', 'brak')} ms (zastąpił FID w marcu 2024) - CLS: {data.get('cls', 'brak')} +Dodatkowe metryki wydajności: +- FCP: {data.get('fcp_ms', 'brak')} ms +- TTFB: {data.get('ttfb_ms', 'brak')} ms +- TBT: {data.get('tbt_ms', 'brak')} ms +- Speed Index: {data.get('speed_index', 'brak')} ms +- Czas ładowania: {data.get('load_time_ms', 'brak')} ms + On-Page SEO: -- Meta title: {data.get('meta_title', 'brak')} -- Meta description: {'tak' if data.get('meta_description') else 'BRAK'} +- Meta title: {data.get('meta_title', 'brak')} (długość: {data.get('meta_title_length', '?')} znaków, optymalna: 50-60) +- Meta description: {'tak' if data.get('meta_description') else 'BRAK'} (długość: {data.get('meta_description_length', '?')} znaków, optymalna: 150-160) - H1: {data.get('h1_count', 0)} (treść: {data.get('h1_text', 'brak')}) - H2: {data.get('h2_count', 0)}, H3: {data.get('h3_count', 0)} - Obrazy: {data.get('total_images', 0)} (bez alt: {data.get('images_without_alt', 0)}) @@ -300,6 +320,8 @@ Technical SEO: Dane strukturalne: - Schema.org: {'tak' if data.get('has_structured_data') else 'NIE'} (typy: {data.get('structured_data_types', [])}) - LocalBusiness Schema: {'tak' if data.get('has_local_business_schema') else 'NIE'} +- Pola LocalBusiness Schema: {data.get('local_business_schema_fields', 'brak danych')} +- Język strony (html lang): {data.get('html_lang', 'brak')} Social & Analytics: - Open Graph: {'tak' if data.get('has_og_tags') else 'NIE'} @@ -340,6 +362,18 @@ Odpowiedz WYŁĄCZNIE poprawnym JSON-em, bez markdown, bez komentarzy.""" def _build_gbp_prompt(data: dict) -> str: """Build Gemini prompt for GBP audit analysis.""" + # Build review keywords line (if available) + review_keywords_line = "" + if data.get('review_keywords'): + review_keywords_line = f"\n- Top słowa kluczowe z opinii: {', '.join(data.get('review_keywords', []))}" + + # Build description keywords section + description_keywords_section = "\nSłowa kluczowe w opisie:\n" + if data.get('description_keywords'): + description_keywords_section += f"- Znalezione: {', '.join(data.get('description_keywords', []))}" + else: + description_keywords_section += "- Brak danych" + return f"""Jesteś ekspertem Google Business Profile analizującym wizytówkę lokalnej firmy w Polsce. DANE FIRMY: @@ -368,16 +402,17 @@ Opinie: - Średnia ocena: {data.get('average_rating', 'brak')} - Z odpowiedzią: {data.get('reviews_with_response', 0)} - Bez odpowiedzi: {data.get('reviews_without_response', 0)} -- Wskaźnik odpowiedzi: {data.get('review_response_rate', 'brak')}% +- Wskaźnik odpowiedzi: {data.get('review_response_rate', 'brak')}%{review_keywords_line} -Aktywność: -- Posty: {'✓' if data.get('has_posts') else '✗'} ({data.get('posts_count_30d', 0)} w ostatnich 30 dniach) -- Produkty: {'✓' if data.get('has_products') else '✗'} -- Pytania i odpowiedzi: {'✓' if data.get('has_qa') else '✗'} +Aktywność (UWAGA: te pola wymagają autoryzacji OAuth i są obecnie niedostępne): +- Posty: {('✓ (' + str(data.get('posts_count_30d', 0)) + ' w 30 dni)') if data.get('has_posts') else '[dane niedostępne bez autoryzacji OAuth]'} +- Produkty: {'✓' if data.get('has_products') else '[dane niedostępne bez autoryzacji OAuth]'} +- Pytania i odpowiedzi: {'✓' if data.get('has_qa') else '[dane niedostępne bez autoryzacji OAuth]'} NAP: - Spójność NAP: {'✓' if data.get('nap_consistent') else '✗'} - Problemy NAP: {data.get('nap_issues', 'brak')} +{description_keywords_section} ZADANIE: Przygotuj analizę w formacie JSON z dwoma kluczami: @@ -399,15 +434,38 @@ NIE sugeruj akcji dla pól, które firma już ma poprawnie uzupełnione. Odpowiedz WYŁĄCZNIE poprawnym JSON-em, bez markdown, bez komentarzy.""" + def _build_social_prompt(data: dict) -> str: """Build Gemini prompt for social media audit analysis.""" profiles_info = "" + engagement_rates = [] + page_names = [] + for platform, info in data.get('profiles', {}).items(): profiles_info += f"\n {platform}: followers={info.get('followers_count', '?')}, " profiles_info += f"bio={'✓' if info.get('has_bio') else '✗'}, " profiles_info += f"photo={'✓' if info.get('has_profile_photo') else '✗'}, " profiles_info += f"posty_30d={info.get('posts_count_30d', '?')}, " profiles_info += f"kompletność={info.get('profile_completeness_score', '?')}%" + profiles_info += f", freq_score={info.get('posting_frequency_score', '?')}/10" + profiles_info += f", engagement={info.get('engagement_rate', '?')}%" + profiles_info += f", nazwa='{info.get('page_name', '?')}'" + if info.get('last_post_date'): + profiles_info += f", ost.post={info.get('last_post_date')}" + + # Collect engagement rates for average calculation + if info.get('engagement_rate'): + engagement_rates.append(info.get('engagement_rate')) + + # Collect page names for consistency check + if info.get('page_name'): + page_names.append(info.get('page_name')) + + # Calculate average engagement + avg_engagement = round(sum(engagement_rates) / len(engagement_rates), 2) if engagement_rates else 0 + + # Check name consistency (simple check: all names should be similar) + consistent = len(set(page_names)) <= 1 if page_names else True return f"""Jesteś ekspertem social media analizującym obecność lokalnej firmy w Polsce w mediach społecznościowych. @@ -422,6 +480,10 @@ OBECNOŚĆ W SOCIAL MEDIA (wynik: {data.get('score', 0)}/100): Szczegóły profili:{profiles_info or ' brak profili'} +DODATKOWE METRYKI: +- Średni engagement rate: {avg_engagement}% (szacunkowy, bez API) +- Spójność nazwy: {'TAK' if consistent else 'NIE — różne nazwy na platformach'} + ZADANIE: Przygotuj analizę w formacie JSON z dwoma kluczami: diff --git a/docs/AUDIT_COMPLETENESS_PLAN.md b/docs/AUDIT_COMPLETENESS_PLAN.md new file mode 100644 index 0000000..0ba1021 --- /dev/null +++ b/docs/AUDIT_COMPLETENESS_PLAN.md @@ -0,0 +1,111 @@ +# Plan Kompletności Audytów NordaBiz + +**Data analizy:** 2026-02-08 +**Zespół:** 4 agentów specjalistów + moderator-architekt +**Obecna kompletność:** ~52% | **Cel po F3:** ~93% + +## Stan Implementacji + +### Faza 0: Quick Wins (1-3 dni, $0) — W TRAKCIE +- [ ] **GBP bugfix:** review_response_rate sprawdza `authorAttribution.displayName` zamiast `ownerResponse` → zawsze fałszywe dane (gbp_audit_service.py) +- [ ] **GBP phantom fields:** has_posts, has_products, has_qa nigdy nie wypełniane → oznaczyć jako "niedostępne bez OAuth" w _build_gbp_prompt() +- [ ] **SEO: FID→INP:** FID deprecated marzec 2024, INP nie zbierany. Dostępny w `loadingExperience.metrics.INTERACTION_TO_NEXT_PAINT` z PageSpeed API +- [ ] **SEO: 10 metryk do promptu:** FCP, TTFB, TBT, Speed Index, load_time_ms, meta title/desc length, schema details, html lang — JUŻ W DB ale nie w prompcie AI +- [ ] **Social: engagement_rate** — pole w DB istnieje, nigdy nie obliczane. Formuła: estimated base_rate × activity_multiplier +- [ ] **Social: posting_frequency_score** — pole w DB, nigdy nie obliczane. 0-10 based on posts_count_30d +- [ ] **Social: enrichment promptu** — dodać last_post_date, page_name, engagement metrics + +**Agenci Phase 0 (team: phase0-quickwins):** +- gbp-fixer: Fix review_response_rate + GBP prompt enrichment +- seo-enricher: INP + 10 metryk SEO do promptu +- social-enricher: engagement_rate + posting_frequency_score + social prompt + +### Faza 1: API Key Integrations (0 PLN, 1 tydzień) +- [ ] Podpiąć `GooglePlacesService` do przepływu audytu GBP (MIGRACJA z legacy API) + - `GooglePlacesService` w `google_places_service.py` — gotowy kod, NIGDY nie wywoływany w audycie! + - Daje +20 pól: primaryType, editorialSummary, generativeSummary, reviewSummary, paymentOptions, parkingOptions, accessibilityOptions + - Koszt: $0 (150 firm mieści się w free tier Enterprise: 1000 req/mies) +- [ ] CrUX API — field data z realnych użytkowników Chrome (INP, LCP, CLS, FCP, TTFB) + - API Key, darmowy, 150 req/min + - Nowy plik: `crux_service.py` +- [ ] YouTube Data API v3 — subscriberCount, viewCount, videoCount + - API Key (mamy GOOGLE_PLACES_API_KEY), włączyć w Cloud Console + - 10k units/dzień, 150 firm = 0.15% limitu + - Nowy plik: `youtube_service.py` +- [ ] Security headers check (HSTS, CSP, X-Frame-Options, X-Content-Type-Options) + - `requests.head()` + sprawdzenie nagłówków +- [ ] Image format analysis (WebP/AVIF vs JPEG/PNG) +- [ ] Implementacja Brave Search stub (`_search_brave()` zwraca None — nigdy niezaimplementowany) +- [ ] Migracja DB: nowe kolumny (INP, CrUX, security headers, image formats) + +### Faza 2: Migracja GBP na Places API (New) (0 PLN, 2 tygodnie) +- [ ] Zamienić `fetch_google_business_data()` (legacy `maps.googleapis.com/maps/api/place/`) na `GooglePlacesService.get_place_details()` (`places.googleapis.com/v1/`) +- [ ] Dodać ekstrakcję: primaryType, editorialSummary, attributes, generativeSummary, reviewSummary +- [ ] Zaktualizować scoring algorithm +- [ ] Zaktualizować szablony HTML +- [ ] Migracja bazy danych (primary_type, editorial_summary, payment_options, parking_options, accessibility_options) + +### Faza 3: OAuth Framework (0 PLN API, 2-4 tygodnie dev) +- [ ] Shared OAuth 2.0 framework (`oauth_service.py`) +- [ ] GBP Business Profile API: + - Scope: `business.manage`, App review ~14 dni, darmowe + - Daje: WSZYSTKIE opinie (nie max 5), owner responses, insights (views/clicks/calls/keywords), posty +- [ ] Facebook + Instagram Graph API: + - Wspólny OAuth via Meta, App review 3-7 dni + - Scopes: pages_show_list, pages_read_engagement, read_insights, instagram_basic, instagram_manage_insights + - Daje: reach, impressions, demographics, post insights, IG stories/reels + - Token: Long-Lived (90 dni), Page Token (nigdy nie wygasa) +- [ ] Google Search Console API (per firma OAuth, darmowe) + - Daje: zapytania wyszukiwania, CTR, pozycje, status indeksacji +- [ ] UI: "Połącz konto" w panelu firmy +- [ ] Tabela `oauth_tokens` w DB + +### Faza 4: Zaawansowane (opcjonalne) +- [ ] Sentiment analysis recenzji via Gemini +- [ ] Competitor benchmarking (średnie per kategoria z 150 firm) +- [ ] LinkedIn Marketing API (trudny approval) +- [ ] NIE implementować: Twitter/X ($200/mies), TikTok (trudny approval) + +## Kluczowe Odkrycia Techniczne + +### GBP +- `GooglePlacesService` (google_places_service.py) — gotowy client Places API (New), ZAIMPORTOWANY w gbp_audit_service.py ale NIGDY nie wywoływany +- `extract_attributes()`, `extract_photos_metadata()`, `extract_hours()` — gotowe metody, nigdy nie użyte +- Review response tracking BUG: `extract_reviews_data()` sprawdza `authorAttribution.displayName` (autor) zamiast `ownerResponse` (właściciel) +- Places API (New) NIE zwraca owner responses — potrzebny Business Profile API z OAuth +- Logo/cover photo = czysta heurystyka (photo_count >= 1/2) +- Q&A API zdeprecjonowane (3 lis 2025) + +### SEO +- FID deprecated marzec 2024, INP nie zbierany (dostępny w PageSpeed API) +- 10+ metryk JUŻ W DB ale NIE przekazywanych do promptu AI +- CrUX field data (dane z realnych użytkowników) nie zbierane — tylko lab data +- Schema.org completeness details zbierane ale nie w prompcie + +### Social Media +- engagement_rate, posting_frequency_score, content_types, followers_history — pola w DB, NIGDY nie wypełniane +- `_search_brave()` = STUB (zwraca None) +- YouTube Data API v3 — darmowe, quick win, nie zintegrowane +- Facebook/Instagram OAuth — darmowe, daje pełne insights + +## Koszty API (wszystkie $0 w skali 150 firm) + +| API | Typ auth | Free tier | 150 firm/mies | +|-----|----------|-----------|---------------| +| PageSpeed Insights | API Key | 25k/dzień | 0.6% | +| Places API (New) | API Key | $200 credit/mies | ~$7.50 (w ramach credit) | +| CrUX API | API Key | 150 req/min | 0.1% | +| YouTube Data API v3 | API Key | 10k units/dzień | 0.15% | +| Brave Search | API Key | 2k req/mies | ~50% | +| GBP Business Profile | OAuth | unlimited | minimal | +| Facebook Graph | OAuth | 200 req/user/h | adequate | +| Google Search Console | OAuth | 20 QPS | adequate | + +## Wpływ na Kompletność + +| | Obecny | F0 | F1 | F2 | F3 | +|---|--------|-----|-----|-----|-----| +| GBP | 55% | 60% | 75% | 90% | 98% | +| SEO | 60% | 75% | 85% | 85% | 95% | +| Social | 35% | 50% | 65% | 65% | 85% | +| **Średnia** | **52%** | **68%** | **78%** | **83%** | **93%** | diff --git a/gbp_audit_service.py b/gbp_audit_service.py index 2db6b8a..9b5704f 100644 --- a/gbp_audit_service.py +++ b/gbp_audit_service.py @@ -1053,7 +1053,9 @@ class GBPAuditService: result['review_keywords'] = [k for k, v in sorted_keywords[:10]] total = len(reviews) - result['reviews_with_response'] = sum(1 for r in reviews if r.get('authorAttribution', {}).get('displayName')) + # BUG FIX: Check ownerResponse (not authorAttribution.displayName which is the review author) + # Note: Places API (New) may not return ownerResponse field - in that case this metric is unavailable + result['reviews_with_response'] = sum(1 for r in reviews if r.get('ownerResponse')) result['reviews_without_response'] = total - result['reviews_with_response'] result['review_response_rate'] = round(result['reviews_with_response'] / total * 100, 1) if total > 0 else 0.0 diff --git a/scripts/social_media_audit.py b/scripts/social_media_audit.py index 1f1c778..6102665 100644 --- a/scripts/social_media_audit.py +++ b/scripts/social_media_audit.py @@ -1250,6 +1250,33 @@ class SocialMediaAuditor: # Calculate completeness score enriched_profiles[platform]['profile_completeness_score'] = calculate_profile_completeness(enriched_profiles[platform]) + # Calculate engagement rate (ESTIMATED - without API we don't have real engagement data) + profile = enriched_profiles[platform] + if profile.get('followers_count') and profile.get('followers_count') > 0 and profile.get('posts_count_30d') and profile.get('posts_count_30d') > 0: + # Estimated based on industry averages for local businesses + # Facebook avg: 0.5-2%, Instagram: 1-3%, LinkedIn: 0.5-1% + base_rates = {'facebook': 1.0, 'instagram': 2.0, 'linkedin': 0.7, 'youtube': 0.5, 'twitter': 0.3, 'tiktok': 3.0} + base = base_rates.get(platform, 1.0) + # Adjust by activity level: more posts = likely more engagement + activity_multiplier = min(2.0, profile.get('posts_count_30d', 0) / 4.0) # 4 posts/month = baseline + profile['engagement_rate'] = round(base * activity_multiplier, 2) + + # Calculate posting frequency score (0-10) + posts_30d = profile.get('posts_count_30d') + if posts_30d is not None: + if posts_30d == 0: + profile['posting_frequency_score'] = 0 + elif posts_30d <= 2: + profile['posting_frequency_score'] = 3 + elif posts_30d <= 4: + profile['posting_frequency_score'] = 5 + elif posts_30d <= 8: + profile['posting_frequency_score'] = 7 + elif posts_30d <= 15: + profile['posting_frequency_score'] = 9 + else: + profile['posting_frequency_score'] = 10 + result['enriched_profiles'] = enriched_profiles # 4. Google reviews search - prefer Google Places API if available @@ -1378,12 +1405,14 @@ class SocialMediaAuditor: page_name, followers_count, has_profile_photo, has_cover_photo, has_bio, profile_description, posts_count_30d, posts_count_365d, last_post_date, + engagement_rate, posting_frequency_score, profile_completeness_score, updated_at ) VALUES ( :company_id, :platform, :url, :verified_at, :source, :is_valid, :page_name, :followers_count, :has_profile_photo, :has_cover_photo, :has_bio, :profile_description, :posts_count_30d, :posts_count_365d, :last_post_date, + :engagement_rate, :posting_frequency_score, :profile_completeness_score, NOW() ) ON CONFLICT (company_id, platform, url) DO UPDATE SET @@ -1398,6 +1427,8 @@ class SocialMediaAuditor: profile_description = COALESCE(EXCLUDED.profile_description, company_social_media.profile_description), posts_count_30d = COALESCE(EXCLUDED.posts_count_30d, company_social_media.posts_count_30d), posts_count_365d = COALESCE(EXCLUDED.posts_count_365d, company_social_media.posts_count_365d), + engagement_rate = COALESCE(EXCLUDED.engagement_rate, company_social_media.engagement_rate), + posting_frequency_score = COALESCE(EXCLUDED.posting_frequency_score, company_social_media.posting_frequency_score), last_post_date = COALESCE(EXCLUDED.last_post_date, company_social_media.last_post_date), profile_completeness_score = COALESCE(EXCLUDED.profile_completeness_score, company_social_media.profile_completeness_score), updated_at = NOW() @@ -1419,6 +1450,8 @@ class SocialMediaAuditor: 'posts_count_30d': enriched.get('posts_count_30d'), 'posts_count_365d': enriched.get('posts_count_365d'), 'last_post_date': enriched.get('last_post_date'), + 'engagement_rate': enriched.get('engagement_rate'), + 'posting_frequency_score': enriched.get('posting_frequency_score'), 'profile_completeness_score': enriched.get('profile_completeness_score'), }) diff --git a/templates/company_detail.html b/templates/company_detail.html index edc4635..3e03ebe 100755 --- a/templates/company_detail.html +++ b/templates/company_detail.html @@ -3026,14 +3026,14 @@