feat: YouTube Data API v3 integration for social media enrichment

- YouTubeService now fetches: subscribers, views, video count, description, avatar, banner, country, creation date, and the 5 most recent videos
- Enricher uses the API first and falls back to scraping
- Extra YouTube data is stored in the content_types JSONB column
- Audit detail shows view count, country, creation date, and recent videos
- Requires enabling YouTube Data API v3 in Google Cloud Console

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-12 14:01:15 +01:00 · 2026-03-12 14:01:15 +01:00 · fe288f0441
commit fe288f0441
parent af225691b6
4 changed files with 146 additions and 26 deletions
--- a/blueprints/admin/routes_social.py
+++ b/blueprints/admin/routes_social.py
@@ -974,7 +974,7 @@ def _run_enrichment_background(company_ids, platforms_filter=None):
                            profile_result['enriched_data'] = {
                                k: (str(v) if hasattr(v, 'strftime') else v)
                                for k, v in enriched.items()
-                                if k in tracked_fields and v is not None
+                                if (k in tracked_fields or k.startswith('_')) and v is not None
                            }

                            if changes:
@@ -1264,6 +1264,12 @@ def admin_social_audit_enrichment_approve():

                enriched = change['enriched_data']
                for field, value in enriched.items():
+                    if field.startswith('_') and field.endswith('_extra'):
+                        # Store platform-specific extra data in content_types JSONB
+                        ct = dict(profile.content_types or {})
+                        ct.update(value if isinstance(value, dict) else {})
+                        profile.content_types = ct
+                        continue
                    if field == 'last_post_date' and isinstance(value, str):
                        try:
                            from dateutil.parser import parse as parse_date
--- a/scripts/social_media_audit.py
+++ b/scripts/social_media_audit.py
@@ -1168,36 +1168,69 @@ class SocialProfileEnricher:
        return result

    def _enrich_youtube(self, url: str) -> Dict[str, Any]:
-        """Enrich YouTube channel data."""
+        """Enrich YouTube channel data via YouTube Data API v3.
+        Falls back to scraping if API key is not available.
+        """
        result = {}
+        try:
+            # Try YouTube Data API v3 first
+            import sys
+            from pathlib import Path
+            sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
+            from youtube_service import YouTubeService
+            yt = YouTubeService()
+            channel_id = yt.extract_channel_id_from_url(url)
+            if channel_id:
+                stats = yt.get_channel_stats(channel_id)
+                if stats:
+                    result['followers_count'] = stats['subscriber_count']
+                    result['posts_count_365d'] = stats['video_count']
+                    result['page_name'] = stats['channel_title']
+                    if stats.get('channel_description'):
+                        result['profile_description'] = stats['channel_description'][:500]
+                        result['has_bio'] = True
+                    result['has_profile_photo'] = bool(stats.get('thumbnail_url'))
+                    result['has_cover_photo'] = bool(stats.get('banner_url'))
+                    # Store extra data in a special key for content_types JSONB
+                    result['_youtube_extra'] = {
+                        'view_count': stats.get('view_count', 0),
+                        'country': stats.get('country', ''),
+                        'published_at': stats.get('published_at', '')[:10] if stats.get('published_at') else '',
+                        'custom_url': stats.get('custom_url', ''),
+                        'thumbnail_url': stats.get('thumbnail_url', ''),
+                        'banner_url': stats.get('banner_url', ''),
+                    }
+                    # Fetch recent videos (best effort)
+                    actual_channel_id = stats.get('channel_id', channel_id)
+                    if actual_channel_id.startswith('UC'):
+                        videos = yt.get_recent_videos(actual_channel_id, 5)
+                        if videos:
+                            result['_youtube_extra']['recent_videos'] = videos
+                            # Last post date from most recent video
+                            if videos[0].get('date'):
+                                from datetime import datetime
+                                try:
+                                    result['last_post_date'] = datetime.strptime(videos[0]['date'], '%Y-%m-%d')
+                                except ValueError:
+                                    pass
+                    return result
+        except (ImportError, ValueError) as e:
+            logger.debug(f"YouTube API not available ({e}), falling back to scraping")
+        except Exception as e:
+            logger.debug(f"YouTube API enrichment failed: {e}")
+
+        # Fallback: scraping (usually returns nothing due to JS rendering)
        try:
            resp = self.session.get(url, timeout=REQUEST_TIMEOUT)
            if resp.status_code == 200:
                html = resp.text
-                # Subscriber count from meta or JSON
-                subs_match = re.search(r'"subscriberCountText":\s*\{"simpleText":\s*"([^"]+)"\}', html)
-                if subs_match:
-                    result['followers_count'] = self._parse_count(subs_match.group(1).split(' ')[0])
-                # Video count
-                videos_match = re.search(r'"videosCountText":\s*\{"runs":\s*\[\{"text":\s*"([^"]+)"\}', html)
-                if videos_match:
-                    result['posts_count_365d'] = self._parse_count(videos_match.group(1))
-                # Channel description
-                desc_match = re.search(r'"description":\s*"([^"]*(?:\\.[^"]*)*)"', html)
-                if desc_match:
-                    desc = desc_match.group(1).replace('\\n', ' ').strip()
-                    if desc and len(desc) > 5:
-                        result['profile_description'] = desc[:500]
-                        result['has_bio'] = True
-                # Avatar from og:image
                og_img = re.search(r'<meta\s+(?:property|name)="og:image"\s+content="([^"]+)"', html)
                result['has_profile_photo'] = bool(og_img)
-                # Channel name
                name_match = re.search(r'<meta\s+(?:property|name)="og:title"\s+content="([^"]+)"', html)
                if name_match:
                    result['page_name'] = name_match.group(1)
        except Exception as e:
-            logger.debug(f"YouTube enrichment failed: {e}")
+            logger.debug(f"YouTube scraping failed: {e}")
        return result

    def _enrich_linkedin(self, url: str) -> Dict[str, Any]:
--- a/templates/admin/social_audit_detail.html
+++ b/templates/admin/social_audit_detail.html
@@ -832,6 +832,38 @@
                        {% endfor %}
                    </div>
                    {% endif %}
+
+                    <!-- YouTube extra data -->
+                    {% if ct.get('view_count') or ct.get('recent_videos') %}
+                    <div style="margin-top: var(--spacing-sm);">
+                        {% set yt_info = [] %}
+                        {% if ct.get('view_count') %}{% if yt_info.append(('👁️', '{:,}'.format(ct.view_count).replace(',', ' ') ~ ' wyświetleń łącznie')) %}{% endif %}{% endif %}
+                        {% if ct.get('country') %}{% if yt_info.append(('🌍', ct.country)) %}{% endif %}{% endif %}
+                        {% if ct.get('published_at') %}{% if yt_info.append(('📅', 'Kanał od: ' ~ ct.published_at)) %}{% endif %}{% endif %}
+                        {% if ct.get('custom_url') %}{% if yt_info.append(('🔗', ct.custom_url)) %}{% endif %}{% endif %}
+                        {% if yt_info %}
+                        <div style="display: flex; gap: var(--spacing-xs); flex-wrap: wrap; margin-bottom: var(--spacing-sm);">
+                            {% for icon, val in yt_info %}
+                            <span style="background: var(--background); padding: 3px 10px; border-radius: var(--radius); font-size: 12px; color: var(--text-secondary); border: 1px solid var(--border-color, #e5e7eb);">
+                                {{ icon }} {{ val }}
+                            </span>
+                            {% endfor %}
+                        </div>
+                        {% endif %}
+                        {% if ct.get('recent_videos') %}
+                        <div style="font-size: 11px; font-weight: 600; color: var(--text-secondary); margin-bottom: 4px;">Ostatnie filmy</div>
+                        {% for vid in ct.recent_videos %}
+                        <div style="padding: 4px 0; border-bottom: 1px solid #f3f4f6; font-size: 12px; display: flex; gap: var(--spacing-sm); align-items: baseline;">
+                            <span style="color: var(--text-secondary); white-space: nowrap;">{{ vid.date }}</span>
+                            <span style="flex: 1; overflow: hidden; text-overflow: ellipsis; white-space: nowrap;">{{ vid.title or '(bez tytułu)' }}</span>
+                            {% if vid.video_id %}
+                            <a href="https://youtube.com/watch?v={{ vid.video_id }}" target="_blank" rel="noopener" style="font-size: 11px; color: #dc2626; white-space: nowrap;">▶ Oglądaj</a>
+                            {% endif %}
+                        </div>
+                        {% endfor %}
+                        {% endif %}
+                    </div>
+                    {% endif %}
                    {% endif %}

                    <!-- Data Provenance Section -->
--- a/youtube_service.py
+++ b/youtube_service.py
@@ -28,9 +28,9 @@ class YouTubeService:
    """Fetches YouTube channel statistics via YouTube Data API v3."""

    def __init__(self, api_key: str = None):
-        self.api_key = api_key or os.getenv('GOOGLE_PLACES_API_KEY')
+        self.api_key = api_key or os.getenv('YOUTUBE_API_KEY') or os.getenv('GOOGLE_PAGESPEED_API_KEY') or os.getenv('GOOGLE_PLACES_API_KEY')
        if not self.api_key:
-            raise ValueError("GOOGLE_PLACES_API_KEY not set in environment")
+            raise ValueError("No Google API key found (YOUTUBE_API_KEY, GOOGLE_PAGESPEED_API_KEY, or GOOGLE_PLACES_API_KEY)")
        self.session = requests.Session()

    def extract_channel_id_from_url(self, url: str) -> Optional[str]:
@@ -95,14 +95,14 @@ class YouTubeService:
        # Determine if it's a channel ID (starts with UC) or handle/username
        if channel_id_or_username.startswith('UC'):
            params = {
-                'part': 'statistics,snippet',
+                'part': 'statistics,snippet,brandingSettings',
                'id': channel_id_or_username,
                'key': self.api_key
            }
        else:
            # For handles, we need to use forHandle (modern) or forUsername (legacy)
            params = {
-                'part': 'statistics,snippet',
+                'part': 'statistics,snippet,brandingSettings',
                'forHandle': channel_id_or_username,
                'key': self.api_key
            }
@@ -117,7 +117,7 @@ class YouTubeService:
                # Try forUsername as fallback
                if not channel_id_or_username.startswith('UC'):
                    params = {
-                        'part': 'statistics,snippet',
+                        'part': 'statistics,snippet,brandingSettings',
                        'forUsername': channel_id_or_username,
                        'key': self.api_key
                    }
@@ -131,15 +131,26 @@ class YouTubeService:
                    return None

            channel = items[0]
+            channel_id = channel.get('id', channel_id_or_username)
            stats = channel.get('statistics', {})
            snippet = channel.get('snippet', {})
+            branding = channel.get('brandingSettings', {}).get('channel', {})
+            thumbnails = snippet.get('thumbnails', {})

            result = {
+                'channel_id': channel_id,
                'subscriber_count': int(stats.get('subscriberCount', 0)),
+                'hidden_subscriber_count': stats.get('hiddenSubscriberCount', False),
                'view_count': int(stats.get('viewCount', 0)),
                'video_count': int(stats.get('videoCount', 0)),
                'channel_title': snippet.get('title', ''),
-                'channel_description': snippet.get('description', '')
+                'channel_description': snippet.get('description', ''),
+                'custom_url': snippet.get('customUrl', ''),
+                'published_at': snippet.get('publishedAt', ''),
+                'country': snippet.get('country', ''),
+                'thumbnail_url': thumbnails.get('high', thumbnails.get('default', {})).get('url', ''),
+                'banner_url': channel.get('brandingSettings', {}).get('image', {}).get('bannerExternalUrl', ''),
+                'keywords': branding.get('keywords', ''),
            }

            logger.info(f"Fetched YouTube stats for {result['channel_title']}: "
@@ -158,3 +169,41 @@ class YouTubeService:
        except (KeyError, ValueError, TypeError) as e:
            logger.warning(f"YouTube API response parse error: {e}")
            return None
+
+    def get_recent_videos(self, channel_id: str, max_results: int = 5) -> list:
+        """Fetch recent videos from a channel (costs 100 quota units).
+
+        Args:
+            channel_id: YouTube channel ID (UC...)
+            max_results: Number of videos to fetch (max 50)
+
+        Returns:
+            List of dicts with video info, or empty list on error.
+        """
+        if not channel_id or not channel_id.startswith('UC'):
+            return []
+
+        try:
+            resp = self.session.get(f"{YOUTUBE_API_BASE}/search", params={
+                'part': 'snippet',
+                'channelId': channel_id,
+                'order': 'date',
+                'type': 'video',
+                'maxResults': max_results,
+                'key': self.api_key,
+            }, timeout=15)
+            resp.raise_for_status()
+            items = resp.json().get('items', [])
+
+            videos = []
+            for item in items:
+                snippet = item.get('snippet', {})
+                videos.append({
+                    'title': snippet.get('title', ''),
+                    'date': snippet.get('publishedAt', '')[:10],
+                    'video_id': item.get('id', {}).get('videoId', ''),
+                })
+            return videos
+        except Exception as e:
+            logger.debug(f"YouTube recent videos fetch failed: {e}")
+            return []