nordabiz/youtube_service.py
Maciej Pienczyn ce6aa53c78
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
feat(audit): Phase 1 - YouTube API, CrUX field data, security headers, image formats
New services:
- youtube_service.py: YouTube Data API v3 integration for channel stats
  (subscriber count, view count, video count)
- crux_service.py: Chrome UX Report API for real user field data
  (INP, LCP, CLS, FCP, TTFB from actual Chrome users)

SEO audit enrichment:
- Security headers check: HSTS, CSP, X-Frame-Options, X-Content-Type-Options
  via live requests.head() during data collection
- Image format analysis: WebP/AVIF/SVG vs legacy JPEG/PNG ratio
- CrUX field data complements existing PageSpeed lab data in AI prompt
- All new metrics passed to Gemini for richer analysis

Social media audit enrichment:
- YouTube API data (video count, views, subscribers) integrated into
  social media AI prompt when YouTube profile exists

All APIs use existing GOOGLE_PLACES_API_KEY (free tier, $0 cost).
Completeness: ~68% → ~78% (estimated)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-08 11:32:03 +01:00

161 lines
5.3 KiB
Python

"""
YouTube Data API v3 Service for NordaBiz
=========================================
Simple YouTube API client for fetching channel statistics.
Uses the YouTube Data API v3 with the same Google API key as Places API.
API Reference: https://developers.google.com/youtube/v3/docs/channels
Author: NordaBiz Development Team
Created: 2026-02-08
"""
import os
import re
import logging
from typing import Optional, Dict
import requests
# Module-level logger named after this module, used by YouTubeService below.
logger = logging.getLogger(__name__)
# API Configuration
# Base URL of the YouTube Data API v3; endpoint paths such as "/channels"
# are appended to it when building request URLs.
YOUTUBE_API_BASE = "https://www.googleapis.com/youtube/v3"
class YouTubeService:
    """Fetches YouTube channel statistics via YouTube Data API v3.

    The API key is sent as the ``key`` query parameter, which means it is
    part of every request URL. Error messages produced by ``requests`` can
    embed that URL, so everything logged from an exception is passed through
    ``_redact`` first.
    """

    # Shape of canonical channel IDs as commonly observed: "UC" followed by
    # 22 URL-safe base64 characters. This is not a documented guarantee, so
    # values that merely start with "UC" still get an ``id`` lookup as a last
    # resort (see ``_build_lookups``).
    _CHANNEL_ID_RE = re.compile(r'UC[A-Za-z0-9_-]{22}')

    # URL patterns, tried in order. Host matching is case-insensitive
    # ("YouTube.com" is common in hand-typed URLs); the captured group keeps
    # its original case. Handles and custom names may contain periods and
    # non-ASCII letters, hence ``[\w.-]``.
    _URL_PATTERNS = (
        # Direct channel ID (UC...)
        re.compile(r'youtube\.com/channel/([A-Za-z0-9_-]+)', re.IGNORECASE),
        # Handle (@username); captured without the leading "@"
        re.compile(r'youtube\.com/@([\w.-]+)', re.IGNORECASE),
        # Legacy /c/ and /user/ formats
        re.compile(r'youtube\.com/(?:c|user)/([\w.-]+)', re.IGNORECASE),
    )

    def __init__(self, api_key: Optional[str] = None):
        """
        Create the service.

        Args:
            api_key: Google API key. When omitted, the value of the
                GOOGLE_PLACES_API_KEY environment variable is used.

        Raises:
            ValueError: If no key is passed and the environment variable
                is unset or empty.
        """
        self.api_key = api_key or os.getenv('GOOGLE_PLACES_API_KEY')
        if not self.api_key:
            raise ValueError("GOOGLE_PLACES_API_KEY not set in environment")
        # One session for all requests so connections can be reused.
        self.session = requests.Session()

    def _redact(self, text) -> str:
        """Return ``str(text)`` with the API key replaced by ``***``.

        Used before logging exception messages or response bodies, which
        may contain the request URL including the ``key`` parameter.
        """
        text = str(text)
        if self.api_key:
            text = text.replace(self.api_key, '***')
        return text

    @classmethod
    def _build_lookups(cls, identifier: str) -> list:
        """Return the ordered ``channels.list`` filters to try for *identifier*.

        Each element is a single-entry dict (``id``, ``forHandle`` or
        ``forUsername``) to merge into the request parameters.

        - A value shaped like a canonical channel ID is looked up by ``id``
          only.
        - Anything else is tried as a modern handle, then as a legacy
          username. If it starts with "UC" it is finally tried as an ID too,
          so a handle such as "UCLA" resolves correctly while an unusual ID
          is still found.
        """
        if cls._CHANNEL_ID_RE.fullmatch(identifier):
            return [{'id': identifier}]

        # A leading "@" would make the forUsername lookup fail.
        name = identifier.lstrip('@')
        if not name:
            return []

        lookups = [{'forHandle': name}, {'forUsername': name}]
        if identifier.startswith('UC'):
            lookups.append({'id': identifier})
        return lookups

    def extract_channel_id_from_url(self, url: str) -> Optional[str]:
        """
        Extract channel ID or handle from YouTube URL.

        Supported formats:
        - youtube.com/channel/UC1234567890abcdef
        - youtube.com/@handle
        - youtube.com/c/channelname
        - youtube.com/user/username

        Args:
            url: YouTube channel URL

        Returns:
            Channel ID (starts with UC) or handle (without @) or None
        """
        if not url:
            return None

        for pattern in self._URL_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)

        logger.warning("Unable to extract channel ID from URL: %s", url)
        return None

    def get_channel_stats(self, channel_id_or_username: str) -> Optional[Dict]:
        """
        Fetch channel statistics from YouTube Data API v3.

        Args:
            channel_id_or_username: YouTube channel ID (UC...) or username/handle

        Returns:
            Dict with channel stats or None on error:
            {
                'subscriber_count': int,
                'view_count': int,
                'video_count': int,
                'channel_title': str,
                'channel_description': str
            }
            None is also returned when no channel matches. Errors are
            logged as warnings and never raised to the caller.
        """
        if not channel_id_or_username:
            return None

        identifier = channel_id_or_username.strip()
        if not identifier:
            return None

        url = f"{YOUTUBE_API_BASE}/channels"

        try:
            items = []
            # Try each candidate filter until one returns a channel.
            for lookup in self._build_lookups(identifier):
                params = {'part': 'statistics,snippet'}
                params.update(lookup)
                params['key'] = self.api_key

                response = self.session.get(url, params=params, timeout=15)
                response.raise_for_status()
                items = response.json().get('items') or []
                if items:
                    break

            if not items:
                logger.warning("No YouTube channel found for: %s",
                               channel_id_or_username)
                return None

            channel = items[0]
            stats = channel.get('statistics', {})
            snippet = channel.get('snippet', {})

            # The API returns counters as strings; a missing counter is
            # reported as 0.
            result = {
                'subscriber_count': int(stats.get('subscriberCount', 0)),
                'view_count': int(stats.get('viewCount', 0)),
                'video_count': int(stats.get('videoCount', 0)),
                'channel_title': snippet.get('title', ''),
                'channel_description': snippet.get('description', '')
            }

            logger.info("Fetched YouTube stats for %s: %s subscribers, %s videos",
                        result['channel_title'],
                        result['subscriber_count'],
                        result['video_count'])
            return result

        except requests.exceptions.HTTPError as e:
            failed = e.response
            status = failed.status_code if failed is not None else 'unknown'
            body = failed.text if failed is not None else ''
            logger.warning("YouTube API HTTP error for %s: %s - %s",
                           channel_id_or_username, status, self._redact(body))
            return None
        except requests.exceptions.RequestException as e:
            # str(e) can contain the full request URL, including the key.
            logger.warning("YouTube API request error for %s: %s",
                           channel_id_or_username, self._redact(e))
            return None
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            logger.warning("YouTube API response parse error: %s", e)
            return None