diff --git a/blueprints/admin/routes_user_insights.py b/blueprints/admin/routes_user_insights.py index e770fa7..ef850d3 100644 --- a/blueprints/admin/routes_user_insights.py +++ b/blueprints/admin/routes_user_insights.py @@ -9,6 +9,7 @@ page popularity, user flows, and behavioral profiles. import csv import io import logging +import math from datetime import date, timedelta, datetime from flask import render_template, request, redirect, url_for, flash, Response @@ -27,6 +28,21 @@ from utils.decorators import role_required logger = logging.getLogger(__name__) +def _non_bot_sessions(db, start_dt=None): + """Subquery of non-bot session IDs for filtering page_views.""" + q = db.query(UserSession.id).filter(UserSession.is_bot == False) + if start_dt: + q = q.filter(UserSession.started_at >= start_dt) + return q + + +def _log_engagement_score(raw): + """Logarithmic engagement score: better distribution than linear capped at 100.""" + if raw <= 0: + return 0 + return min(100, int(math.log2(raw + 1) * 6)) + + def _get_period_dates(period): """Return (start_date, days) for given period string.""" today = date.today() @@ -95,11 +111,9 @@ def _tab_problems(db, start_date, days): User.locked_until > now, User.is_active == True ).scalar() or 0 - failed_logins_7d = db.query( - func.coalesce(func.sum(User.failed_login_attempts), 0) - ).filter( - User.is_active == True, - User.failed_login_attempts > 0 + failed_logins_7d = db.query(func.count(AuditLog.id)).filter( + AuditLog.action == 'login_failed', + AuditLog.created_at >= start_dt ).scalar() or 0 password_resets_7d = db.query(func.count(EmailLog.id)).filter( @@ -116,8 +130,12 @@ def _tab_problems(db, start_date, days): problem_users = [] for user in users: - # Failed logins - fl = user.failed_login_attempts or 0 + # Failed logins (from audit_logs, time-based) + fl = db.query(func.count(AuditLog.id)).filter( + AuditLog.user_email == user.email, + AuditLog.action == 'login_failed', + AuditLog.created_at >= start_dt + ).scalar() 
or 0 # Security alerts 7d sa_7d = db.query(func.count(SecurityAlert.id)).filter( @@ -292,7 +310,8 @@ def _tab_engagement(db, start_date, days): # Stat cards active_7d = db.query(func.count(func.distinct(UserSession.user_id))).filter( UserSession.user_id.isnot(None), - UserSession.started_at >= start_dt + UserSession.started_at >= start_dt, + UserSession.is_bot == False ).scalar() or 0 all_users = db.query(User).filter(User.is_active == True).all() @@ -321,49 +340,57 @@ def _tab_engagement(db, start_date, days): engagement_list = [] for user in registered_users: - # Current period + # Current period (exclude bots) sessions_cur = db.query(func.count(UserSession.id)).filter( UserSession.user_id == user.id, - UserSession.started_at >= start_dt + UserSession.started_at >= start_dt, + UserSession.is_bot == False ).scalar() or 0 pv_cur = db.query(func.count(PageView.id)).filter( PageView.user_id == user.id, - PageView.viewed_at >= start_dt + PageView.viewed_at >= start_dt, + PageView.session_id.in_(_non_bot_sessions(db, start_dt)) ).scalar() or 0 # Previous period for WoW sessions_prev = db.query(func.count(UserSession.id)).filter( UserSession.user_id == user.id, UserSession.started_at >= prev_start, - UserSession.started_at < start_dt + UserSession.started_at < start_dt, + UserSession.is_bot == False ).scalar() or 0 pv_prev = db.query(func.count(PageView.id)).filter( PageView.user_id == user.id, PageView.viewed_at >= prev_start, - PageView.viewed_at < start_dt + PageView.viewed_at < start_dt, + PageView.session_id.in_(_non_bot_sessions(db, prev_start)) ).scalar() or 0 - # 30d engagement score components + # 30d engagement score components (exclude bots) s30 = db.query(func.count(UserSession.id)).filter( UserSession.user_id == user.id, - UserSession.started_at >= start_30d + UserSession.started_at >= start_30d, + UserSession.is_bot == False ).scalar() or 0 pv30 = db.query(func.count(PageView.id)).filter( PageView.user_id == user.id, - PageView.viewed_at >= start_30d + 
PageView.viewed_at >= start_30d, + PageView.session_id.in_(_non_bot_sessions(db, start_30d)) ).scalar() or 0 clicks30 = db.query(func.sum(UserSession.clicks_count)).filter( UserSession.user_id == user.id, - UserSession.started_at >= start_30d + UserSession.started_at >= start_30d, + UserSession.is_bot == False ).scalar() or 0 dur30 = db.query(func.sum(UserSession.duration_seconds)).filter( UserSession.user_id == user.id, - UserSession.started_at >= start_30d + UserSession.started_at >= start_30d, + UserSession.is_bot == False ).scalar() or 0 conv30 = db.query(func.count(ConversionEvent.id)).filter( @@ -376,11 +403,9 @@ def _tab_engagement(db, start_date, days): SearchQuery.searched_at >= start_30d ).scalar() or 0 - score = min(100, - s30 * 3 + pv30 * 1 + int(clicks30) * 0.5 + - int(dur30) / 60 * 2 + conv30 * 10 + search30 * 2 - ) - score = int(score) + raw = (s30 * 3 + pv30 * 1 + int(clicks30) * 0.5 + + int(dur30) / 60 * 2 + conv30 * 10 + search30 * 2) + score = _log_engagement_score(raw) # WoW change wow = None @@ -444,16 +469,24 @@ def _tab_pages(db, start_date, days): """Page popularity map.""" start_dt = datetime.combine(start_date, datetime.min.time()) - # Page sections with grouping + # Page sections with grouping (expanded to cover ~95% of traffic) section_map = { 'Strona główna': ['/'], 'Profile firm': ['/company/'], 'Forum': ['/forum'], 'Chat': ['/chat'], 'Wyszukiwarka': ['/search', '/szukaj'], - 'Wydarzenia': ['/events', '/wydarzenia'], - 'Ogłoszenia': ['/classifieds', '/ogloszenia'], - 'Członkostwo': ['/membership', '/czlonkostwo'], + 'Wydarzenia': ['/events', '/wydarzenia', '/kalendarz'], + 'Ogłoszenia': ['/classifieds', '/ogloszenia', '/tablica'], + 'Członkostwo': ['/membership', '/czlonkostwo', '/korzysci'], + 'Logowanie': ['/login', '/register', '/forgot-password', '/reset-password', '/verify-email'], + 'Panel użytkownika': ['/dashboard', '/konto'], + 'Wiadomości': ['/wiadomosci'], + 'Edukacja': ['/edukacja'], + 'Rada': ['/rada'], + 'ZOPK': 
['/zopk'], + 'Kontakty': ['/kontakty'], + 'Raporty': ['/raporty'], 'Admin': ['/admin'], } @@ -467,9 +500,10 @@ def _tab_pages(db, start_date, days): func.count(PageView.id).label('views'), func.count(func.distinct(PageView.user_id)).label('unique_users'), func.avg(PageView.time_on_page_seconds).label('avg_time') - ).filter( + ).join(UserSession, PageView.session_id == UserSession.id).filter( or_(*conditions), - PageView.viewed_at >= start_dt + PageView.viewed_at >= start_dt, + UserSession.is_bot == False ).first() sections.append({ @@ -484,7 +518,7 @@ def _tab_pages(db, start_date, days): for s in sections: s['intensity'] = min(100, int(s['views'] / max_views * 100)) - # Top 50 pages + # Top 50 pages (exclude bots) top_pages = db.query( PageView.path, func.count(PageView.id).label('views'), @@ -492,8 +526,9 @@ def _tab_pages(db, start_date, days): func.avg(PageView.time_on_page_seconds).label('avg_time'), func.avg(PageView.scroll_depth_percent).label('avg_scroll'), func.avg(PageView.load_time_ms).label('avg_load'), - ).filter( - PageView.viewed_at >= start_dt + ).join(UserSession, PageView.session_id == UserSession.id).filter( + PageView.viewed_at >= start_dt, + UserSession.is_bot == False ).group_by(PageView.path).order_by(desc('views')).limit(50).all() max_page_views = top_pages[0].views if top_pages else 1 @@ -510,13 +545,14 @@ def _tab_pages(db, start_date, days): 'bar_pct': int(p.views / max_page_views * 100), }) - # Ignored pages (< 5 views in 30d) + # Ignored pages (< 5 views in 30d, exclude bots) start_30d = datetime.combine(date.today() - timedelta(days=30), datetime.min.time()) ignored = db.query( PageView.path, func.count(PageView.id).label('views'), - ).filter( - PageView.viewed_at >= start_30d + ).join(UserSession, PageView.session_id == UserSession.id).filter( + PageView.viewed_at >= start_30d, + UserSession.is_bot == False ).group_by(PageView.path).having( func.count(PageView.id) < 5 ).order_by('views').limit(30).all() @@ -536,13 +572,14 @@ def 
_tab_paths(db, start_date, days): """User flow analysis.""" start_dt = datetime.combine(start_date, datetime.min.time()) - # Entry pages - first page in each session + # Entry pages - first page in each session (exclude bots) entry_sql = text(""" WITH first_pages AS ( - SELECT DISTINCT ON (session_id) path - FROM page_views - WHERE viewed_at >= :start_dt AND session_id IS NOT NULL - ORDER BY session_id, viewed_at ASC + SELECT DISTINCT ON (pv.session_id) pv.path + FROM page_views pv + JOIN user_sessions us ON pv.session_id = us.id + WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false + ORDER BY pv.session_id, pv.viewed_at ASC ) SELECT path, COUNT(*) as cnt FROM first_pages @@ -550,13 +587,14 @@ def _tab_paths(db, start_date, days): """) entry_pages = db.execute(entry_sql, {'start_dt': start_dt}).fetchall() - # Exit pages - last page in each session + # Exit pages - last page in each session (exclude bots) exit_sql = text(""" WITH last_pages AS ( - SELECT DISTINCT ON (session_id) path - FROM page_views - WHERE viewed_at >= :start_dt AND session_id IS NOT NULL - ORDER BY session_id, viewed_at DESC + SELECT DISTINCT ON (pv.session_id) pv.path + FROM page_views pv + JOIN user_sessions us ON pv.session_id = us.id + WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false + ORDER BY pv.session_id, pv.viewed_at DESC ) SELECT path, COUNT(*) as cnt FROM last_pages @@ -567,13 +605,14 @@ def _tab_paths(db, start_date, days): max_entry = entry_pages[0].cnt if entry_pages else 1 max_exit = exit_pages[0].cnt if exit_pages else 1 - # Top transitions + # Top transitions (exclude bots) transitions_sql = text(""" WITH ordered AS ( - SELECT session_id, path, - LEAD(path) OVER (PARTITION BY session_id ORDER BY viewed_at) AS next_path - FROM page_views - WHERE viewed_at >= :start_dt AND session_id IS NOT NULL + SELECT pv.session_id, pv.path, + LEAD(pv.path) OVER (PARTITION BY pv.session_id ORDER BY pv.viewed_at) AS next_path + 
FROM page_views pv + JOIN user_sessions us ON pv.session_id = us.id + WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false ) SELECT path, next_path, COUNT(*) as cnt FROM ordered @@ -582,21 +621,23 @@ def _tab_paths(db, start_date, days): """) transitions = db.execute(transitions_sql, {'start_dt': start_dt}).fetchall() - # Drop-off pages (high exit rate) + # Drop-off pages (high exit rate, exclude bots) dropoff_sql = text(""" WITH page_stats AS ( - SELECT path, COUNT(*) as total_views - FROM page_views - WHERE viewed_at >= :start_dt AND session_id IS NOT NULL - GROUP BY path HAVING COUNT(*) >= 5 + SELECT pv.path, COUNT(*) as total_views + FROM page_views pv + JOIN user_sessions us ON pv.session_id = us.id + WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false + GROUP BY pv.path HAVING COUNT(*) >= 5 ), exit_stats AS ( SELECT path, COUNT(*) as exit_count FROM ( - SELECT DISTINCT ON (session_id) path - FROM page_views - WHERE viewed_at >= :start_dt AND session_id IS NOT NULL - ORDER BY session_id, viewed_at DESC + SELECT DISTINCT ON (pv.session_id) pv.path + FROM page_views pv + JOIN user_sessions us ON pv.session_id = us.id + WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false + ORDER BY pv.session_id, pv.viewed_at DESC ) lp GROUP BY path ) @@ -609,7 +650,7 @@ def _tab_paths(db, start_date, days): """) dropoff = db.execute(dropoff_sql, {'start_dt': start_dt}).fetchall() - # Session length distribution + # Session length distribution (exclude bots) session_length_sql = text(""" SELECT CASE @@ -621,10 +662,11 @@ def _tab_paths(db, start_date, days): END as bucket, COUNT(*) as cnt FROM ( - SELECT session_id, COUNT(*) as pv_count - FROM page_views - WHERE viewed_at >= :start_dt AND session_id IS NOT NULL - GROUP BY session_id + SELECT pv.session_id, COUNT(*) as pv_count + FROM page_views pv + JOIN user_sessions us ON pv.session_id = us.id + WHERE pv.viewed_at >= :start_dt AND 
pv.session_id IS NOT NULL AND us.is_bot = false + GROUP BY pv.session_id ) session_counts GROUP BY bucket ORDER BY MIN(pv_count) @@ -651,14 +693,13 @@ def _tab_overview(db, start_date, days): start_dt = datetime.combine(start_date, datetime.min.time()) start_30d = datetime.combine(date.today() - timedelta(days=30), datetime.min.time()) - # Daily sessions + page views (30d) + # Daily sessions from analytics_daily (already bot-filtered after migration) daily_data = db.query(AnalyticsDaily).filter( AnalyticsDaily.date >= date.today() - timedelta(days=30) ).order_by(AnalyticsDaily.date).all() chart_labels = [] chart_sessions = [] - chart_pageviews = [] for d in daily_data: chart_labels.append(d.date.strftime('%d.%m')) if filter_type == 'logged': @@ -667,15 +708,36 @@ def _tab_overview(db, start_date, days): chart_sessions.append(d.anonymous_sessions or 0) else: chart_sessions.append(d.total_sessions or 0) - chart_pageviews.append(d.total_page_views or 0) - # Hourly heatmap (7 days x 24 hours) + # Daily page views from raw PageView + JOIN (bot-filtered, supports logged/anon filter) + pv_filter = [ + PageView.viewed_at >= start_30d, + UserSession.is_bot == False, + ] + if filter_type == 'logged': + pv_filter.append(UserSession.user_id.isnot(None)) + elif filter_type == 'anonymous': + pv_filter.append(UserSession.user_id.is_(None)) + + pv_daily = db.query( + func.date(PageView.viewed_at).label('day'), + func.count(PageView.id).label('cnt') + ).join(UserSession, PageView.session_id == UserSession.id).filter( + *pv_filter + ).group_by(func.date(PageView.viewed_at)).all() + + pv_by_date = {str(r.day): r.cnt for r in pv_daily} + chart_pageviews = [] + for d in daily_data: + chart_pageviews.append(pv_by_date.get(str(d.date), 0)) + + # Hourly heatmap (7 days x 24 hours, exclude bots) heatmap_sql = text(""" SELECT EXTRACT(DOW FROM started_at)::int as dow, EXTRACT(HOUR FROM started_at)::int as hour, COUNT(*) as cnt FROM user_sessions - WHERE started_at >= :start_dt + WHERE 
started_at >= :start_dt AND is_bot = false GROUP BY dow, hour """) heatmap_raw = db.execute(heatmap_sql, {'start_dt': start_30d}).fetchall() @@ -697,23 +759,25 @@ def _tab_overview(db, start_date, days): row['hours'].append({'count': cnt, 'intensity': intensity}) heatmap_grid.append(row) - # Logged vs Anonymous + # Logged vs Anonymous (exclude bots) total_logged = db.query(func.count(UserSession.id)).filter( UserSession.started_at >= start_30d, - UserSession.user_id.isnot(None) + UserSession.user_id.isnot(None), + UserSession.is_bot == False ).scalar() or 0 total_anon = db.query(func.count(UserSession.id)).filter( UserSession.started_at >= start_30d, - UserSession.user_id.is_(None) + UserSession.user_id.is_(None), + UserSession.is_bot == False ).scalar() or 0 - # Devices over time (weekly) + # Devices over time (weekly, exclude bots) devices_sql = text(""" SELECT DATE_TRUNC('week', started_at)::date as week, device_type, COUNT(*) as cnt FROM user_sessions - WHERE started_at >= :start_dt + WHERE started_at >= :start_dt AND is_bot = false GROUP BY week, device_type ORDER BY week """) @@ -793,13 +857,16 @@ def user_insights_profile(user_id): SearchQuery.user_id == user_id, SearchQuery.searched_at >= start_30d ).scalar() or 0 - engagement_score = min(100, int( - s30 * 3 + pv30 * 1 + int(clicks30) * 0.5 + - int(dur30) / 60 * 2 + conv30 * 10 + search30 * 2 - )) + raw = (s30 * 3 + pv30 * 1 + int(clicks30) * 0.5 + + int(dur30) / 60 * 2 + conv30 * 10 + search30 * 2) + engagement_score = _log_engagement_score(raw) - # Problem score - fl = user.failed_login_attempts or 0 + # Problem score (failed logins from audit_logs, time-based) + fl = db.query(func.count(AuditLog.id)).filter( + AuditLog.user_email == user.email, + AuditLog.action == 'login_failed', + AuditLog.created_at >= start_7d + ).scalar() or 0 sa_7d = db.query(func.count(SecurityAlert.id)).filter( SecurityAlert.user_email == user.email, SecurityAlert.created_at >= start_7d @@ -1166,7 +1233,7 @@ def 
user_insights_profile(user_id): PageView.viewed_at < d_end ).scalar() or 0 - daily_score = min(30, d_sessions * 3 + d_pv) + daily_score = _log_engagement_score(d_sessions * 3 + d_pv) trend_labels.append(d.strftime('%d.%m')) trend_scores.append(daily_score) diff --git a/database.py b/database.py index 007b4ab..122f026 100644 --- a/database.py +++ b/database.py @@ -4144,6 +4144,9 @@ class UserSession(Base): page_views_count = Column(Integer, default=0) clicks_count = Column(Integer, default=0) + # Bot detection + is_bot = Column(Boolean, default=False) + # UTM Parameters (kampanie marketingowe) utm_source = Column(String(255), nullable=True) # google, facebook, newsletter utm_medium = Column(String(255), nullable=True) # cpc, email, social, organic diff --git a/database/migrations/079_bot_filtering.sql b/database/migrations/079_bot_filtering.sql new file mode 100644 index 0000000..17f0cdf --- /dev/null +++ b/database/migrations/079_bot_filtering.sql @@ -0,0 +1,98 @@ +-- Migration 079: Bot Filtering for Analytics +-- Adds is_bot column to user_sessions, backfills from user_agent patterns, +-- updates analytics_daily trigger to exclude bots, recalculates 90 days of data. + +-- 1. Add column +ALTER TABLE user_sessions ADD COLUMN IF NOT EXISTS is_bot BOOLEAN DEFAULT false; + +-- 2. 
Backfill from user_agent patterns
+UPDATE user_sessions SET is_bot = true
+WHERE user_agent ILIKE '%bot%'
+   OR user_agent ILIKE '%crawler%'
+   OR user_agent ILIKE '%spider%'
+   OR user_agent ILIKE '%curl/%'
+   OR user_agent ILIKE '%python-requests%'
+   OR user_agent ILIKE '%axios/%'
+   OR user_agent ILIKE '%wget/%'
+   OR user_agent ILIKE '%Scrapy%'
+   OR user_agent ILIKE '%Java/%'
+   OR user_agent ILIKE '%Go-http%'
+   OR user_agent ILIKE '%Werkzeug%'
+   OR user_agent ILIKE '%LeakIx%'
+   OR user_agent ILIKE '%Nuclei%'
+   OR user_agent ILIKE '%masscan%'
+   OR user_agent ILIKE '%nmap%'
+   OR user_agent ILIKE '%zgrab%'
+   OR user_agent ILIKE '%httpx%'
+   OR user_agent ILIKE '%censys%'
+   OR user_agent ILIKE '%shodan%'
+   OR user_agent IS NULL;
+
+-- 3. Partial index for non-bot sessions (most queries filter on this)
+CREATE INDEX IF NOT EXISTS idx_us_is_bot ON user_sessions(is_bot) WHERE is_bot = false;
+
+-- 4. Update analytics_daily trigger to skip bot sessions (page views without a session are still counted)
+CREATE OR REPLACE FUNCTION update_analytics_daily()
+RETURNS TRIGGER AS $$
+DECLARE target_date DATE;
+BEGIN
+    IF TG_TABLE_NAME = 'user_sessions' THEN
+        IF NEW.is_bot = true THEN RETURN NEW; END IF;
+        target_date := DATE(NEW.started_at);
+    ELSIF TG_TABLE_NAME = 'page_views' THEN
+        IF NEW.session_id IS NOT NULL THEN
+            IF EXISTS (SELECT 1 FROM user_sessions WHERE id = NEW.session_id AND is_bot = true) THEN
+                RETURN NEW;
+            END IF;
+        END IF;
+        target_date := DATE(NEW.viewed_at);
+    ELSE RETURN NEW;
+    END IF;
+
+    INSERT INTO analytics_daily (date, total_sessions, total_page_views, updated_at)
+    VALUES (target_date, 0, 0, NOW()) ON CONFLICT (date) DO NOTHING;
+
+    IF TG_TABLE_NAME = 'user_sessions' THEN
+        UPDATE analytics_daily SET
+            total_sessions = total_sessions + 1,
+            unique_users = (SELECT COUNT(DISTINCT user_id) FROM user_sessions
+                WHERE DATE(started_at) = target_date AND user_id IS NOT NULL AND is_bot = false),
+            anonymous_sessions = (SELECT COUNT(*) FROM user_sessions
+                WHERE DATE(started_at) = target_date AND user_id IS NULL AND is_bot = false),
+            desktop_sessions = (SELECT COUNT(*) FROM user_sessions
+                WHERE DATE(started_at) = target_date AND device_type = 'desktop' AND is_bot = false),
+            mobile_sessions = (SELECT COUNT(*) FROM user_sessions
+                WHERE DATE(started_at) = target_date AND device_type = 'mobile' AND is_bot = false),
+            tablet_sessions = (SELECT COUNT(*) FROM user_sessions
+                WHERE DATE(started_at) = target_date AND device_type = 'tablet' AND is_bot = false),
+            updated_at = NOW()
+        WHERE date = target_date;
+    ELSIF TG_TABLE_NAME = 'page_views' THEN
+        UPDATE analytics_daily SET total_page_views = total_page_views + 1, updated_at = NOW()
+        WHERE date = target_date;
+    END IF;
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+-- 5. Recalculate analytics_daily for last 90 days (remove bot contamination; page views use the trigger's rule)
+UPDATE analytics_daily ad SET
+    total_sessions = (SELECT COUNT(*) FROM user_sessions
+        WHERE DATE(started_at) = ad.date AND is_bot = false),
+    total_page_views = (SELECT COUNT(*) FROM page_views pv
+        WHERE DATE(pv.viewed_at) = ad.date AND NOT EXISTS (
+            SELECT 1 FROM user_sessions us WHERE us.id = pv.session_id AND us.is_bot = true)),
+    unique_users = (SELECT COUNT(DISTINCT user_id) FROM user_sessions
+        WHERE DATE(started_at) = ad.date AND user_id IS NOT NULL AND is_bot = false),
+    anonymous_sessions = (SELECT COUNT(*) FROM user_sessions
+        WHERE DATE(started_at) = ad.date AND user_id IS NULL AND is_bot = false),
+    desktop_sessions = (SELECT COUNT(*) FROM user_sessions
+        WHERE DATE(started_at) = ad.date AND device_type = 'desktop' AND is_bot = false),
+    mobile_sessions = (SELECT COUNT(*) FROM user_sessions
+        WHERE DATE(started_at) = ad.date AND device_type = 'mobile' AND is_bot = false),
+    tablet_sessions = (SELECT COUNT(*) FROM user_sessions
+        WHERE DATE(started_at) = ad.date AND device_type = 'tablet' AND is_bot = false)
+WHERE ad.date >= CURRENT_DATE - 90;
+
+-- 6. 
Grants
+GRANT ALL ON TABLE user_sessions TO nordabiz_app;
diff --git a/utils/analytics.py b/utils/analytics.py
index da3961a..44f9aed 100644
--- a/utils/analytics.py
+++ b/utils/analytics.py
@@ -54,12 +54,17 @@ def get_or_create_analytics_session():
             browser_version = ua.browser.version_string
             os_name = ua.os.family
             os_version = ua.os.version_string
+            # Same tool/scanner signatures as the backfill in migration 079, so old and new rows agree
+            is_bot = ua.is_bot or any(p in ua_string.lower() for p in
+                ['curl/', 'python-requests', 'axios/', 'wget/', 'scrapy', 'java/', 'go-http',
+                 'werkzeug', 'leakix', 'nuclei', 'masscan', 'nmap', 'zgrab', 'httpx', 'censys', 'shodan'])
         except Exception:
             device_type = 'desktop'
             browser = 'Unknown'
             browser_version = ''
             os_name = 'Unknown'
             os_version = ''
+            is_bot = False
 
         user_session = UserSession(
             session_id=analytics_session_id,
@@ -70,7 +75,8 @@ def get_or_create_analytics_session():
             browser=browser[:50] if browser else None,
             browser_version=browser_version[:20] if browser_version else None,
             os=os_name[:50] if os_name else None,
-            os_version=os_version[:20] if os_version else None
+            os_version=os_version[:20] if os_version else None,
+            is_bot=is_bot
         )
         db.add(user_session)
         db.commit()
diff --git a/utils/middleware.py b/utils/middleware.py
index fe71187..cafacde 100644
--- a/utils/middleware.py
+++ b/utils/middleware.py
@@ -77,6 +77,13 @@ def register_middleware(app):
         if request.path == '/favicon.ico':
             return
 
+        # Skip bot/AJAX utility paths (they are not real page views)
+        skip_exact = {'/robots.txt', '/sitemap.xml', '/manifest.json',
+                      '/check-verification-status', '/resend-verification'}
+        skip_prefixes = ('/.well-known/',)
+        if request.path in skip_exact or request.path.startswith(skip_prefixes):
+            return
+
         try:
             from utils.analytics import (
                 track_page_view_for_request,