fix: filter bots from analytics, use audit_logs for failed logins, logarithmic engagement score
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

- Add is_bot column to user_sessions with backfill from user_agent patterns
- Update analytics_daily trigger to skip bot sessions
- Recalculate 90 days of analytics_daily without bot contamination
- Replace cumulative failed_login_attempts with time-based audit_logs queries
- Switch engagement score from linear (capped at 100) to log2 scale
- Expand section_map from 9 to 17 categories (~95% traffic coverage)
- Exclude robots.txt, sitemap.xml etc from page view tracking
- Add bot filter to all overview, pages, paths, and engagement queries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-22 08:14:50 +01:00
parent fc9d979fea
commit cca52301a6
5 changed files with 262 additions and 82 deletions

View File

@ -9,6 +9,7 @@ page popularity, user flows, and behavioral profiles.
import csv
import io
import logging
import math
from datetime import date, timedelta, datetime
from flask import render_template, request, redirect, url_for, flash, Response
@ -27,6 +28,21 @@ from utils.decorators import role_required
logger = logging.getLogger(__name__)
def _non_bot_sessions(db, start_dt=None):
    """Return a subquery of non-bot session IDs for filtering page_views.

    Args:
        db: SQLAlchemy session used to build the query.
        start_dt: Optional datetime; when given, only sessions started at or
            after this moment are included, which keeps the resulting
            ``IN (...)`` subquery small for period-scoped callers.

    Returns:
        A SQLAlchemy query selecting ``UserSession.id`` values, intended for
        use as ``PageView.session_id.in_(_non_bot_sessions(db, start_dt))``.
    """
    # .is_(False) instead of == False: same filtering result, but idiomatic
    # SQLAlchemy and lint-clean (flake8 E712).
    q = db.query(UserSession.id).filter(UserSession.is_bot.is_(False))
    # Explicit None check: a falsy-but-valid datetime can never be skipped.
    if start_dt is not None:
        q = q.filter(UserSession.started_at >= start_dt)
    return q
def _log_engagement_score(raw):
"""Logarithmic engagement score: better distribution than linear capped at 100."""
if raw <= 0:
return 0
return min(100, int(math.log2(raw + 1) * 6))
def _get_period_dates(period):
"""Return (start_date, days) for given period string."""
today = date.today()
@ -95,11 +111,9 @@ def _tab_problems(db, start_date, days):
User.locked_until > now, User.is_active == True
).scalar() or 0
failed_logins_7d = db.query(
func.coalesce(func.sum(User.failed_login_attempts), 0)
).filter(
User.is_active == True,
User.failed_login_attempts > 0
failed_logins_7d = db.query(func.count(AuditLog.id)).filter(
AuditLog.action == 'login_failed',
AuditLog.created_at >= start_dt
).scalar() or 0
password_resets_7d = db.query(func.count(EmailLog.id)).filter(
@ -116,8 +130,12 @@ def _tab_problems(db, start_date, days):
problem_users = []
for user in users:
# Failed logins
fl = user.failed_login_attempts or 0
# Failed logins (from audit_logs, time-based)
fl = db.query(func.count(AuditLog.id)).filter(
AuditLog.user_email == user.email,
AuditLog.action == 'login_failed',
AuditLog.created_at >= start_dt
).scalar() or 0
# Security alerts 7d
sa_7d = db.query(func.count(SecurityAlert.id)).filter(
@ -292,7 +310,8 @@ def _tab_engagement(db, start_date, days):
# Stat cards
active_7d = db.query(func.count(func.distinct(UserSession.user_id))).filter(
UserSession.user_id.isnot(None),
UserSession.started_at >= start_dt
UserSession.started_at >= start_dt,
UserSession.is_bot == False
).scalar() or 0
all_users = db.query(User).filter(User.is_active == True).all()
@ -321,49 +340,57 @@ def _tab_engagement(db, start_date, days):
engagement_list = []
for user in registered_users:
# Current period
# Current period (exclude bots)
sessions_cur = db.query(func.count(UserSession.id)).filter(
UserSession.user_id == user.id,
UserSession.started_at >= start_dt
UserSession.started_at >= start_dt,
UserSession.is_bot == False
).scalar() or 0
pv_cur = db.query(func.count(PageView.id)).filter(
PageView.user_id == user.id,
PageView.viewed_at >= start_dt
PageView.viewed_at >= start_dt,
PageView.session_id.in_(_non_bot_sessions(db, start_dt))
).scalar() or 0
# Previous period for WoW
sessions_prev = db.query(func.count(UserSession.id)).filter(
UserSession.user_id == user.id,
UserSession.started_at >= prev_start,
UserSession.started_at < start_dt
UserSession.started_at < start_dt,
UserSession.is_bot == False
).scalar() or 0
pv_prev = db.query(func.count(PageView.id)).filter(
PageView.user_id == user.id,
PageView.viewed_at >= prev_start,
PageView.viewed_at < start_dt
PageView.viewed_at < start_dt,
PageView.session_id.in_(_non_bot_sessions(db, prev_start))
).scalar() or 0
# 30d engagement score components
# 30d engagement score components (exclude bots)
s30 = db.query(func.count(UserSession.id)).filter(
UserSession.user_id == user.id,
UserSession.started_at >= start_30d
UserSession.started_at >= start_30d,
UserSession.is_bot == False
).scalar() or 0
pv30 = db.query(func.count(PageView.id)).filter(
PageView.user_id == user.id,
PageView.viewed_at >= start_30d
PageView.viewed_at >= start_30d,
PageView.session_id.in_(_non_bot_sessions(db, start_30d))
).scalar() or 0
clicks30 = db.query(func.sum(UserSession.clicks_count)).filter(
UserSession.user_id == user.id,
UserSession.started_at >= start_30d
UserSession.started_at >= start_30d,
UserSession.is_bot == False
).scalar() or 0
dur30 = db.query(func.sum(UserSession.duration_seconds)).filter(
UserSession.user_id == user.id,
UserSession.started_at >= start_30d
UserSession.started_at >= start_30d,
UserSession.is_bot == False
).scalar() or 0
conv30 = db.query(func.count(ConversionEvent.id)).filter(
@ -376,11 +403,9 @@ def _tab_engagement(db, start_date, days):
SearchQuery.searched_at >= start_30d
).scalar() or 0
score = min(100,
s30 * 3 + pv30 * 1 + int(clicks30) * 0.5 +
int(dur30) / 60 * 2 + conv30 * 10 + search30 * 2
)
score = int(score)
raw = (s30 * 3 + pv30 * 1 + int(clicks30) * 0.5 +
int(dur30) / 60 * 2 + conv30 * 10 + search30 * 2)
score = _log_engagement_score(raw)
# WoW change
wow = None
@ -444,16 +469,24 @@ def _tab_pages(db, start_date, days):
"""Page popularity map."""
start_dt = datetime.combine(start_date, datetime.min.time())
# Page sections with grouping
# Page sections with grouping (expanded to cover ~95% of traffic)
section_map = {
'Strona główna': ['/'],
'Profile firm': ['/company/'],
'Forum': ['/forum'],
'Chat': ['/chat'],
'Wyszukiwarka': ['/search', '/szukaj'],
'Wydarzenia': ['/events', '/wydarzenia'],
'Ogłoszenia': ['/classifieds', '/ogloszenia'],
'Członkostwo': ['/membership', '/czlonkostwo'],
'Wydarzenia': ['/events', '/wydarzenia', '/kalendarz'],
'Ogłoszenia': ['/classifieds', '/ogloszenia', '/tablica'],
'Członkostwo': ['/membership', '/czlonkostwo', '/korzysci'],
'Logowanie': ['/login', '/register', '/forgot-password', '/reset-password', '/verify-email'],
'Panel użytkownika': ['/dashboard', '/konto'],
'Wiadomości': ['/wiadomosci'],
'Edukacja': ['/edukacja'],
'Rada': ['/rada'],
'ZOPK': ['/zopk'],
'Kontakty': ['/kontakty'],
'Raporty': ['/raporty'],
'Admin': ['/admin'],
}
@ -467,9 +500,10 @@ def _tab_pages(db, start_date, days):
func.count(PageView.id).label('views'),
func.count(func.distinct(PageView.user_id)).label('unique_users'),
func.avg(PageView.time_on_page_seconds).label('avg_time')
).filter(
).join(UserSession, PageView.session_id == UserSession.id).filter(
or_(*conditions),
PageView.viewed_at >= start_dt
PageView.viewed_at >= start_dt,
UserSession.is_bot == False
).first()
sections.append({
@ -484,7 +518,7 @@ def _tab_pages(db, start_date, days):
for s in sections:
s['intensity'] = min(100, int(s['views'] / max_views * 100))
# Top 50 pages
# Top 50 pages (exclude bots)
top_pages = db.query(
PageView.path,
func.count(PageView.id).label('views'),
@ -492,8 +526,9 @@ def _tab_pages(db, start_date, days):
func.avg(PageView.time_on_page_seconds).label('avg_time'),
func.avg(PageView.scroll_depth_percent).label('avg_scroll'),
func.avg(PageView.load_time_ms).label('avg_load'),
).filter(
PageView.viewed_at >= start_dt
).join(UserSession, PageView.session_id == UserSession.id).filter(
PageView.viewed_at >= start_dt,
UserSession.is_bot == False
).group_by(PageView.path).order_by(desc('views')).limit(50).all()
max_page_views = top_pages[0].views if top_pages else 1
@ -510,13 +545,14 @@ def _tab_pages(db, start_date, days):
'bar_pct': int(p.views / max_page_views * 100),
})
# Ignored pages (< 5 views in 30d)
# Ignored pages (< 5 views in 30d, exclude bots)
start_30d = datetime.combine(date.today() - timedelta(days=30), datetime.min.time())
ignored = db.query(
PageView.path,
func.count(PageView.id).label('views'),
).filter(
PageView.viewed_at >= start_30d
).join(UserSession, PageView.session_id == UserSession.id).filter(
PageView.viewed_at >= start_30d,
UserSession.is_bot == False
).group_by(PageView.path).having(
func.count(PageView.id) < 5
).order_by('views').limit(30).all()
@ -536,13 +572,14 @@ def _tab_paths(db, start_date, days):
"""User flow analysis."""
start_dt = datetime.combine(start_date, datetime.min.time())
# Entry pages - first page in each session
# Entry pages - first page in each session (exclude bots)
entry_sql = text("""
WITH first_pages AS (
SELECT DISTINCT ON (session_id) path
FROM page_views
WHERE viewed_at >= :start_dt AND session_id IS NOT NULL
ORDER BY session_id, viewed_at ASC
SELECT DISTINCT ON (pv.session_id) pv.path
FROM page_views pv
JOIN user_sessions us ON pv.session_id = us.id
WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false
ORDER BY pv.session_id, pv.viewed_at ASC
)
SELECT path, COUNT(*) as cnt
FROM first_pages
@ -550,13 +587,14 @@ def _tab_paths(db, start_date, days):
""")
entry_pages = db.execute(entry_sql, {'start_dt': start_dt}).fetchall()
# Exit pages - last page in each session
# Exit pages - last page in each session (exclude bots)
exit_sql = text("""
WITH last_pages AS (
SELECT DISTINCT ON (session_id) path
FROM page_views
WHERE viewed_at >= :start_dt AND session_id IS NOT NULL
ORDER BY session_id, viewed_at DESC
SELECT DISTINCT ON (pv.session_id) pv.path
FROM page_views pv
JOIN user_sessions us ON pv.session_id = us.id
WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false
ORDER BY pv.session_id, pv.viewed_at DESC
)
SELECT path, COUNT(*) as cnt
FROM last_pages
@ -567,13 +605,14 @@ def _tab_paths(db, start_date, days):
max_entry = entry_pages[0].cnt if entry_pages else 1
max_exit = exit_pages[0].cnt if exit_pages else 1
# Top transitions
# Top transitions (exclude bots)
transitions_sql = text("""
WITH ordered AS (
SELECT session_id, path,
LEAD(path) OVER (PARTITION BY session_id ORDER BY viewed_at) AS next_path
FROM page_views
WHERE viewed_at >= :start_dt AND session_id IS NOT NULL
SELECT pv.session_id, pv.path,
LEAD(pv.path) OVER (PARTITION BY pv.session_id ORDER BY pv.viewed_at) AS next_path
FROM page_views pv
JOIN user_sessions us ON pv.session_id = us.id
WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false
)
SELECT path, next_path, COUNT(*) as cnt
FROM ordered
@ -582,21 +621,23 @@ def _tab_paths(db, start_date, days):
""")
transitions = db.execute(transitions_sql, {'start_dt': start_dt}).fetchall()
# Drop-off pages (high exit rate)
# Drop-off pages (high exit rate, exclude bots)
dropoff_sql = text("""
WITH page_stats AS (
SELECT path, COUNT(*) as total_views
FROM page_views
WHERE viewed_at >= :start_dt AND session_id IS NOT NULL
GROUP BY path HAVING COUNT(*) >= 5
SELECT pv.path, COUNT(*) as total_views
FROM page_views pv
JOIN user_sessions us ON pv.session_id = us.id
WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false
GROUP BY pv.path HAVING COUNT(*) >= 5
),
exit_stats AS (
SELECT path, COUNT(*) as exit_count
FROM (
SELECT DISTINCT ON (session_id) path
FROM page_views
WHERE viewed_at >= :start_dt AND session_id IS NOT NULL
ORDER BY session_id, viewed_at DESC
SELECT DISTINCT ON (pv.session_id) pv.path
FROM page_views pv
JOIN user_sessions us ON pv.session_id = us.id
WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false
ORDER BY pv.session_id, pv.viewed_at DESC
) lp
GROUP BY path
)
@ -609,7 +650,7 @@ def _tab_paths(db, start_date, days):
""")
dropoff = db.execute(dropoff_sql, {'start_dt': start_dt}).fetchall()
# Session length distribution
# Session length distribution (exclude bots)
session_length_sql = text("""
SELECT
CASE
@ -621,10 +662,11 @@ def _tab_paths(db, start_date, days):
END as bucket,
COUNT(*) as cnt
FROM (
SELECT session_id, COUNT(*) as pv_count
FROM page_views
WHERE viewed_at >= :start_dt AND session_id IS NOT NULL
GROUP BY session_id
SELECT pv.session_id, COUNT(*) as pv_count
FROM page_views pv
JOIN user_sessions us ON pv.session_id = us.id
WHERE pv.viewed_at >= :start_dt AND pv.session_id IS NOT NULL AND us.is_bot = false
GROUP BY pv.session_id
) session_counts
GROUP BY bucket
ORDER BY MIN(pv_count)
@ -651,14 +693,13 @@ def _tab_overview(db, start_date, days):
start_dt = datetime.combine(start_date, datetime.min.time())
start_30d = datetime.combine(date.today() - timedelta(days=30), datetime.min.time())
# Daily sessions + page views (30d)
# Daily sessions from analytics_daily (already bot-filtered after migration)
daily_data = db.query(AnalyticsDaily).filter(
AnalyticsDaily.date >= date.today() - timedelta(days=30)
).order_by(AnalyticsDaily.date).all()
chart_labels = []
chart_sessions = []
chart_pageviews = []
for d in daily_data:
chart_labels.append(d.date.strftime('%d.%m'))
if filter_type == 'logged':
@ -667,15 +708,36 @@ def _tab_overview(db, start_date, days):
chart_sessions.append(d.anonymous_sessions or 0)
else:
chart_sessions.append(d.total_sessions or 0)
chart_pageviews.append(d.total_page_views or 0)
# Hourly heatmap (7 days x 24 hours)
# Daily page views from raw PageView + JOIN (bot-filtered, supports logged/anon filter)
pv_filter = [
PageView.viewed_at >= start_30d,
UserSession.is_bot == False,
]
if filter_type == 'logged':
pv_filter.append(UserSession.user_id.isnot(None))
elif filter_type == 'anonymous':
pv_filter.append(UserSession.user_id.is_(None))
pv_daily = db.query(
func.date(PageView.viewed_at).label('day'),
func.count(PageView.id).label('cnt')
).join(UserSession, PageView.session_id == UserSession.id).filter(
*pv_filter
).group_by(func.date(PageView.viewed_at)).all()
pv_by_date = {str(r.day): r.cnt for r in pv_daily}
chart_pageviews = []
for d in daily_data:
chart_pageviews.append(pv_by_date.get(str(d.date), 0))
# Hourly heatmap (7 days x 24 hours, exclude bots)
heatmap_sql = text("""
SELECT EXTRACT(DOW FROM started_at)::int as dow,
EXTRACT(HOUR FROM started_at)::int as hour,
COUNT(*) as cnt
FROM user_sessions
WHERE started_at >= :start_dt
WHERE started_at >= :start_dt AND is_bot = false
GROUP BY dow, hour
""")
heatmap_raw = db.execute(heatmap_sql, {'start_dt': start_30d}).fetchall()
@ -697,23 +759,25 @@ def _tab_overview(db, start_date, days):
row['hours'].append({'count': cnt, 'intensity': intensity})
heatmap_grid.append(row)
# Logged vs Anonymous
# Logged vs Anonymous (exclude bots)
total_logged = db.query(func.count(UserSession.id)).filter(
UserSession.started_at >= start_30d,
UserSession.user_id.isnot(None)
UserSession.user_id.isnot(None),
UserSession.is_bot == False
).scalar() or 0
total_anon = db.query(func.count(UserSession.id)).filter(
UserSession.started_at >= start_30d,
UserSession.user_id.is_(None)
UserSession.user_id.is_(None),
UserSession.is_bot == False
).scalar() or 0
# Devices over time (weekly)
# Devices over time (weekly, exclude bots)
devices_sql = text("""
SELECT DATE_TRUNC('week', started_at)::date as week,
device_type,
COUNT(*) as cnt
FROM user_sessions
WHERE started_at >= :start_dt
WHERE started_at >= :start_dt AND is_bot = false
GROUP BY week, device_type
ORDER BY week
""")
@ -793,13 +857,16 @@ def user_insights_profile(user_id):
SearchQuery.user_id == user_id, SearchQuery.searched_at >= start_30d
).scalar() or 0
engagement_score = min(100, int(
s30 * 3 + pv30 * 1 + int(clicks30) * 0.5 +
int(dur30) / 60 * 2 + conv30 * 10 + search30 * 2
))
raw = (s30 * 3 + pv30 * 1 + int(clicks30) * 0.5 +
int(dur30) / 60 * 2 + conv30 * 10 + search30 * 2)
engagement_score = _log_engagement_score(raw)
# Problem score
fl = user.failed_login_attempts or 0
# Problem score (failed logins from audit_logs, time-based)
fl = db.query(func.count(AuditLog.id)).filter(
AuditLog.user_email == user.email,
AuditLog.action == 'login_failed',
AuditLog.created_at >= start_7d
).scalar() or 0
sa_7d = db.query(func.count(SecurityAlert.id)).filter(
SecurityAlert.user_email == user.email,
SecurityAlert.created_at >= start_7d
@ -1166,7 +1233,7 @@ def user_insights_profile(user_id):
PageView.viewed_at < d_end
).scalar() or 0
daily_score = min(30, d_sessions * 3 + d_pv)
daily_score = _log_engagement_score(d_sessions * 3 + d_pv)
trend_labels.append(d.strftime('%d.%m'))
trend_scores.append(daily_score)

View File

@ -4144,6 +4144,9 @@ class UserSession(Base):
page_views_count = Column(Integer, default=0)
clicks_count = Column(Integer, default=0)
# Bot detection
is_bot = Column(Boolean, default=False)
# UTM Parameters (kampanie marketingowe)
utm_source = Column(String(255), nullable=True) # google, facebook, newsletter
utm_medium = Column(String(255), nullable=True) # cpc, email, social, organic

View File

@ -0,0 +1,98 @@
-- Migration 079: Bot Filtering for Analytics
-- Adds is_bot column to user_sessions, backfills from user_agent patterns,
-- updates analytics_daily trigger to exclude bots, recalculates 90 days of data.
-- 1. Add column
-- IF NOT EXISTS keeps the migration safe to re-run.
ALTER TABLE user_sessions ADD COLUMN IF NOT EXISTS is_bot BOOLEAN DEFAULT false;
-- 2. Backfill from user_agent patterns
-- NOTE(review): '%bot%' is broad and may also flag legitimate user agents that
-- merely contain "bot" as a substring -- confirm this trade-off is acceptable.
-- Sessions with a NULL user_agent are deliberately treated as bots.
UPDATE user_sessions SET is_bot = true
WHERE user_agent ILIKE '%bot%'
OR user_agent ILIKE '%crawler%'
OR user_agent ILIKE '%spider%'
OR user_agent ILIKE '%curl/%'
OR user_agent ILIKE '%python-requests%'
OR user_agent ILIKE '%axios/%'
OR user_agent ILIKE '%wget/%'
OR user_agent ILIKE '%Scrapy%'
OR user_agent ILIKE '%Java/%'
OR user_agent ILIKE '%Go-http%'
OR user_agent ILIKE '%Werkzeug%'
OR user_agent ILIKE '%LeakIx%'
OR user_agent ILIKE '%Nuclei%'
OR user_agent ILIKE '%masscan%'
OR user_agent ILIKE '%nmap%'
OR user_agent ILIKE '%zgrab%'
OR user_agent ILIKE '%httpx%'
OR user_agent ILIKE '%censys%'
OR user_agent ILIKE '%shodan%'
OR user_agent IS NULL;
-- 3. Partial index for non-bot sessions (most queries filter on this)
CREATE INDEX IF NOT EXISTS idx_us_is_bot ON user_sessions(is_bot) WHERE is_bot = false;
-- 4. Update analytics_daily trigger to skip bot sessions
-- Early-returns for bot sessions and for page views whose parent session is
-- flagged as a bot, so bot traffic never touches analytics_daily counters.
CREATE OR REPLACE FUNCTION update_analytics_daily()
RETURNS TRIGGER AS $$
DECLARE target_date DATE;
BEGIN
-- Resolve the analytics day for this row and bail out early on bot traffic.
IF TG_TABLE_NAME = 'user_sessions' THEN
IF NEW.is_bot = true THEN RETURN NEW; END IF;
target_date := DATE(NEW.started_at);
ELSIF TG_TABLE_NAME = 'page_views' THEN
IF NEW.session_id IS NOT NULL THEN
-- Page views with a NULL session_id skip the bot check and are counted.
IF EXISTS (SELECT 1 FROM user_sessions WHERE id = NEW.session_id AND is_bot = true) THEN
RETURN NEW;
END IF;
END IF;
target_date := DATE(NEW.viewed_at);
ELSE RETURN NEW;
END IF;
-- Ensure a row exists for the day before touching its counters.
INSERT INTO analytics_daily (date, total_sessions, total_page_views, updated_at)
VALUES (target_date, 0, 0, NOW()) ON CONFLICT (date) DO NOTHING;
IF TG_TABLE_NAME = 'user_sessions' THEN
-- total_sessions is a cheap increment; the other columns are recomputed
-- from scratch on every session insert. NOTE(review): that is several COUNT
-- subqueries per insert -- verify session volume keeps this cheap enough.
UPDATE analytics_daily SET
total_sessions = total_sessions + 1,
unique_users = (SELECT COUNT(DISTINCT user_id) FROM user_sessions
WHERE DATE(started_at) = target_date AND user_id IS NOT NULL AND is_bot = false),
anonymous_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = target_date AND user_id IS NULL AND is_bot = false),
desktop_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = target_date AND device_type = 'desktop' AND is_bot = false),
mobile_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = target_date AND device_type = 'mobile' AND is_bot = false),
tablet_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = target_date AND device_type = 'tablet' AND is_bot = false),
updated_at = NOW()
WHERE date = target_date;
ELSIF TG_TABLE_NAME = 'page_views' THEN
UPDATE analytics_daily SET total_page_views = total_page_views + 1, updated_at = NOW()
WHERE date = target_date;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- 5. Recalculate analytics_daily for last 90 days (remove bot contamination)
-- NOTE(review): the INNER JOIN drops page views whose session_id is NULL,
-- while the trigger above still counts them -- confirm that discrepancy
-- between backfill and live counting is intended.
UPDATE analytics_daily ad SET
total_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = ad.date AND is_bot = false),
total_page_views = (SELECT COUNT(*) FROM page_views pv
JOIN user_sessions us ON pv.session_id = us.id
WHERE DATE(pv.viewed_at) = ad.date AND us.is_bot = false),
unique_users = (SELECT COUNT(DISTINCT user_id) FROM user_sessions
WHERE DATE(started_at) = ad.date AND user_id IS NOT NULL AND is_bot = false),
anonymous_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = ad.date AND user_id IS NULL AND is_bot = false),
desktop_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = ad.date AND device_type = 'desktop' AND is_bot = false),
mobile_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = ad.date AND device_type = 'mobile' AND is_bot = false),
tablet_sessions = (SELECT COUNT(*) FROM user_sessions
WHERE DATE(started_at) = ad.date AND device_type = 'tablet' AND is_bot = false)
WHERE ad.date >= CURRENT_DATE - 90;
-- 6. Grants
GRANT ALL ON TABLE user_sessions TO nordabiz_app;

View File

@ -54,12 +54,16 @@ def get_or_create_analytics_session():
browser_version = ua.browser.version_string
os_name = ua.os.family
os_version = ua.os.version_string
is_bot = ua.is_bot or any(p in ua_string.lower() for p in
['curl/', 'python-requests', 'axios/', 'wget/', 'scrapy',
'werkzeug', 'leakix', 'nuclei', 'masscan', 'zgrab', 'httpx'])
except Exception:
device_type = 'desktop'
browser = 'Unknown'
browser_version = ''
os_name = 'Unknown'
os_version = ''
is_bot = False
user_session = UserSession(
session_id=analytics_session_id,
@ -70,7 +74,8 @@ def get_or_create_analytics_session():
browser=browser[:50] if browser else None,
browser_version=browser_version[:20] if browser_version else None,
os=os_name[:50] if os_name else None,
os_version=os_version[:20] if os_version else None
os_version=os_version[:20] if os_version else None,
is_bot=is_bot
)
db.add(user_session)
db.commit()

View File

@ -77,6 +77,13 @@ def register_middleware(app):
if request.path == '/favicon.ico':
return
# Skip bot/AJAX utility paths
skip_exact = {'/robots.txt', '/sitemap.xml', '/manifest.json',
'/check-verification-status', '/resend-verification'}
skip_prefixes = ('/.well-known/',)
if request.path in skip_exact or any(request.path.startswith(p) for p in skip_prefixes):
return
try:
from utils.analytics import (
track_page_view_for_request,