fix(social-audit): Fix Facebook profile.php URLs being saved without ID
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

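The regex was capturing 'profile.php' as a username instead of extracting
the numeric ID from profile.php?id=XXX links. Added a dedicated pattern for
profile.php URLs (listed ahead of the generic facebook.com pattern) and
added 'profile.php' to the exclusion list, together with other non-profile
path segments ('profile', 'watch', 'groups', 'events', 'marketplace',
'gaming', 'stories'). Purely numeric matches are now rebuilt as
https://facebook.com/profile.php?id=<ID> instead of https://facebook.com/<ID>.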

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-06 18:03:01 +01:00
parent b2c73d027d
commit 14ce54d8b5

View File

@ -115,6 +115,7 @@ HOSTING_PROVIDERS = {
# Social media patterns
SOCIAL_MEDIA_PATTERNS = {
'facebook': [
r'(?:https?://)?(?:www\.)?facebook\.com/profile\.php\?id=(\d+)',
r'(?:https?://)?(?:www\.)?facebook\.com/([^/?\s"\'<>]+)',
r'(?:https?://)?(?:www\.)?fb\.com/([^/?\s"\'<>]+)',
],
@ -139,7 +140,7 @@ SOCIAL_MEDIA_PATTERNS = {
# False positives to exclude
SOCIAL_MEDIA_EXCLUDE = {
'facebook': ['sharer', 'share', 'intent', 'plugins', 'dialog', 'sharer.php', 'login', 'pages', 'boldthemes'],
'facebook': ['sharer', 'share', 'intent', 'plugins', 'dialog', 'sharer.php', 'login', 'pages', 'boldthemes', 'profile.php', 'profile', 'watch', 'groups', 'events', 'marketplace', 'gaming', 'stories'],
'instagram': ['explore', 'accounts', 'p', 'reel'],
'youtube': ['embed', 'watch', 'playlist', 'results', 'feed', 'channel', 'c', 'user', '@', 'about', 'featured', 'videos', 'shorts', 'streams', 'playlists', 'community', 'channels', 'store'],
'linkedin': ['shareArticle', 'share', 'login'],
@ -452,7 +453,10 @@ class WebsiteAuditor:
if match.lower() not in excludes:
# Construct full URL
if platform == 'facebook':
url = f'https://facebook.com/{match}'
if match.isdigit():
url = f'https://facebook.com/profile.php?id={match}'
else:
url = f'https://facebook.com/{match}'
elif platform == 'instagram':
url = f'https://instagram.com/{match}'
elif platform == 'youtube':