fix: Remove Brave Search from social media audit — too many false positives
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions

Brave Search matched unrelated companies by name token (e.g. VINDOR matched
vindorclothing, vindormusic, beautybyneyador). Social media profiles are now
sourced only from website scraping and manual admin entry.

- Disabled BraveSearcher initialization and call in audit_company()
- Removed Brave Search step from audit progress animation
- Updated missing profile message with explanation and link to profile editor
- Added migration 071 to clean up existing brave_search entries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-02-18 18:20:52 +01:00
parent f2f65abca2
commit ce9513b4bb
3 changed files with 26 additions and 40 deletions

View File

@ -0,0 +1,13 @@
-- Migration 071: Remove false positive social media entries from Brave Search
-- Brave Search produced many false positives (matching unrelated companies by name token).
-- Social media profiles are now sourced only from website scraping and manual admin entry.
--
-- NOTE: this migration is destructive. No rollback statement is included in this file,
-- so capture the affected rows first if they may be needed later.
--
-- Optional pre-check (kept commented out): run this query manually before applying
-- the migration to record which rows will be deleted (for audit purposes).
-- SELECT company_id, platform, url, source, verified_at
-- FROM company_social_media
-- WHERE source = 'brave_search'
-- ORDER BY company_id, platform;
--
-- Delete all Brave Search entries; rows with any other source value are not matched
-- by the WHERE clause and are left untouched.
DELETE FROM company_social_media
WHERE source = 'brave_search';

View File

@ -1302,7 +1302,9 @@ class SocialMediaAuditor:
self.engine = create_engine(database_url)
self.Session = sessionmaker(bind=self.engine)
self.website_auditor = WebsiteAuditor()
self.brave_searcher = BraveSearcher()
# Brave Search disabled — too many false positives (matches unrelated companies by name).
# Social media profiles are now sourced from website scraping, manual admin entry,
# and the OAuth API (Facebook Graph API for verified data).
# self.brave_searcher = BraveSearcher()
self.profile_enricher = SocialProfileEnricher()
# Initialize Google Places searcher if API key is available
@ -1402,24 +1404,11 @@ class SocialMediaAuditor:
else:
logger.info("No social media links found on website")
# 3. Search for additional social media via Brave
city = company.get('address_city', 'Wejherowo')
try:
logger.info(f"Searching Brave for social media: {company['name']} in {city}")
brave_social = self.brave_searcher.search_social_media(company['name'], city)
if brave_social:
logger.info(f"Brave search found: {list(brave_social.keys())}")
else:
logger.info("Brave search found no additional social media")
# Merge, website takes precedence
for platform, url in brave_social.items():
if platform not in website_social:
website_social[platform] = url
social_sources[platform] = 'brave_search'
logger.info(f"Added {platform} from Brave search: {url}")
except Exception as e:
logger.warning(f"Brave search failed: {str(e)}")
result['errors'].append(f'Brave search failed: {str(e)}')
# 3. Brave Search disabled — too many false positives
# Social media profiles are now sourced only from:
# - Website scraping (automatic, from company's own website)
# - Manual admin entry (via company profile editing)
# - OAuth API (Facebook Graph API for verified data)
result['social_media'] = website_social
result['social_sources'] = social_sources

View File

@ -550,7 +550,6 @@
}
.source-tag.website { background: #6366f1; color: white; }
.source-tag.brave { background: #fb542b; color: white; }
.source-tag.google { background: #4285f4; color: white; }
.source-tag.facebook { background: #1877f2; color: white; }
.source-tag.instagram { background: #e4405f; color: white; }
@ -857,7 +856,7 @@
<div style="display: flex; align-items: center; gap: var(--spacing-xs); margin-bottom: var(--spacing-xs);">
<span style="font-size: var(--font-size-xs); color: var(--text-tertiary);">Źródło:</span>
<span style="font-size: var(--font-size-xs); padding: 1px 6px; border-radius: var(--radius-sm); background: #f3f4f6; color: #6b7280;">
{% if profile.source == 'website_scrape' %}Ze strony WWW{% elif profile.source == 'brave_search' %}Wyszukiwarka{% elif profile.source == 'manual' %}Ręcznie{% elif profile.source == 'facebook_api' %}Facebook API{% else %}{{ profile.source }}{% endif %}
{% if profile.source == 'website_scrape' %}Ze strony WWW{% elif profile.source == 'manual' %}Ręcznie{% elif profile.source == 'facebook_api' %}Facebook API{% else %}{{ profile.source }}{% endif %}
</span>
</div>
{% endif %}
@ -1007,7 +1006,8 @@
</div>
{% endif %}
{% else %}
<p class="platform-missing-text">Nie znaleziono profilu na tej platformie</p>
<p class="platform-missing-text">Profil nie został wykryty automatycznie na stronie internetowej firmy.</p>
<p style="font-size: var(--font-size-xs); color: var(--text-tertiary); margin-top: var(--spacing-xs);">Jeśli firma posiada profil na tej platformie, można go dodać ręcznie w <a href="{{ url_for('public.company_edit', company_id=company.id) }}" style="color: var(--primary); text-decoration: underline;">edycji profilu firmy</a>.</p>
{% endif %}
</div>
</div>
@ -1186,17 +1186,7 @@
<span class="step-text in_progress">Skanuję stronę WWW <span class="source-tag website">WWW</span></span>
</div>
<!-- Step 2: Brave Search -->
<div class="loading-step" id="step-brave">
<div class="step-icon pending">
<svg width="18" height="18" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<circle cx="12" cy="12" r="10" stroke-width="2"/>
</svg>
</div>
<span class="step-text pending">Szukam w Brave Search <span class="source-tag brave">BRAVE</span></span>
</div>
<!-- Step 3: Facebook -->
<!-- Step 2: Facebook -->
<div class="loading-step" id="step-facebook">
<div class="step-icon pending">
<svg width="18" height="18" fill="none" stroke="currentColor" viewBox="0 0 24 24">
@ -1301,7 +1291,7 @@ const companySlug = '{{ company.slug }}';
// All step IDs in order
const allSteps = [
'step-website', 'step-brave',
'step-website',
'step-facebook', 'step-instagram', 'step-linkedin',
'step-youtube', 'step-twitter', 'step-tiktok',
'step-google', 'step-save'
@ -1399,12 +1389,6 @@ async function animatePlatformSteps(foundPlatforms, googleData) {
updateStep('step-website', 'complete', 'Skanowanie strony WWW zakończone <span class="source-tag website">WWW</span>');
await new Promise(r => setTimeout(r, delay));
// Brave Search complete
updateStep('step-brave', 'in_progress', 'Przeszukuję Brave Search... <span class="source-tag brave">BRAVE</span>');
await new Promise(r => setTimeout(r, delay));
updateStep('step-brave', 'complete', 'Wyszukiwanie dodatkowych profili zakończone <span class="source-tag brave">BRAVE</span>');
await new Promise(r => setTimeout(r, delay / 2));
// Process each platform
const platforms = ['facebook', 'instagram', 'linkedin', 'youtube', 'twitter', 'tiktok'];
for (const platform of platforms) {