fix: Remove Brave Search from social media audit — too many false positives
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Brave Search matched unrelated companies by name token (e.g. VINDOR matched vindorclothing, vindormusic, beautybyneyador). Social media profiles are now sourced only from website scraping and manual admin entry.

- Disabled BraveSearcher initialization and call in audit_company()
- Removed Brave Search step from audit progress animation
- Updated missing profile message with explanation and link to profile editor
- Added migration 071 to clean up existing brave_search entries

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f2f65abca2
commit
ce9513b4bb
13
database/migrations/071_remove_brave_search_social_media.sql
Normal file
13
database/migrations/071_remove_brave_search_social_media.sql
Normal file
@ -0,0 +1,13 @@
|
||||
-- Migration 071: Remove false positive social media entries from Brave Search
|
||||
-- Brave Search produced many false positives (matching unrelated companies by name token).
|
||||
-- Social media profiles are now sourced only from website scraping and manual admin entry.
|
||||
|
||||
-- Optional pre-check: uncomment and run the SELECT below to review the rows that will be deleted (for audit purposes)
|
||||
-- SELECT company_id, platform, url, source, verified_at
|
||||
-- FROM company_social_media
|
||||
-- WHERE source = 'brave_search'
|
||||
-- ORDER BY company_id, platform;
|
||||
|
||||
-- Delete all Brave Search entries
|
||||
DELETE FROM company_social_media
|
||||
WHERE source = 'brave_search';
|
||||
@ -1302,7 +1302,9 @@ class SocialMediaAuditor:
|
||||
self.engine = create_engine(database_url)
|
||||
self.Session = sessionmaker(bind=self.engine)
|
||||
self.website_auditor = WebsiteAuditor()
|
||||
self.brave_searcher = BraveSearcher()
|
||||
# Brave Search disabled — too many false positives (matches unrelated companies by name)
|
||||
# Social media profiles are now sourced only from website scraping, manual admin entry, and the Facebook Graph API (OAuth)
|
||||
# self.brave_searcher = BraveSearcher()
|
||||
self.profile_enricher = SocialProfileEnricher()
|
||||
|
||||
# Initialize Google Places searcher if API key is available
|
||||
@ -1402,24 +1404,11 @@ class SocialMediaAuditor:
|
||||
else:
|
||||
logger.info("No social media links found on website")
|
||||
|
||||
# 3. Search for additional social media via Brave
|
||||
city = company.get('address_city', 'Wejherowo')
|
||||
try:
|
||||
logger.info(f"Searching Brave for social media: {company['name']} in {city}")
|
||||
brave_social = self.brave_searcher.search_social_media(company['name'], city)
|
||||
if brave_social:
|
||||
logger.info(f"Brave search found: {list(brave_social.keys())}")
|
||||
else:
|
||||
logger.info("Brave search found no additional social media")
|
||||
# Merge, website takes precedence
|
||||
for platform, url in brave_social.items():
|
||||
if platform not in website_social:
|
||||
website_social[platform] = url
|
||||
social_sources[platform] = 'brave_search'
|
||||
logger.info(f"Added {platform} from Brave search: {url}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Brave search failed: {str(e)}")
|
||||
result['errors'].append(f'Brave search failed: {str(e)}')
|
||||
# 3. Brave Search disabled — too many false positives
|
||||
# Social media profiles are now sourced only from:
|
||||
# - Website scraping (automatic, from company's own website)
|
||||
# - Manual admin entry (via company profile editing)
|
||||
# - OAuth API (Facebook Graph API for verified data)
|
||||
|
||||
result['social_media'] = website_social
|
||||
result['social_sources'] = social_sources
|
||||
|
||||
@ -550,7 +550,6 @@
|
||||
}
|
||||
|
||||
.source-tag.website { background: #6366f1; color: white; }
|
||||
.source-tag.brave { background: #fb542b; color: white; }
|
||||
.source-tag.google { background: #4285f4; color: white; }
|
||||
.source-tag.facebook { background: #1877f2; color: white; }
|
||||
.source-tag.instagram { background: #e4405f; color: white; }
|
||||
@ -857,7 +856,7 @@
|
||||
<div style="display: flex; align-items: center; gap: var(--spacing-xs); margin-bottom: var(--spacing-xs);">
|
||||
<span style="font-size: var(--font-size-xs); color: var(--text-tertiary);">Źródło:</span>
|
||||
<span style="font-size: var(--font-size-xs); padding: 1px 6px; border-radius: var(--radius-sm); background: #f3f4f6; color: #6b7280;">
|
||||
{% if profile.source == 'website_scrape' %}Ze strony WWW{% elif profile.source == 'brave_search' %}Wyszukiwarka{% elif profile.source == 'manual' %}Ręcznie{% elif profile.source == 'facebook_api' %}Facebook API{% else %}{{ profile.source }}{% endif %}
|
||||
{% if profile.source == 'website_scrape' %}Ze strony WWW{% elif profile.source == 'manual' %}Ręcznie{% elif profile.source == 'facebook_api' %}Facebook API{% else %}{{ profile.source }}{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
@ -1007,7 +1006,8 @@
|
||||
</div>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<p class="platform-missing-text">Nie znaleziono profilu na tej platformie</p>
|
||||
<p class="platform-missing-text">Profil nie został wykryty automatycznie na stronie internetowej firmy.</p>
|
||||
<p style="font-size: var(--font-size-xs); color: var(--text-tertiary); margin-top: var(--spacing-xs);">Jeśli firma posiada profil na tej platformie, można go dodać ręcznie w <a href="{{ url_for('public.company_edit', company_id=company.id) }}" style="color: var(--primary); text-decoration: underline;">edycji profilu firmy</a>.</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
@ -1186,17 +1186,7 @@
|
||||
<span class="step-text in_progress">Skanuję stronę WWW <span class="source-tag website">WWW</span></span>
|
||||
</div>
|
||||
|
||||
<!-- Step 2: Brave Search -->
|
||||
<div class="loading-step" id="step-brave">
|
||||
<div class="step-icon pending">
|
||||
<svg width="18" height="18" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
<circle cx="12" cy="12" r="10" stroke-width="2"/>
|
||||
</svg>
|
||||
</div>
|
||||
<span class="step-text pending">Szukam w Brave Search <span class="source-tag brave">BRAVE</span></span>
|
||||
</div>
|
||||
|
||||
<!-- Step 3: Facebook -->
|
||||
<!-- Step 2: Facebook -->
|
||||
<div class="loading-step" id="step-facebook">
|
||||
<div class="step-icon pending">
|
||||
<svg width="18" height="18" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||
@ -1301,7 +1291,7 @@ const companySlug = '{{ company.slug }}';
|
||||
|
||||
// All step IDs in order
|
||||
const allSteps = [
|
||||
'step-website', 'step-brave',
|
||||
'step-website',
|
||||
'step-facebook', 'step-instagram', 'step-linkedin',
|
||||
'step-youtube', 'step-twitter', 'step-tiktok',
|
||||
'step-google', 'step-save'
|
||||
@ -1399,12 +1389,6 @@ async function animatePlatformSteps(foundPlatforms, googleData) {
|
||||
updateStep('step-website', 'complete', 'Skanowanie strony WWW zakończone <span class="source-tag website">WWW</span>');
|
||||
await new Promise(r => setTimeout(r, delay));
|
||||
|
||||
// Brave Search complete
|
||||
updateStep('step-brave', 'in_progress', 'Przeszukuję Brave Search... <span class="source-tag brave">BRAVE</span>');
|
||||
await new Promise(r => setTimeout(r, delay));
|
||||
updateStep('step-brave', 'complete', 'Wyszukiwanie dodatkowych profili zakończone <span class="source-tag brave">BRAVE</span>');
|
||||
await new Promise(r => setTimeout(r, delay / 2));
|
||||
|
||||
// Process each platform
|
||||
const platforms = ['facebook', 'instagram', 'linkedin', 'youtube', 'twitter', 'tiktok'];
|
||||
for (const platform of platforms) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user