From ce9513b4bb1882891b68d1af0d09ab26ae3fb755 Mon Sep 17 00:00:00 2001 From: Maciej Pienczyn Date: Wed, 18 Feb 2026 18:20:52 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20Remove=20Brave=20Search=20from=20social?= =?UTF-8?q?=20media=20audit=20=E2=80=94=20too=20many=20false=20positives?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brave Search matched unrelated companies by name token (e.g. VINDOR matched vindorclothing, vindormusic, beautybyneyador). Social media profiles are now sourced only from website scraping and manual admin entry. - Disabled BraveSearcher initialization and call in audit_company() - Removed Brave Search step from audit progress animation - Updated missing profile message with explanation and link to profile editor - Added migration 071 to clean up existing brave_search entries Co-Authored-By: Claude Opus 4.6 --- .../071_remove_brave_search_social_media.sql | 13 +++++++++ scripts/social_media_audit.py | 27 ++++++------------- templates/social_audit.html | 26 ++++-------------- 3 files changed, 26 insertions(+), 40 deletions(-) create mode 100644 database/migrations/071_remove_brave_search_social_media.sql diff --git a/database/migrations/071_remove_brave_search_social_media.sql b/database/migrations/071_remove_brave_search_social_media.sql new file mode 100644 index 0000000..d0c3db4 --- /dev/null +++ b/database/migrations/071_remove_brave_search_social_media.sql @@ -0,0 +1,13 @@ +-- Migration 071: Remove false positive social media entries from Brave Search +-- Brave Search produced many false positives (matching unrelated companies by name token). +-- Social media profiles are now sourced only from website scraping and manual admin entry. 
+ +-- Optional: uncomment and run this SELECT first to review the rows that will be deleted (for audit purposes) +-- SELECT company_id, platform, url, source, verified_at +-- FROM company_social_media +-- WHERE source = 'brave_search' +-- ORDER BY company_id, platform; + +-- Delete all Brave Search entries +DELETE FROM company_social_media +WHERE source = 'brave_search'; diff --git a/scripts/social_media_audit.py b/scripts/social_media_audit.py index 59059c8..865b69a 100644 --- a/scripts/social_media_audit.py +++ b/scripts/social_media_audit.py @@ -1302,7 +1302,9 @@ class SocialMediaAuditor: self.engine = create_engine(database_url) self.Session = sessionmaker(bind=self.engine) self.website_auditor = WebsiteAuditor() - self.brave_searcher = BraveSearcher() + # Brave Search disabled — too many false positives (matches unrelated companies by name) + # Social media profiles are now sourced only from website scraping and manual admin entry + # self.brave_searcher = BraveSearcher() self.profile_enricher = SocialProfileEnricher() # Initialize Google Places searcher if API key is available @@ -1402,24 +1404,11 @@ class SocialMediaAuditor: else: logger.info("No social media links found on website") - # 3. Search for additional social media via Brave - city = company.get('address_city', 'Wejherowo') - try: - logger.info(f"Searching Brave for social media: {company['name']} in {city}") - brave_social = self.brave_searcher.search_social_media(company['name'], city) - if brave_social: - logger.info(f"Brave search found: {list(brave_social.keys())}") - else: - logger.info("Brave search found no additional social media") - # Merge, website takes precedence - for platform, url in brave_social.items(): - if platform not in website_social: - website_social[platform] = url - social_sources[platform] = 'brave_search' - logger.info(f"Added {platform} from Brave search: {url}") - except Exception as e: - logger.warning(f"Brave search failed: {str(e)}") - result['errors'].append(f'Brave search failed: {str(e)}') + # 3. 
Brave Search disabled — too many false positives + # Social media profiles are now sourced only from: + # - Website scraping (automatic, from company's own website) + # - Manual admin entry (via company profile editing) + # - OAuth API (Facebook Graph API for verified data) result['social_media'] = website_social result['social_sources'] = social_sources diff --git a/templates/social_audit.html b/templates/social_audit.html index bf4a0c7..86b51f8 100644 --- a/templates/social_audit.html +++ b/templates/social_audit.html @@ -550,7 +550,6 @@ } .source-tag.website { background: #6366f1; color: white; } - .source-tag.brave { background: #fb542b; color: white; } .source-tag.google { background: #4285f4; color: white; } .source-tag.facebook { background: #1877f2; color: white; } .source-tag.instagram { background: #e4405f; color: white; } @@ -857,7 +856,7 @@
Źródło: - {% if profile.source == 'website_scrape' %}Ze strony WWW{% elif profile.source == 'brave_search' %}Wyszukiwarka{% elif profile.source == 'manual' %}Ręcznie{% elif profile.source == 'facebook_api' %}Facebook API{% else %}{{ profile.source }}{% endif %} + {% if profile.source == 'website_scrape' %}Ze strony WWW{% elif profile.source == 'manual' %}Ręcznie{% elif profile.source == 'facebook_api' %}Facebook API{% else %}{{ profile.source }}{% endif %}
{% endif %} @@ -1007,7 +1006,8 @@ {% endif %} {% else %} -

Nie znaleziono profilu na tej platformie

+

Profil nie został wykryty automatycznie na stronie internetowej firmy.

+

Jeśli firma posiada profil na tej platformie, można go dodać ręcznie w edycji profilu firmy.

{% endif %} @@ -1186,17 +1186,7 @@ Skanuję stronę WWW WWW - -
-
- - - -
- Szukam w Brave Search BRAVE -
- - +
@@ -1301,7 +1291,7 @@ const companySlug = '{{ company.slug }}'; // All step IDs in order const allSteps = [ - 'step-website', 'step-brave', + 'step-website', 'step-facebook', 'step-instagram', 'step-linkedin', 'step-youtube', 'step-twitter', 'step-tiktok', 'step-google', 'step-save' @@ -1399,12 +1389,6 @@ async function animatePlatformSteps(foundPlatforms, googleData) { updateStep('step-website', 'complete', 'Skanowanie strony WWW zakończone WWW'); await new Promise(r => setTimeout(r, delay)); - // Brave Search complete - updateStep('step-brave', 'in_progress', 'Przeszukuję Brave Search... BRAVE'); - await new Promise(r => setTimeout(r, delay)); - updateStep('step-brave', 'complete', 'Wyszukiwanie dodatkowych profili zakończone BRAVE'); - await new Promise(r => setTimeout(r, delay / 2)); - // Process each platform const platforms = ['facebook', 'instagram', 'linkedin', 'youtube', 'twitter', 'tiktok']; for (const platform of platforms) {