fix: strengthen Google Places name validation to prevent cross-company data contamination
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
- Replace substring matching with word-boundary tokenized matching
- Short names (1-2 significant words): require ALL significant words to match
- Longer names (3+ significant words): require at least 50% word overlap
- Pick the best-scoring result instead of the first match
- Add company_name validation to competitor_monitoring_service
- Show Google profile name in dashboard hints for admin verification
- Display mismatch warning when Google name differs from company name

Prevents cases like "IT Space" matching "Body Space" (score 0.50 < 1.00 threshold).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
e0bb6b718a
commit
ae9a658b0c
@ -741,10 +741,13 @@ def admin_company_detail(company_id):
|
||||
# --- Hints: where to find missing data ---
|
||||
hints = {}
|
||||
analysis = seo_analysis # CompanyWebsiteAnalysis object or None
|
||||
# Google name for mismatch warning (e.g. "IT Space" vs "Body Space")
|
||||
google_name = (analysis.google_name or '') if analysis else ''
|
||||
google_name_mismatch = bool(google_name and google_name.lower() != company.name.lower())
|
||||
|
||||
if not company.phone:
|
||||
if analysis and analysis.google_phone:
|
||||
hints['Telefon'] = {'source': 'Google Business', 'value': analysis.google_phone, 'action': 'apply'}
|
||||
hints['Telefon'] = {'source': 'Google Business', 'value': analysis.google_phone, 'action': 'apply', 'google_name': google_name if google_name_mismatch else ''}
|
||||
elif analysis and analysis.nap_on_website:
|
||||
nap = analysis.nap_on_website if isinstance(analysis.nap_on_website, dict) else {}
|
||||
if nap.get('phone'):
|
||||
@ -760,11 +763,11 @@ def admin_company_detail(company_id):
|
||||
|
||||
if not company.website:
|
||||
if analysis and analysis.google_website:
|
||||
hints['Strona WWW'] = {'source': 'Google Business', 'value': analysis.google_website, 'action': 'apply'}
|
||||
hints['Strona WWW'] = {'source': 'Google Business', 'value': analysis.google_website, 'action': 'apply', 'google_name': google_name if google_name_mismatch else ''}
|
||||
|
||||
if not company.address_city:
|
||||
if analysis and analysis.google_address:
|
||||
hints['Adres'] = {'source': 'Google Business', 'value': analysis.google_address, 'action': 'apply'}
|
||||
hints['Adres'] = {'source': 'Google Business', 'value': analysis.google_address, 'action': 'apply', 'google_name': google_name if google_name_mismatch else ''}
|
||||
|
||||
if not company.description_short:
|
||||
if analysis and analysis.content_summary:
|
||||
|
||||
@ -183,6 +183,7 @@ def admin_data_quality():
|
||||
avg_score = round(score_sum / total) if total > 0 else 0
|
||||
|
||||
# Available data: companies where Google has data but company profile is empty
|
||||
# Include google_name so admin can verify the match is correct
|
||||
available_data = []
|
||||
analyses = db.query(CompanyWebsiteAnalysis).all()
|
||||
company_map = {c.id: c for c in companies}
|
||||
@ -191,20 +192,24 @@ def admin_data_quality():
|
||||
comp = company_map.get(a.company_id)
|
||||
if not comp:
|
||||
continue
|
||||
g_name = a.google_name or ''
|
||||
if a.google_phone and not comp.phone:
|
||||
available_data.append({
|
||||
'company_id': comp.id, 'company_name': comp.name, 'company_slug': comp.slug,
|
||||
'field': 'Telefon', 'source': 'Google Business', 'value': a.google_phone
|
||||
'field': 'Telefon', 'source': 'Google Business', 'value': a.google_phone,
|
||||
'google_name': g_name,
|
||||
})
|
||||
if a.google_website and not comp.website:
|
||||
available_data.append({
|
||||
'company_id': comp.id, 'company_name': comp.name, 'company_slug': comp.slug,
|
||||
'field': 'Strona WWW', 'source': 'Google Business', 'value': a.google_website
|
||||
'field': 'Strona WWW', 'source': 'Google Business', 'value': a.google_website,
|
||||
'google_name': g_name,
|
||||
})
|
||||
if a.google_address and not comp.address_city:
|
||||
available_data.append({
|
||||
'company_id': comp.id, 'company_name': comp.name, 'company_slug': comp.slug,
|
||||
'field': 'Adres', 'source': 'Google Business', 'value': a.google_address
|
||||
'field': 'Adres', 'source': 'Google Business', 'value': a.google_address,
|
||||
'google_name': g_name,
|
||||
})
|
||||
|
||||
return render_template(
|
||||
|
||||
@ -72,7 +72,8 @@ class CompetitorMonitoringService:
|
||||
logger.warning(f"Company {company_id} has no Google Place ID, searching by name")
|
||||
# Search for the company first
|
||||
place = self.places_service.search_place(
|
||||
f"{company.name} {company.address_city or 'Wejherowo'}"
|
||||
f"{company.name} {company.address_city or 'Wejherowo'}",
|
||||
company_name=company.name
|
||||
)
|
||||
if not place:
|
||||
return []
|
||||
|
||||
@ -136,6 +136,38 @@ class GooglePlacesService:
|
||||
logger.error(f"Places API request error for {place_id}: {e}")
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _tokenize_name(name: str) -> set:
|
||||
"""Tokenize a company name into significant lowercase words."""
|
||||
import re as _re
|
||||
skip_words = {
|
||||
'sp', 'z', 'o', 'oo', 'sa', 'sc', 'j', 'k', 'ul', 'i', 'w',
|
||||
'do', 'na', 'po', 'ze', 'the', 'and', 'of', 'for', 'group',
|
||||
}
|
||||
# Split on non-alphanumeric, keep words
|
||||
words = _re.findall(r'[a-ząćęłńóśźż0-9]+', name.lower())
|
||||
return {w for w in words if len(w) > 1 and w not in skip_words}
|
||||
|
||||
@staticmethod
def _name_match_score(company_name: str, google_name: str) -> float:
    """
    Score how well a Google result name covers a company name.

    Both names are tokenized with GooglePlacesService._tokenize_name and
    compared as whole tokens (set intersection), never as substrings.

    Returns:
        Fraction (0.0-1.0) of the company's significant tokens that also
        appear among the Google name's tokens. 0.0 when the company name
        yields no significant tokens.
    """
    tokenize = GooglePlacesService._tokenize_name
    wanted = tokenize(company_name)
    offered = tokenize(google_name)

    # Nothing significant to look for: treat as "no match" rather than
    # dividing by zero.
    if len(wanted) == 0:
        return 0.0

    hits = wanted.intersection(offered)
    return len(hits) / len(wanted)
|
||||
|
||||
def search_place(self, query: str, location_bias: Dict = None,
|
||||
company_name: str = None) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
@ -185,17 +217,32 @@ class GooglePlacesService:
|
||||
if not company_name:
|
||||
return places[0]
|
||||
|
||||
# Validate: at least one significant word from company name must appear in result name
|
||||
skip_words = {'sp', 'z', 'o', 'oo', 'sa', 'sc', 'j', 'k', 'ul', 'i', 'w', 'do', 'na', 'po', 'ze'}
|
||||
name_words = {w.lower() for w in company_name.split() if len(w) > 1 and w.lower() not in skip_words}
|
||||
# Validate: company name must significantly match Google result name.
|
||||
# Uses word-boundary matching with minimum threshold:
|
||||
# - Short names (1-2 significant words): ALL words must match
|
||||
# - Longer names (3+ words): at least 50% of words must match
|
||||
company_words = self._tokenize_name(company_name)
|
||||
min_ratio = 1.0 if len(company_words) <= 2 else 0.5
|
||||
|
||||
best_place = None
|
||||
best_score = 0.0
|
||||
|
||||
for place in places:
|
||||
google_name = place.get('displayName', {}).get('text', '').lower()
|
||||
if any(word in google_name for word in name_words):
|
||||
return place
|
||||
google_name = place.get('displayName', {}).get('text', '')
|
||||
score = self._name_match_score(company_name, google_name)
|
||||
if score >= min_ratio and score > best_score:
|
||||
best_score = score
|
||||
best_place = place
|
||||
|
||||
if best_place:
|
||||
matched_name = best_place.get('displayName', {}).get('text', '')
|
||||
logger.info(
|
||||
f"Name match for '{company_name}': '{matched_name}' (score={best_score:.2f})"
|
||||
)
|
||||
return best_place
|
||||
|
||||
logger.warning(
|
||||
f"No name match for '{company_name}' in Google results: "
|
||||
f"No name match for '{company_name}' (min_ratio={min_ratio:.0%}) in Google results: "
|
||||
f"{[p.get('displayName', {}).get('text', '') for p in places]}"
|
||||
)
|
||||
return None
|
||||
|
||||
@ -841,6 +841,11 @@
|
||||
<span style="font-size: var(--font-size-xs); color: var(--primary);">
|
||||
{{ hints[field_name].source }}{% if hints[field_name].value %}: {{ hints[field_name].value[:40] }}{% endif %}
|
||||
</span>
|
||||
{% if hints[field_name].get('google_name') %}
|
||||
<span style="font-size: var(--font-size-xs); color: #d97706;" title="Nazwa profilu Google nie zgadza się z nazwą firmy — zweryfikuj!">
|
||||
(profil: {{ hints[field_name].google_name[:30] }})
|
||||
</span>
|
||||
{% endif %}
|
||||
{% if hints[field_name].action == 'apply' and hints[field_name].value %}
|
||||
<button onclick="applyHint({{ company.id }}, '{{ field_name }}', '{{ hints[field_name].value|e }}')"
|
||||
class="hint-apply-btn" title="Uzupełnij to pole">Uzupełnij</button>
|
||||
|
||||
@ -582,7 +582,12 @@
|
||||
<tr id="avail-row-{{ loop.index }}">
|
||||
<td><a href="{{ url_for('admin.admin_company_detail', company_id=item.company_id) }}" class="dq-company-link">{{ item.company_name }}</a></td>
|
||||
<td>{{ item.field }}</td>
|
||||
<td><span style="font-size: var(--font-size-xs); color: var(--text-secondary);">{{ item.source }}</span></td>
|
||||
<td>
|
||||
<span style="font-size: var(--font-size-xs); color: var(--text-secondary);">{{ item.source }}</span>
|
||||
{% if item.google_name and item.google_name.lower() != item.company_name.lower() %}
|
||||
<br><span style="font-size: var(--font-size-xs); color: #d97706;" title="Nazwa profilu Google — zweryfikuj dopasowanie">Profil: {{ item.google_name[:40] }}</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td style="font-size: var(--font-size-sm);">{{ item.value[:50] }}</td>
|
||||
<td>
|
||||
<button class="hint-apply-btn" onclick="applyAvailableHint({{ item.company_id }}, '{{ item.field }}', '{{ item.value|e }}', 'avail-row-{{ loop.index }}')"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user