fix(nordagpt): catch bullet-point company hallucinations (* ProBud to...)
Some checks are pending
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
Some checks are pending
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions
The AI sometimes writes hallucinated company names at the start of bullet points, without any prefix word in front of them. The new pattern catches "* CompanyName to/–/specjalizuje" and removes the fake name if it is not in the database. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
a1a64730e3
commit
513d32ffb2
@ -245,24 +245,47 @@ class NordaBizChatEngine:
|
||||
|
||||
text = re.sub(r'\*\*([^*]{2,40})\*\*', replace_bold_company, text)
|
||||
|
||||
# 4. Remove plain-text company name mentions that aren't linked
|
||||
# Catches: "firma Baumar", "również Pro-Bud", "firmy Baumar i Pro-Bud"
|
||||
def replace_plain_company(match):
|
||||
prefix = match.group(1) # "firma", "również", etc.
|
||||
name = match.group(2).strip().rstrip('.,;:')
|
||||
if name.lower() in valid_names_set:
|
||||
return match.group(0) # Valid company
|
||||
# 4. Remove ALL plain-text company name mentions that aren't linked
|
||||
# Catches: "firma Baumar", "również Pro-Bud", "* Pro-Bud to..."
|
||||
def check_company_name(name: str) -> bool:
    """Return True when *name* matches a company in ``valid_names_set``.

    Surrounding whitespace and trailing ``.,;:`` punctuation are ignored and
    the candidate is lower-cased before comparison (the entries of
    ``valid_names_set`` are presumably stored lower-cased -- confirm where
    the set is built).

    A candidate is accepted on an exact hit, or when it and a known name
    contain one another as substrings.

    Args:
        name: Candidate company name extracted from the model output.

    Returns:
        True if the candidate is considered a known company, else False.
    """
    candidate = name.strip().rstrip('.,;:').lower()
    if not candidate:
        # '' is a substring of every string, so an empty candidate would
        # otherwise "match" any known name.
        return False
    if candidate in valid_names_set:
        return True
    # Skip empty entries: `'' in candidate` is always True and a single blank
    # name in the set would whitelist every hallucinated company.
    return any(
        known and (candidate in known or known in candidate)
        for known in valid_names_set
    )
|
||||
|
||||
def replace_plain_company(match):
    """Regex callback that drops an unlinked mention of an unknown company.

    Group 1 of *match* is the lead-in word and group 2 is the capitalised
    name that follows it. When ``check_company_name`` accepts the name the
    matched text is returned untouched; otherwise a warning is logged and
    an empty string is returned, which removes both the lead-in word and
    the name from the text.
    """
    name = match.group(2).strip().rstrip('.,;:')
    if not check_company_name(name):
        prefix = match.group(1)
        logger.warning(f"NordaGPT hallucination blocked: plain text '{name}' after '{prefix}' not in DB")
        return ''
    # Known company: leave the original wording exactly as written.
    return match.group(0)
|
||||
|
||||
# Pattern 1: "firma X", "również X", "oraz X"
|
||||
text = re.sub(
|
||||
r'(firma|firmą|firmę|firmy|również|oraz)\s+([A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]{2,25}(?:\s+[A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]+)?)',
|
||||
replace_plain_company, text
|
||||
)
|
||||
|
||||
# Pattern 2: "* CompanyName to/–/specjalizuje" at start of bullet point
|
||||
def replace_bullet_company(match):
    """Regex callback that strips an unknown company name opening a bullet.

    Expected groups of *match*:
      1 -- the bullet marker together with its trailing whitespace,
      2 -- the capitalised candidate company name,
      3 -- the start of the word or dash that follows the name.

    When ``check_company_name`` accepts the name the matched text is
    returned unchanged. Otherwise a warning is logged and only groups 1
    and 3 are returned, so the bullet keeps its marker and continuation
    while the name (and the whitespace after it) disappears.
    """
    name = match.group(2).strip()
    if not check_company_name(name):
        logger.warning(f"NordaGPT hallucination blocked: bullet company '{name}' not in DB")
        bullet, suffix = match.group(1), match.group(3)
        return f'{bullet}{suffix}'
    # Known company: keep the bullet exactly as the model wrote it.
    return match.group(0)
|
||||
|
||||
text = re.sub(
|
||||
r'(\*\s+)([A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]{2,25}(?:\s+[A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]+)?)\s+(to |– |specjali|oferuj|zajmuj|zapewni|posiada|świadcz)',
|
||||
replace_bullet_company, text
|
||||
)
|
||||
|
||||
# 5. Clean up artifacts left by removals
|
||||
text = re.sub(r':\s*oraz\s*to\b', ': to', text) # ": oraz to" → ": to"
|
||||
text = re.sub(r':\s*,', ':', text) # ": ," → ":"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user