fix(nordagpt): catch bullet-point company hallucinations (* ProBud to...)
Some checks are pending
NordaBiz Tests / Smoke Tests (Production) (push) Blocked by required conditions
NordaBiz Tests / Send Failure Notification (push) Blocked by required conditions
NordaBiz Tests / Unit & Integration Tests (push) Waiting to run
NordaBiz Tests / E2E Tests (Playwright) (push) Blocked by required conditions

The AI sometimes writes hallucinated company names at the start of bullet
points without any prefix word. A new pattern catches "* CompanyName to/–/specjalizuje"
and removes the fake name if it is not in the database.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Maciej Pienczyn 2026-03-28 06:58:53 +01:00
parent a1a64730e3
commit 513d32ffb2

View File

@ -245,24 +245,47 @@ class NordaBizChatEngine:
text = re.sub(r'\*\*([^*]{2,40})\*\*', replace_bold_company, text) text = re.sub(r'\*\*([^*]{2,40})\*\*', replace_bold_company, text)
# 4. Remove plain-text company name mentions that aren't linked # 4. Remove ALL plain-text company name mentions that aren't linked
# Catches: "firma Baumar", "również Pro-Bud", "firmy Baumar i Pro-Bud" # Catches: "firma Baumar", "również Pro-Bud", "* Pro-Bud to..."
def replace_plain_company(match): def check_company_name(name: str) -> bool:
prefix = match.group(1) # "firma", "również", etc. """Check if a name is a valid company."""
name = match.group(2).strip().rstrip('.,;:') name_clean = name.strip().rstrip('.,;:')
if name.lower() in valid_names_set: if name_clean.lower() in valid_names_set:
return match.group(0) # Valid company return True
for vn in valid_names_set: for vn in valid_names_set:
if name.lower() in vn or vn in name.lower(): if name_clean.lower() in vn or vn in name_clean.lower():
return match.group(0) # Partial match return True
return False
def replace_plain_company(match):
prefix = match.group(1)
name = match.group(2).strip().rstrip('.,;:')
if check_company_name(name):
return match.group(0)
logger.warning(f"NordaGPT hallucination blocked: plain text '{name}' after '{prefix}' not in DB") logger.warning(f"NordaGPT hallucination blocked: plain text '{name}' after '{prefix}' not in DB")
return '' return ''
# Pattern 1: "firma X", "również X", "oraz X"
text = re.sub( text = re.sub(
r'(firma|firmą|firmę|firmy|również|oraz)\s+([A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]{2,25}(?:\s+[A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]+)?)', r'(firma|firmą|firmę|firmy|również|oraz)\s+([A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]{2,25}(?:\s+[A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]+)?)',
replace_plain_company, text replace_plain_company, text
) )
# Pattern 2: "* CompanyName to/–/specjalizuje" at start of bullet point
def replace_bullet_company(match):
    """Drop an unverified company name that opens a bullet point.

    ``match`` comes from the bullet pattern applied below: group 1 is the
    ``*`` marker with its trailing whitespace, group 2 the capitalised
    candidate name, and group 3 the word stem or dash that followed it.

    Returns the matched text unchanged when ``check_company_name`` accepts
    the candidate. Otherwise logs a warning and returns the bullet marker
    followed directly by the suffix, i.e. the text with the name removed.
    """
    bullet = match.group(1)  # "* " -- the pattern only matches asterisk bullets
    name = match.group(2).strip()
    suffix = match.group(3)  # "to ", "– ", "specjali", "oferuj", etc.
    if check_company_name(name):
        return match.group(0)
    logger.warning(f"NordaGPT hallucination blocked: bullet company '{name}' not in DB")
    return f'{bullet}{suffix}'  # Keep bullet and suffix, remove company name

# The second alternative of the trailing group is an en dash plus a space
# ("* Name – description"). Without the dash that alternative is a bare space,
# which makes the pattern fire on any capitalised word followed by two
# whitespace characters and never on the dash form it is meant to catch.
text = re.sub(
    r'(\*\s+)([A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]{2,25}(?:\s+[A-ZĄĘÓŁŹŻŚĆŃ][a-zA-ZąęółźżśćńĄĘÓŁŹŻŚĆŃ-]+)?)\s+(to |– |specjali|oferuj|zajmuj|zapewni|posiada|świadcz)',
    replace_bullet_company, text
)
# 5. Clean up artifacts left by removals # 5. Clean up artifacts left by removals
text = re.sub(r':\s*oraz\s*to\b', ': to', text) # ": oraz to" → ": to" text = re.sub(r':\s*oraz\s*to\b', ': to', text) # ": oraz to" → ": to"
text = re.sub(r':\s*,', ':', text) # ": ," → ":" text = re.sub(r':\s*,', ':', text) # ": ," → ":"