Strip all standalone bracketed text from LLM responses

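The model paraphrases injected metadata in unpredictable ways, so
targeted regexes can't keep up. Replace them with a single rule: any
[bracketed block] that stands alone (nothing but whitespace before the
opening bracket and after the closing one on its line, even if the block
itself spans several lines) gets removed, and the blank lines left
behind are collapsed. Brackets inside a sentence are left untouched.
This is safe because real roasts never use standalone brackets.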

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 15:24:18 -05:00
parent 942f5ddce7
commit 02b2870f2b

View File

@@ -204,16 +204,13 @@ class ChatCog(commands.Cog):
if typing_ctx:
await typing_ctx.__aexit__(None, None, None)
# Strip leaked metadata the LLM may echo back
# Strip leaked metadata the LLM may echo back.
# The LLM paraphrases/reformats injected context in unpredictable ways,
# so nuke any [bracketed block] that sits on its own line — real roasts
# don't use standalone bracketed text.
if response:
response = re.sub(r"\[Server context:[^\]]*\]\n?", "", response)
response = re.sub(r"\[Replying to bot's message:[^\]]*\]\n?", "", response)
response = re.sub(r"\[\w[\w ]* said:[^\]]*\]\n?", "", response)
response = re.sub(r"\[Notes about [^\]]*\]\n?", "", response)
response = re.sub(r"\[[^\]]*'s recent messages:[\s\S]*?\]\n?", "", response)
# Catch reformatted metadata (LLM drops prefix but keeps content)
response = re.sub(r"\[[^\]]*#[a-z-]+[^\]]*(?:drama score|offense)[^\]]*\]\n?", "", response, flags=re.IGNORECASE)
response = response.strip()
response = re.sub(r"^\s*\[[^\]]*\]\s*$", "", response, flags=re.MULTILINE)
response = re.sub(r"\n{2,}", "\n", response).strip()
if not response:
log_channel = discord.utils.get(message.guild.text_channels, name="bcs-log")
@@ -320,13 +317,10 @@ class ChatCog(commands.Cog):
recent_bot_replies=recent_bot_replies,
)
# Strip leaked metadata
# Strip leaked metadata (same catch-all as main chat path)
if response:
response = re.sub(r"\[Server context:[^\]]*\]\n?", "", response)
response = re.sub(r"\[.*?reacted to your message.*?\]\n?", "", response)
response = re.sub(r"\[Your message was:.*?\]\n?", "", response)
response = re.sub(r"\[[^\]]*#[a-z-]+[^\]]*(?:drama score|offense)[^\]]*\]\n?", "", response, flags=re.IGNORECASE)
response = response.strip()
response = re.sub(r"^\s*\[[^\]]*\]\s*$", "", response, flags=re.MULTILINE)
response = re.sub(r"\n{2,}", "\n", response).strip()
if not response:
return