Strip all standalone bracketed text from LLM responses

The model paraphrases injected metadata in unpredictable ways, so targeted regexes can't keep up. Replace them with a single rule: any [bracketed block] on its own line gets removed, since real roasts never use standalone brackets.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 15:24:18 -05:00
parent 942f5ddce7
commit 02b2870f2b
1 changed file with 9 additions and 15 deletions
@@ -204,16 +204,13 @@ class ChatCog(commands.Cog):
        if typing_ctx:
            await typing_ctx.__aexit__(None, None, None)
-        # Strip leaked metadata the LLM may echo back
+        # Strip leaked metadata the LLM may echo back.
        # The LLM paraphrases/reformats injected context in unpredictable ways,
        # so nuke any [bracketed block] that sits on its own line — real roasts
        # don't use standalone bracketed text.
        if response:
-            response = re.sub(r"\[Server context:[^\]]*\]\n?", "", response)
+            response = re.sub(r"^\s*\[[^\]]*\]\s*$", "", response, flags=re.MULTILINE)
-            response = re.sub(r"\[Replying to bot's message:[^\]]*\]\n?", "", response)
+            response = re.sub(r"\n{2,}", "\n", response).strip()
            response = re.sub(r"\[\w[\w ]* said:[^\]]*\]\n?", "", response)
            response = re.sub(r"\[Notes about [^\]]*\]\n?", "", response)
            response = re.sub(r"\[[^\]]*'s recent messages:[\s\S]*?\]\n?", "", response)
            # Catch reformatted metadata (LLM drops prefix but keeps content)
            response = re.sub(r"\[[^\]]*#[a-z-]+[^\]]*(?:drama score|offense)[^\]]*\]\n?", "", response, flags=re.IGNORECASE)
            response = response.strip()
        if not response:
            log_channel = discord.utils.get(message.guild.text_channels, name="bcs-log")
@@ -320,13 +317,10 @@ class ChatCog(commands.Cog):
            recent_bot_replies=recent_bot_replies,
        )
-        # Strip leaked metadata
+        # Strip leaked metadata (same catch-all as main chat path)
        if response:
-            response = re.sub(r"\[Server context:[^\]]*\]\n?", "", response)
+            response = re.sub(r"^\s*\[[^\]]*\]\s*$", "", response, flags=re.MULTILINE)
-            response = re.sub(r"\[.*?reacted to your message.*?\]\n?", "", response)
+            response = re.sub(r"\n{2,}", "\n", response).strip()
            response = re.sub(r"\[Your message was:.*?\]\n?", "", response)
            response = re.sub(r"\[[^\]]*#[a-z-]+[^\]]*(?:drama score|offense)[^\]]*\]\n?", "", response, flags=re.IGNORECASE)
            response = response.strip()
        if not response:
            return