Fix repetitive roast responses with anti-repetition mechanisms

Add frequency_penalty (0.8) and presence_penalty (0.6) to LLM chat calls to discourage repeated tokens, and raise the sampling temperature from 0.8 to 0.9. Inject the bot's last 5 responses into the system prompt so the model knows what to avoid. Strengthen the roast prompt with explicit anti-repetition rules and remove example lines the model was copying verbatim ("Real ___ energy", etc.).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-23 15:15:11 -05:00
parent 534aac5cd7
commit 942f5ddce7
3 changed files with 45 additions and 16 deletions
@@ -298,7 +298,7 @@ class LLMClient:

    async def chat(
        self, messages: list[dict[str, str]], system_prompt: str,
-        on_first_token=None,
+        on_first_token=None, recent_bot_replies: list[str] | None = None,
    ) -> str | None:
        """Send a conversational chat request (no tools).

@@ -308,8 +308,20 @@ class LLMClient:
        """
        patched = list(messages)

+        # Append recent bot replies to the system prompt so the model avoids
+        # repeating the same phrases / joke structures.
+        effective_prompt = system_prompt
+        if recent_bot_replies:
+            avoid_block = "\n".join(f"- {r}" for r in recent_bot_replies)
+            effective_prompt += (
+                "\n\nIMPORTANT — you recently said the following. "
+                "Do NOT reuse any of these phrases, sentence structures, or joke patterns. "
+                "Come up with something completely different.\n"
+                + avoid_block
+            )
+
        req_json = json.dumps([
-            {"role": "system", "content": system_prompt[:500]},
+            {"role": "system", "content": effective_prompt[:500]},
            *[{"role": m["role"], "content": str(m.get("content", ""))[:200]} for m in patched],
        ], default=str)
        t0 = time.monotonic()
@@ -319,11 +331,13 @@ class LLMClient:
                stream = await self._client.chat.completions.create(
                    model=self.model,
                    messages=[
-                        {"role": "system", "content": system_prompt},
+                        {"role": "system", "content": effective_prompt},
                        *patched,
                    ],
-                    temperature=0.8,
+                    temperature=0.9,
                    max_tokens=2048,
+                    frequency_penalty=0.8,
+                    presence_penalty=0.6,
                    stream=True,
                )