diff --git a/cogs/chat.py b/cogs/chat.py
index 001b71c..870ceea 100644
--- a/cogs/chat.py
+++ b/cogs/chat.py
@@ -188,10 +188,17 @@ class ChatCog(commands.Cog):
 
             active_prompt = self._get_active_prompt()
 
+            # Last 5 bot replies (first 150 chars of each) so the LLM can avoid repeating itself
+            recent_bot_replies = [
+                m["content"][:150] for m in self._chat_history[ch_id]
+                if m["role"] == "assistant"
+            ][-5:]
+
             response = await self.bot.llm.chat(
                 list(self._chat_history[ch_id]),
                 active_prompt,
                 on_first_token=start_typing,
+                recent_bot_replies=recent_bot_replies,
             )
 
         if typing_ctx:
@@ -302,9 +309,15 @@ class ChatCog(commands.Cog):
         self._chat_history[ch_id].append({"role": "user", "content": context})
         active_prompt = self._get_active_prompt()
 
+        recent_bot_replies = [
+            m["content"][:150] for m in self._chat_history[ch_id]
+            if m["role"] == "assistant"
+        ][-5:]
+
         response = await self.bot.llm.chat(
             list(self._chat_history[ch_id]),
             active_prompt,
+            recent_bot_replies=recent_bot_replies,
         )
 
         # Strip leaked metadata
diff --git a/prompts/chat_roast.txt b/prompts/chat_roast.txt
index e48b146..3875d27 100644
--- a/prompts/chat_roast.txt
+++ b/prompts/chat_roast.txt
@@ -13,19 +13,20 @@ Your personality:
 - You use gaming terminology to roast people ("hardstuck", "skill diff", "ratio'd", etc.)
 - If someone tries to roast you back, you escalate harder
 
-Vary your roast style. Mix it up between:
-- Deadpan observations: just state the embarrassing fact
+Vary your roast style. Rotate between these — NEVER use the same style twice in a row:
+- Deadpan observations: just state the embarrassing fact dryly
 - Sarcastic hype: pretend to compliment them while destroying them
-- Rhetorical questions: make them question their own choices
-- Blunt callouts: just say it directly, no cleverness needed
-- Exaggeration: take what they said and blow it out of proportion
+- Rhetorical questions: make them question their own life choices
+- Blunt callouts: just say it plain, no cleverness needed
+- Exaggeration: take what they said and blow it way out of proportion
+- Backhanded encouragement: cheer them on for being terrible
+- Fake concern: act worried about them, treating their gameplay as a medical condition
 
-Examples of your vibe:
-- "You went 2-15 and have the audacity to type in this chat? Bold."
-- "That take is so bad I thought my monitor was upside down."
-- "Imagine losing to THAT team. I'd uninstall my whole PC."
-- "Congrats on being the reason your team lost. Real leadership energy."
-- "I've seen bots with better game sense. Actual bots. The ones that walk into walls."
+CRITICAL ANTI-REPETITION RULES:
+- NEVER reuse phrases from your recent messages. If you just said something about "1v3", "brackets", "dumpster fire", "MVP energy", "skill diff", "stats", "hardstuck", or "uninstall", pick a COMPLETELY different angle.
+- Each response must use different vocabulary, structure, and punchline style from your last several messages.
+- React to what the person ACTUALLY said — don't default to generic gaming roasts. Read their message and find something specific to roast about it.
+- If you can't think of something specific, riff on the absurdity of what they typed, not gaming performance.
 
 Do NOT:
 - Break character or talk about being an AI/LLM
@@ -33,4 +34,5 @@ Do NOT:
 - Use hashtags or excessive emoji
 - Use metaphors or similes (no "like" or "as if" comparisons). Just say it directly.
 - Cross into genuinely hurtful territory (racism, real personal attacks, etc.)
-- Roast people about things outside of gaming/chat context (real appearance, family, etc.)
\ No newline at end of file
+- Roast people about things outside of gaming/chat context (real appearance, family, etc.)
+- Fall back on the same template over and over (e.g., "Real ___ energy", "You're the reason ___")
\ No newline at end of file
diff --git a/utils/llm_client.py b/utils/llm_client.py
index cfa6ecd..f57f14e 100644
--- a/utils/llm_client.py
+++ b/utils/llm_client.py
@@ -298,7 +298,7 @@ class LLMClient:
 
     async def chat(
         self, messages: list[dict[str, str]], system_prompt: str,
-        on_first_token=None,
+        on_first_token=None, recent_bot_replies: list[str] | None = None,
     ) -> str | None:
         """Send a conversational chat request (no tools).
 
@@ -308,8 +308,20 @@ class LLMClient:
         """
         patched = list(messages)
 
+        # Append recent bot replies to the system prompt so the model avoids
+        # repeating the same phrases / joke structures.
+        effective_prompt = system_prompt
+        if recent_bot_replies:
+            avoid_block = "\n".join(f"- {r}" for r in recent_bot_replies)
+            effective_prompt += (
+                "\n\nIMPORTANT — you recently said the following. "
+                "Do NOT reuse any of these phrases, sentence structures, or joke patterns. "
+                "Come up with something completely different.\n"
+                + avoid_block
+            )
+
         req_json = json.dumps([
-            {"role": "system", "content": system_prompt[:500]},
+            {"role": "system", "content": effective_prompt[:500]},
             *[{"role": m["role"], "content": str(m.get("content", ""))[:200]} for m in patched],
         ], default=str)
         t0 = time.monotonic()
@@ -319,11 +331,13 @@ class LLMClient:
                 stream = await self._client.chat.completions.create(
                     model=self.model,
                     messages=[
-                        {"role": "system", "content": system_prompt},
+                        {"role": "system", "content": effective_prompt},
                         *patched,
                     ],
-                    temperature=0.8,
+                    temperature=0.9,
                     max_tokens=2048,
+                    frequency_penalty=0.8,
+                    presence_penalty=0.6,
                     stream=True,
                 )