diff --git a/bot.py b/bot.py index fc79ade..5650d96 100644 --- a/bot.py +++ b/bot.py @@ -65,12 +65,16 @@ class BCSBot(commands.Bot): self.config = config - # LLM client (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI) + # LLM clients (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI) llm_base_url = os.getenv("LLM_BASE_URL", "http://athena.lan:11434") llm_model = os.getenv("LLM_MODEL", "Qwen3-VL-32B-Thinking-Q8_0") llm_api_key = os.getenv("LLM_API_KEY", "not-needed") self.llm = LLMClient(llm_base_url, llm_model, llm_api_key) + # Heavy/escalation model for re-analysis, chat, and manual commands + llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", llm_model) + self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key) + # Drama tracker sentiment = config.get("sentiment", {}) timeouts = config.get("timeouts", {}) @@ -167,6 +171,7 @@ class BCSBot(commands.Bot): async def close(self): await self.db.close() await self.llm.close() + await self.llm_heavy.close() await super().close() diff --git a/cogs/chat.py b/cogs/chat.py index e8e0bda..4ecde05 100644 --- a/cogs/chat.py +++ b/cogs/chat.py @@ -84,7 +84,7 @@ class ChatCog(commands.Cog): image_attachment.filename, user_text[:80], ) - response = await self.bot.llm.analyze_image( + response = await self.bot.llm_heavy.analyze_image( image_bytes, SCOREBOARD_ROAST, user_text=user_text, @@ -108,7 +108,7 @@ class ChatCog(commands.Cog): {"role": "user", "content": f"{score_context}\n{message.author.display_name}: {content}"} ) - response = await self.bot.llm.chat( + response = await self.bot.llm_heavy.chat( list(self._chat_history[ch_id]), CHAT_PERSONALITY, on_first_token=start_typing, diff --git a/cogs/commands.py b/cogs/commands.py index 2413778..6a514ef 100644 --- a/cogs/commands.py +++ b/cogs/commands.py @@ -126,9 +126,19 @@ class CommandsCog(commands.Cog): inline=True, ) embed.add_field( - name="LLM", - value=f"`{self.bot.llm.model}` @ `{self.bot.llm.host}`", - inline=False, + name="Triage 
Model", + value=f"`{self.bot.llm.model}`", + inline=True, + ) + embed.add_field( + name="Escalation Model", + value=f"`{self.bot.llm_heavy.model}`", + inline=True, + ) + embed.add_field( + name="LLM Host", + value=f"`{self.bot.llm.host}`", + inline=True, + ) await interaction.response.send_message(embed=embed, ephemeral=True) @@ -301,7 +311,7 @@ class CommandsCog(commands.Cog): else "(no prior context)" ) - result = await self.bot.llm.analyze_message(msg.content, context) + result = await self.bot.llm_heavy.analyze_message(msg.content, context) if result is None: embed = discord.Embed( title=f"Analysis: {msg.author.display_name}", @@ -374,7 +384,7 @@ class CommandsCog(commands.Cog): channel_context = "\n".join(lines) user_notes = self.bot.drama_tracker.get_user_notes(interaction.user.id) - raw, parsed = await self.bot.llm.raw_analyze( + raw, parsed = await self.bot.llm_heavy.raw_analyze( message, user_notes=user_notes, channel_context=channel_context, ) diff --git a/cogs/sentiment.py b/cogs/sentiment.py index 3e3ebe7..c2b0b2e 100644 --- a/cogs/sentiment.py +++ b/cogs/sentiment.py @@ -141,7 +141,7 @@ class SentimentCog(commands.Cog): game_channels = config.get("game_channels", {}) channel_context = self._build_channel_context(message, game_channels) - # Analyze the combined message + # Analyze the combined message (triage with lightweight model) context = self._get_context(message) user_notes = self.bot.drama_tracker.get_user_notes(message.author.id) result = await self.bot.llm.analyze_message( @@ -152,6 +152,26 @@ class SentimentCog(commands.Cog): if result is None: return + # Escalation: re-analyze with heavy model if triage sees elevated toxicity, off-topic, or low coherence + escalation_threshold = sentiment_config.get("escalation_threshold", 0.25) + needs_escalation = ( + result["toxicity_score"] >= escalation_threshold + or result.get("off_topic", False) + or result.get("coherence_score", 1.0) < 0.6 + ) + if needs_escalation: + triage_score = result["toxicity_score"] + heavy_result = await 
self.bot.llm_heavy.analyze_message( + combined_content, context, user_notes=user_notes, + channel_context=channel_context, + ) + if heavy_result is not None: + logger.info( + "Escalated to heavy model (triage_score=%.2f) for %s", + triage_score, message.author.display_name, + ) + result = heavy_result + score = result["toxicity_score"] categories = result["categories"] reasoning = result["reasoning"] diff --git a/config.yaml b/config.yaml index dbb28fc..7ea70c2 100644 --- a/config.yaml +++ b/config.yaml @@ -18,6 +18,7 @@ sentiment: rolling_window_size: 10 # Number of messages to track per user rolling_window_minutes: 15 # Time window for tracking batch_window_seconds: 3 # Wait this long for more messages before analyzing (debounce) + escalation_threshold: 0.25 # Minimum triage toxicity score that triggers re-analysis with the heavy model game_channels: gta-online: "GTA Online"