Add two-tier LLM analysis with triage/escalation

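The triage model (LLM_MODEL) analyzes every message cheaply. If the
triage result has toxicity >= 0.25 (configurable via
escalation_threshold), is flagged off_topic, or has coherence < 0.6,
the message is re-analyzed with the heavy model (LLM_ESCALATION_MODEL);
if that re-analysis returns no result, the triage result is kept. Chat,
image analysis, /bcs-test, and /bcs-scan always use the heavy model.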

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 18:33:36 -05:00
parent 64e9474c99
commit b9bac899f9
5 changed files with 45 additions and 9 deletions

View File

@@ -84,7 +84,7 @@ class ChatCog(commands.Cog):
image_attachment.filename,
user_text[:80],
)
response = await self.bot.llm.analyze_image(
response = await self.bot.llm_heavy.analyze_image(
image_bytes,
SCOREBOARD_ROAST,
user_text=user_text,
@@ -108,7 +108,7 @@ class ChatCog(commands.Cog):
{"role": "user", "content": f"{score_context}\n{message.author.display_name}: {content}"}
)
response = await self.bot.llm.chat(
response = await self.bot.llm_heavy.chat(
list(self._chat_history[ch_id]),
CHAT_PERSONALITY,
on_first_token=start_typing,

View File

@@ -126,9 +126,19 @@ class CommandsCog(commands.Cog):
inline=True,
)
embed.add_field(
name="LLM",
value=f"`{self.bot.llm.model}` @ `{self.bot.llm.host}`",
inline=False,
name="Triage Model",
value=f"`{self.bot.llm.model}`",
inline=True,
)
embed.add_field(
name="Escalation Model",
value=f"`{self.bot.llm_heavy.model}`",
inline=True,
)
embed.add_field(
name="LLM Host",
value=f"`{self.bot.llm.host}`",
inline=True,
)
await interaction.response.send_message(embed=embed, ephemeral=True)
@@ -301,7 +311,7 @@ class CommandsCog(commands.Cog):
else "(no prior context)"
)
result = await self.bot.llm.analyze_message(msg.content, context)
result = await self.bot.llm_heavy.analyze_message(msg.content, context)
if result is None:
embed = discord.Embed(
title=f"Analysis: {msg.author.display_name}",
@@ -374,7 +384,7 @@ class CommandsCog(commands.Cog):
channel_context = "\n".join(lines)
user_notes = self.bot.drama_tracker.get_user_notes(interaction.user.id)
raw, parsed = await self.bot.llm.raw_analyze(
raw, parsed = await self.bot.llm_heavy.raw_analyze(
message, user_notes=user_notes, channel_context=channel_context,
)

View File

@@ -141,7 +141,7 @@ class SentimentCog(commands.Cog):
game_channels = config.get("game_channels", {})
channel_context = self._build_channel_context(message, game_channels)
# Analyze the combined message
# Analyze the combined message (triage with lightweight model)
context = self._get_context(message)
user_notes = self.bot.drama_tracker.get_user_notes(message.author.id)
result = await self.bot.llm.analyze_message(
@@ -152,6 +152,26 @@ class SentimentCog(commands.Cog):
if result is None:
return
# Escalation: re-analyze with heavy model if triage flags something
escalation_threshold = sentiment_config.get("escalation_threshold", 0.25)
needs_escalation = (
result["toxicity_score"] >= escalation_threshold
or result.get("off_topic", False)
or result.get("coherence_score", 1.0) < 0.6
)
if needs_escalation:
triage_score = result["toxicity_score"]
heavy_result = await self.bot.llm_heavy.analyze_message(
combined_content, context, user_notes=user_notes,
channel_context=channel_context,
)
if heavy_result is not None:
logger.info(
"Escalated to heavy model (triage_score=%.2f) for %s",
triage_score, message.author.display_name,
)
result = heavy_result
score = result["toxicity_score"]
categories = result["categories"]
reasoning = result["reasoning"]