Add two-tier LLM analysis with triage/escalation
The triage model (LLM_MODEL) handles every message cheaply. If the triage result has toxicity >= 0.25, is flagged off_topic, or scores coherence < 0.6, the message is re-analyzed with the heavy model (LLM_ESCALATION_MODEL). Chat, image analysis, /bcs-test, and /bcs-scan always use the heavy model.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
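For orientation, a minimal sketch of the triage-and-escalate decision described above (illustration only, not part of this diff; the analyze callables and the helper name are stand-ins, while the 0.25 / 0.6 thresholds and result fields match the SentimentCog change below):

async def analyze_with_escalation(triage_analyze, heavy_analyze, content, context,
                                  escalation_threshold=0.25, **kwargs):
    # Triage every message with the cheap model first.
    result = await triage_analyze(content, context, **kwargs)
    if result is None:
        return None
    # Escalate when triage flags toxicity, off-topic chatter, or low coherence.
    needs_escalation = (
        result["toxicity_score"] >= escalation_threshold
        or result.get("off_topic", False)
        or result.get("coherence_score", 1.0) < 0.6
    )
    if needs_escalation:
        heavy_result = await heavy_analyze(content, context, **kwargs)
        if heavy_result is not None:
            result = heavy_result
    return result

In the bot itself the two callables correspond to the self.llm and self.llm_heavy LLMClient instances created in BCSBot below.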
@@ -65,12 +65,16 @@ class BCSBot(commands.Bot):
         self.config = config

-        # LLM client (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI)
+        # LLM clients (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI)
         llm_base_url = os.getenv("LLM_BASE_URL", "http://athena.lan:11434")
         llm_model = os.getenv("LLM_MODEL", "Qwen3-VL-32B-Thinking-Q8_0")
         llm_api_key = os.getenv("LLM_API_KEY", "not-needed")
         self.llm = LLMClient(llm_base_url, llm_model, llm_api_key)
+
+        # Heavy/escalation model for re-analysis, chat, and manual commands
+        llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", llm_model)
+        self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key)

         # Drama tracker
         sentiment = config.get("sentiment", {})
         timeouts = config.get("timeouts", {})
@@ -167,6 +171,7 @@ class BCSBot(commands.Bot):
     async def close(self):
         await self.db.close()
         await self.llm.close()
+        await self.llm_heavy.close()
         await super().close()
@@ -84,7 +84,7 @@ class ChatCog(commands.Cog):
             image_attachment.filename,
             user_text[:80],
         )
-        response = await self.bot.llm.analyze_image(
+        response = await self.bot.llm_heavy.analyze_image(
             image_bytes,
             SCOREBOARD_ROAST,
             user_text=user_text,
@@ -108,7 +108,7 @@ class ChatCog(commands.Cog):
             {"role": "user", "content": f"{score_context}\n{message.author.display_name}: {content}"}
         )

-        response = await self.bot.llm.chat(
+        response = await self.bot.llm_heavy.chat(
            list(self._chat_history[ch_id]),
            CHAT_PERSONALITY,
            on_first_token=start_typing,
@@ -126,9 +126,19 @@ class CommandsCog(commands.Cog):
             inline=True,
         )
         embed.add_field(
-            name="LLM",
-            value=f"`{self.bot.llm.model}` @ `{self.bot.llm.host}`",
-            inline=False,
+            name="Triage Model",
+            value=f"`{self.bot.llm.model}`",
+            inline=True,
+        )
+        embed.add_field(
+            name="Escalation Model",
+            value=f"`{self.bot.llm_heavy.model}`",
+            inline=True,
+        )
+        embed.add_field(
+            name="LLM Host",
+            value=f"`{self.bot.llm.host}`",
+            inline=True,
         )

         await interaction.response.send_message(embed=embed, ephemeral=True)
@@ -301,7 +311,7 @@ class CommandsCog(commands.Cog):
             else "(no prior context)"
         )

-        result = await self.bot.llm.analyze_message(msg.content, context)
+        result = await self.bot.llm_heavy.analyze_message(msg.content, context)
         if result is None:
             embed = discord.Embed(
                 title=f"Analysis: {msg.author.display_name}",
@@ -374,7 +384,7 @@ class CommandsCog(commands.Cog):
         channel_context = "\n".join(lines)

         user_notes = self.bot.drama_tracker.get_user_notes(interaction.user.id)
-        raw, parsed = await self.bot.llm.raw_analyze(
+        raw, parsed = await self.bot.llm_heavy.raw_analyze(
             message, user_notes=user_notes, channel_context=channel_context,
         )
@@ -141,7 +141,7 @@ class SentimentCog(commands.Cog):
         game_channels = config.get("game_channels", {})
         channel_context = self._build_channel_context(message, game_channels)

-        # Analyze the combined message
+        # Analyze the combined message (triage with lightweight model)
         context = self._get_context(message)
         user_notes = self.bot.drama_tracker.get_user_notes(message.author.id)
         result = await self.bot.llm.analyze_message(
@@ -152,6 +152,26 @@ class SentimentCog(commands.Cog):
         if result is None:
             return

+        # Escalation: re-analyze with heavy model if triage flags something
+        escalation_threshold = sentiment_config.get("escalation_threshold", 0.25)
+        needs_escalation = (
+            result["toxicity_score"] >= escalation_threshold
+            or result.get("off_topic", False)
+            or result.get("coherence_score", 1.0) < 0.6
+        )
+        if needs_escalation:
+            triage_score = result["toxicity_score"]
+            heavy_result = await self.bot.llm_heavy.analyze_message(
+                combined_content, context, user_notes=user_notes,
+                channel_context=channel_context,
+            )
+            if heavy_result is not None:
+                logger.info(
+                    "Escalated to heavy model (triage_score=%.2f) for %s",
+                    triage_score, message.author.display_name,
+                )
+                result = heavy_result
+
         score = result["toxicity_score"]
         categories = result["categories"]
         reasoning = result["reasoning"]
@@ -18,6 +18,7 @@ sentiment:
   rolling_window_size: 10        # Number of messages to track per user
   rolling_window_minutes: 15     # Time window for tracking
   batch_window_seconds: 3        # Wait this long for more messages before analyzing (debounce)
+  escalation_threshold: 0.25     # Triage toxicity score that triggers re-analysis with heavy model

 game_channels:
   gta-online: "GTA Online"