diff --git a/bot.py b/bot.py index fc79ade..5650d96 100644 --- a/bot.py +++ b/bot.py @@ -65,12 +65,16 @@ class BCSBot(commands.Bot): self.config = config - # LLM client (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI) + # LLM clients (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI) llm_base_url = os.getenv("LLM_BASE_URL", "http://athena.lan:11434") llm_model = os.getenv("LLM_MODEL", "Qwen3-VL-32B-Thinking-Q8_0") llm_api_key = os.getenv("LLM_API_KEY", "not-needed") self.llm = LLMClient(llm_base_url, llm_model, llm_api_key) + # Heavy/escalation model for re-analysis, chat, and manual commands + llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", llm_model) + self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key) + # Drama tracker sentiment = config.get("sentiment", {}) timeouts = config.get("timeouts", {}) @@ -167,6 +171,7 @@ class BCSBot(commands.Bot): async def close(self): await self.db.close() await self.llm.close() + await self.llm_heavy.close() await super().close() diff --git a/cogs/chat.py b/cogs/chat.py index e8e0bda..4ecde05 100644 --- a/cogs/chat.py +++ b/cogs/chat.py @@ -84,7 +84,7 @@ class ChatCog(commands.Cog): image_attachment.filename, user_text[:80], ) - response = await self.bot.llm.analyze_image( + response = await self.bot.llm_heavy.analyze_image( image_bytes, SCOREBOARD_ROAST, user_text=user_text, @@ -108,7 +108,7 @@ class ChatCog(commands.Cog): {"role": "user", "content": f"{score_context}\n{message.author.display_name}: {content}"} ) - response = await self.bot.llm.chat( + response = await self.bot.llm_heavy.chat( list(self._chat_history[ch_id]), CHAT_PERSONALITY, on_first_token=start_typing, diff --git a/cogs/commands.py b/cogs/commands.py index 2413778..6a514ef 100644 --- a/cogs/commands.py +++ b/cogs/commands.py @@ -126,9 +126,19 @@ class CommandsCog(commands.Cog): inline=True, ) embed.add_field( - name="LLM", - value=f"`{self.bot.llm.model}` @ `{self.bot.llm.host}`", - inline=False, + name="Triage 
Model", + value=f"`{self.bot.llm.model}`", + inline=True, + ) + embed.add_field( + name="Escalation Model", + value=f"`{self.bot.llm_heavy.model}`", + inline=True, + ) + embed.add_field( + name="LLM Host", + value=f"`{self.bot.llm.host}`", + inline=True, + ) await interaction.response.send_message(embed=embed, ephemeral=True) @@ -301,7 +311,7 @@ class CommandsCog(commands.Cog): else "(no prior context)" ) - result = await self.bot.llm.analyze_message(msg.content, context) + result = await self.bot.llm_heavy.analyze_message(msg.content, context) if result is None: embed = discord.Embed( title=f"Analysis: {msg.author.display_name}", @@ -374,7 +384,7 @@ class CommandsCog(commands.Cog): channel_context = "\n".join(lines) user_notes = self.bot.drama_tracker.get_user_notes(interaction.user.id) - raw, parsed = await self.bot.llm.raw_analyze( + raw, parsed = await self.bot.llm_heavy.raw_analyze( message, user_notes=user_notes, channel_context=channel_context, ) diff --git a/cogs/sentiment.py b/cogs/sentiment.py index 3e3ebe7..c2b0b2e 100644 --- a/cogs/sentiment.py +++ b/cogs/sentiment.py @@ -141,7 +141,7 @@ class SentimentCog(commands.Cog): game_channels = config.get("game_channels", {}) channel_context = self._build_channel_context(message, game_channels) - # Analyze the combined message + # Analyze the combined message (triage with lightweight model) context = self._get_context(message) user_notes = self.bot.drama_tracker.get_user_notes(message.author.id) result = await self.bot.llm.analyze_message( @@ -152,6 +152,26 @@ class SentimentCog(commands.Cog): if result is None: return + # Escalation: re-analyze with heavy model if triage sees elevated toxicity, off-topic, or low coherence + escalation_threshold = sentiment_config.get("escalation_threshold", 0.25) + needs_escalation = ( + result["toxicity_score"] >= escalation_threshold + or result.get("off_topic", False) + or result.get("coherence_score", 1.0) < 0.6 + ) + if needs_escalation: + triage_score = result["toxicity_score"] + heavy_result = await 
self.bot.llm_heavy.analyze_message( + combined_content, context, user_notes=user_notes, + channel_context=channel_context, + ) + if heavy_result is not None: + logger.info( + "Escalated to heavy model (triage_score=%.2f) for %s", + triage_score, message.author.display_name, + ) + result = heavy_result + score = result["toxicity_score"] categories = result["categories"] reasoning = result["reasoning"] diff --git a/config.yaml b/config.yaml index dbb28fc..7ea70c2 100644 --- a/config.yaml +++ b/config.yaml @@ -18,6 +18,7 @@ sentiment: rolling_window_size: 10 # Number of messages to track per user rolling_window_minutes: 15 # Time window for tracking batch_window_seconds: 3 # Wait this long for more messages before analyzing (debounce) + escalation_threshold: 0.25 # Minimum triage toxicity score that triggers re-analysis with the heavy model game_channels: gta-online: "GTA Online"