Compare commits

...

2 Commits

Author SHA1 Message Date
b9bac899f9 Add two-tier LLM analysis with triage/escalation
Triage model (LLM_MODEL) handles every message cheaply. If toxicity
>= 0.25, off_topic, or coherence < 0.6, the message is re-analyzed
with the heavy model (LLM_ESCALATION_MODEL). Chat, image analysis,
/bcs-test, and /bcs-scan always use the heavy model.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 18:33:36 -05:00
64e9474c99 Add message batching (debounce) for rapid-fire senders
Buffer messages per user+channel and wait for a configurable window
(batch_window_seconds: 3) before analyzing. Combines burst messages
into a single LLM call instead of analyzing each one separately.
Replaces cooldown_between_analyses with the debounce approach.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 18:19:01 -05:00
5 changed files with 108 additions and 16 deletions

7
bot.py
View File

@@ -65,12 +65,16 @@ class BCSBot(commands.Bot):
self.config = config
# LLM client (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI)
# LLM clients (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI)
llm_base_url = os.getenv("LLM_BASE_URL", "http://athena.lan:11434")
llm_model = os.getenv("LLM_MODEL", "Qwen3-VL-32B-Thinking-Q8_0")
llm_api_key = os.getenv("LLM_API_KEY", "not-needed")
self.llm = LLMClient(llm_base_url, llm_model, llm_api_key)
# Heavy/escalation model for re-analysis, chat, and manual commands
llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", llm_model)
self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key)
# Drama tracker
sentiment = config.get("sentiment", {})
timeouts = config.get("timeouts", {})
@@ -167,6 +171,7 @@ class BCSBot(commands.Bot):
async def close(self):
    """Shut the bot down, releasing owned resources before the base class.

    Closes, in this order, the database handle, the triage LLM client and
    the escalation LLM client, then delegates to the parent ``close()``.
    """
    # Attribute names are resolved one at a time so each lookup happens
    # only after the previous resource has finished closing.
    for attr_name in ("db", "llm", "llm_heavy"):
        resource = getattr(self, attr_name)
        await resource.close()
    await super().close()

View File

@@ -84,7 +84,7 @@ class ChatCog(commands.Cog):
image_attachment.filename,
user_text[:80],
)
response = await self.bot.llm.analyze_image(
response = await self.bot.llm_heavy.analyze_image(
image_bytes,
SCOREBOARD_ROAST,
user_text=user_text,
@@ -108,7 +108,7 @@ class ChatCog(commands.Cog):
{"role": "user", "content": f"{score_context}\n{message.author.display_name}: {content}"}
)
response = await self.bot.llm.chat(
response = await self.bot.llm_heavy.chat(
list(self._chat_history[ch_id]),
CHAT_PERSONALITY,
on_first_token=start_typing,

View File

@@ -126,9 +126,19 @@ class CommandsCog(commands.Cog):
inline=True,
)
embed.add_field(
name="LLM",
value=f"`{self.bot.llm.model}` @ `{self.bot.llm.host}`",
inline=False,
name="Triage Model",
value=f"`{self.bot.llm.model}`",
inline=True,
)
embed.add_field(
name="Escalation Model",
value=f"`{self.bot.llm_heavy.model}`",
inline=True,
)
embed.add_field(
name="LLM Host",
value=f"`{self.bot.llm.host}`",
inline=True,
)
await interaction.response.send_message(embed=embed, ephemeral=True)
@@ -301,7 +311,7 @@ class CommandsCog(commands.Cog):
else "(no prior context)"
)
result = await self.bot.llm.analyze_message(msg.content, context)
result = await self.bot.llm_heavy.analyze_message(msg.content, context)
if result is None:
embed = discord.Embed(
title=f"Analysis: {msg.author.display_name}",
@@ -374,7 +384,7 @@ class CommandsCog(commands.Cog):
channel_context = "\n".join(lines)
user_notes = self.bot.drama_tracker.get_user_notes(interaction.user.id)
raw, parsed = await self.bot.llm.raw_analyze(
raw, parsed = await self.bot.llm_heavy.raw_analyze(
message, user_notes=user_notes, channel_context=channel_context,
)

View File

@@ -21,12 +21,22 @@ class SentimentCog(commands.Cog):
self._dirty_users: set[int] = set()
# Per-user redirect cooldown: {user_id: last_redirect_datetime}
self._redirect_cooldowns: dict[int, datetime] = {}
# Debounce buffer: keyed by (channel_id, user_id), stores list of messages
self._message_buffer: dict[tuple[int, int], list[discord.Message]] = {}
# Pending debounce timer tasks
self._debounce_tasks: dict[tuple[int, int], asyncio.Task] = {}
async def cog_load(self):
# Load hook for the cog: kicks off `_flush_states` via its `.start()` method.
# NOTE(review): `_flush_states` is defined outside this view — presumably the periodic state-flush loop; confirm.
self._flush_states.start()
async def cog_unload(self):
    """Tear down the cog: stop timers, drain buffered messages, flush state.

    Steps, in order:
      1. Cancel the ``_flush_states`` loop.
      2. Cancel every pending debounce timer and forget it.
      3. Process whatever is still sitting in the message buffer.
      4. Run a final ``_flush_dirty_states()``.

    The final flush sits in a ``finally`` block so that an exception raised
    while draining one buffer cannot skip it; the original exception still
    propagates to the caller afterwards.
    """
    self._flush_states.cancel()
    try:
        # Cancel all pending debounce timers and process remaining buffers
        for pending in self._debounce_tasks.values():
            pending.cancel()
        self._debounce_tasks.clear()
        # Iterate over a snapshot of the keys: _process_buffered() pops
        # entries out of the buffer while we loop.
        for key in list(self._message_buffer):
            await self._process_buffered(key)
    finally:
        # Final flush on shutdown
        await self._flush_dirty_states()
@@ -75,27 +85,93 @@ class SentimentCog(commands.Cog):
if not message.content or not message.content.strip():
return
# Check per-user analysis cooldown
sentiment_config = config.get("sentiment", {})
cooldown = sentiment_config.get("cooldown_between_analyses", 2)
if not self.bot.drama_tracker.can_analyze(message.author.id, cooldown):
# Buffer the message and start/reset debounce timer
key = (message.channel.id, message.author.id)
if key not in self._message_buffer:
self._message_buffer[key] = []
self._message_buffer[key].append(message)
# Cancel existing debounce timer for this user+channel
existing_task = self._debounce_tasks.get(key)
if existing_task and not existing_task.done():
existing_task.cancel()
# Start new debounce timer
batch_window = config.get("sentiment", {}).get("batch_window_seconds", 3)
self._debounce_tasks[key] = asyncio.create_task(
self._debounce_then_process(key, batch_window)
)
async def _debounce_then_process(self, key: tuple[int, int], delay: float) -> None:
    """Sleep for the debounce window, then process the buffered messages.

    Args:
        key: ``(channel_id, user_id)`` pair identifying the buffer to drain.
        delay: Debounce window in seconds.

    Only a cancellation that arrives *during the sleep* is treated as a
    timer reset and ignored. A cancellation that arrives once processing
    has started is allowed to propagate, so the task is reported as
    cancelled instead of silently finishing.
    """
    try:
        await asyncio.sleep(delay)
    except asyncio.CancelledError:
        return  # Timer was reset by a new message — expected
    await self._process_buffered(key)
async def _process_buffered(self, key: tuple[int, int]):
"""Combine buffered messages and run the analysis pipeline once."""
messages = self._message_buffer.pop(key, [])
self._debounce_tasks.pop(key, None)
if not messages:
return
# Use the last message as the reference for channel, author, guild, etc.
message = messages[-1]
combined_content = "\n".join(m.content for m in messages if m.content and m.content.strip())
if not combined_content.strip():
return
batch_count = len(messages)
if batch_count > 1:
logger.info(
"Batched %d messages from %s in #%s",
batch_count, message.author.display_name,
getattr(message.channel, 'name', 'unknown'),
)
config = self.bot.config
monitoring = config.get("monitoring", {})
sentiment_config = config.get("sentiment", {})
# Build channel context for game detection
game_channels = config.get("game_channels", {})
channel_context = self._build_channel_context(message, game_channels)
# Analyze the message
# Analyze the combined message (triage with lightweight model)
context = self._get_context(message)
user_notes = self.bot.drama_tracker.get_user_notes(message.author.id)
result = await self.bot.llm.analyze_message(
message.content, context, user_notes=user_notes,
combined_content, context, user_notes=user_notes,
channel_context=channel_context,
)
if result is None:
return
# Escalation: re-analyze with heavy model if triage flags something
escalation_threshold = sentiment_config.get("escalation_threshold", 0.25)
needs_escalation = (
result["toxicity_score"] >= escalation_threshold
or result.get("off_topic", False)
or result.get("coherence_score", 1.0) < 0.6
)
if needs_escalation:
triage_score = result["toxicity_score"]
heavy_result = await self.bot.llm_heavy.analyze_message(
combined_content, context, user_notes=user_notes,
channel_context=channel_context,
)
if heavy_result is not None:
logger.info(
"Escalated to heavy model (triage_score=%.2f) for %s",
triage_score, message.author.display_name,
)
result = heavy_result
score = result["toxicity_score"]
categories = result["categories"]
reasoning = result["reasoning"]
@@ -128,7 +204,7 @@ class SentimentCog(commands.Cog):
channel_id=message.channel.id,
user_id=message.author.id,
username=message.author.display_name,
content=message.content,
content=combined_content,
message_ts=message.created_at.replace(tzinfo=timezone.utc),
toxicity_score=score,
drama_score=drama_score,

View File

@@ -17,7 +17,8 @@ sentiment:
context_messages: 3 # Number of previous messages to include as context
rolling_window_size: 10 # Number of messages to track per user
rolling_window_minutes: 15 # Time window for tracking
cooldown_between_analyses: 2 # Seconds between analyzing same user's messages
batch_window_seconds: 3 # Wait this long for more messages before analyzing (debounce)
escalation_threshold: 0.25 # Triage toxicity score that triggers re-analysis with heavy model
game_channels:
gta-online: "GTA Online"