From 3d252ee729c91296e693ad9b95f04c46f43b0a76 Mon Sep 17 00:00:00 2001
From: AJ Isaacs
Date: Thu, 26 Feb 2026 13:20:54 -0500
Subject: [PATCH] feat: classify mention intent before running expensive scan

Adds LLM triage on bot @mentions to determine if the user is chatting
or reporting bad behavior. Only 'report' intents trigger the 30-message
scan; 'chat' intents skip the scan and let ChatCog handle it. The
mention_scan enabled check runs before the classification call, so a
disabled feature never spends an LLM request.

Co-Authored-By: Claude Opus 4.6
---
 cogs/sentiment/__init__.py | 16 +++++++++++++---
 utils/llm_client.py        | 43 +++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/cogs/sentiment/__init__.py b/cogs/sentiment/__init__.py
index 6aed028..92bc6fb 100644
--- a/cogs/sentiment/__init__.py
+++ b/cogs/sentiment/__init__.py
@@ -103,9 +103,19 @@ class SentimentCog(commands.Cog):
             or f"<@!{self.bot.user.id}>" in (message.content or "")
         )
         if bot_mentioned_in_text:
-            mention_config = config.get("mention_scan", {})
-            if mention_config.get("enabled", True):
-                await self._maybe_start_mention_scan(message, mention_config)
+            # Check the feature flag first so a disabled mention_scan never
+            # spends an LLM request. Then classify intent: only 'report'
+            # triggers the expensive scan; casual chat is left to ChatCog.
+            mention_config = config.get("mention_scan", {})
+            if mention_config.get("enabled", True):
+                intent = await self.bot.llm.classify_mention_intent(
+                    message.content or ""
+                )
+                logger.info(
+                    "Mention intent for %s: %s", message.author, intent
+                )
+                if intent == "report":
+                    await self._maybe_start_mention_scan(message, mention_config)
             return
 
         # Skip if empty
diff --git a/utils/llm_client.py b/utils/llm_client.py
index 51cced6..4d3c7d4 100644
--- a/utils/llm_client.py
+++ b/utils/llm_client.py
@@ -675,6 +675,49 @@ class LLMClient:
             self._log_llm("chat", elapsed, False, req_json, error=str(e))
             return None
 
+    async def classify_mention_intent(self, message_text: str) -> str:
+        """Classify whether a bot @mention is a chat/question or a moderation report.
+
+        Returns 'chat' or 'report'. Defaults to 'chat' on failure.
+        """
+        prompt = (
+            "You are classifying the intent of a Discord message that @mentioned a bot.\n"
+            "Reply with EXACTLY one word: 'chat' or 'report'.\n\n"
+            "- 'chat' = the user is talking to the bot, asking a question, joking, greeting, "
+            "or having a conversation. This includes things like 'what do you think?', "
+            "'hey bot', 'do you know...', or any general interaction.\n"
+            "- 'report' = the user is flagging bad behavior, asking the bot to check/scan "
+            "the chat, reporting toxicity, or pointing out someone being problematic. "
+            "This includes things like 'check this', 'they're being toxic', 'look at what "
+            "they said', 'scan the chat', or concerns about other users.\n\n"
+            "If unsure, say 'chat'."
+        )
+        t0 = time.monotonic()
+
+        async with self._semaphore:
+            try:
+                temp_kwargs = {"temperature": 0.0} if self._supports_temperature else {}
+                response = await self._client.chat.completions.create(
+                    model=self.model,
+                    messages=[
+                        {"role": "system", "content": prompt},
+                        {"role": "user", "content": message_text},
+                    ],
+                    **temp_kwargs,
+                    max_completion_tokens=16,
+                )
+                elapsed = int((time.monotonic() - t0) * 1000)
+                content = (response.choices[0].message.content or "").strip().lower()
+                intent = "report" if "report" in content else "chat"
+                self._log_llm("classify_intent", elapsed, True, message_text[:200], intent)
+                logger.info("Mention intent classified as '%s' for: %s", intent, message_text[:80])
+                return intent
+            except Exception as e:
+                elapsed = int((time.monotonic() - t0) * 1000)
+                logger.error("Intent classification error: %s", e)
+                self._log_llm("classify_intent", elapsed, False, message_text[:200], error=str(e))
+                return "chat"
+
     async def extract_memories(
         self,
         conversation: list[dict[str, str]],