From 3d252ee729c91296e693ad9b95f04c46f43b0a76 Mon Sep 17 00:00:00 2001
From: AJ Isaacs
Date: Thu, 26 Feb 2026 13:20:54 -0500
Subject: [PATCH] feat: classify mention intent before running expensive scan

Adds LLM triage on bot @mentions to determine if the user is chatting
or reporting bad behavior. Only 'report' intents trigger the 30-message
scan; 'chat' intents skip the scan and let ChatCog handle it. The
mention_scan enabled check runs before the classification call, so a
disabled feature never spends an LLM request.

Co-Authored-By: Claude Opus 4.6
---
 cogs/sentiment/__init__.py | 16 +++++++++++++---
 utils/llm_client.py        | 43 +++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/cogs/sentiment/__init__.py b/cogs/sentiment/__init__.py
index 6aed028..92bc6fb 100644
--- a/cogs/sentiment/__init__.py
+++ b/cogs/sentiment/__init__.py
@@ -103,9 +103,19 @@ class SentimentCog(commands.Cog):
             or f"<@!{self.bot.user.id}>" in (message.content or "")
         )
         if bot_mentioned_in_text:
-            mention_config = config.get("mention_scan", {})
-            if mention_config.get("enabled", True):
-                await self._maybe_start_mention_scan(message, mention_config)
+            # Check the feature flag first so a disabled mention_scan never
+            # spends an LLM request. Then classify intent: only 'report'
+            # triggers the expensive scan; casual chat is left to ChatCog.
+            mention_config = config.get("mention_scan", {})
+            if mention_config.get("enabled", True):
+                intent = await self.bot.llm.classify_mention_intent(
+                    message.content or ""
+                )
+                logger.info(
+                    "Mention intent for %s: %s", message.author, intent
+                )
+                if intent == "report":
+                    await self._maybe_start_mention_scan(message, mention_config)
             return
 
         # Skip if empty
diff --git a/utils/llm_client.py b/utils/llm_client.py
index 51cced6..4d3c7d4 100644
--- a/utils/llm_client.py
+++ b/utils/llm_client.py
@@ -675,6 +675,49 @@ class LLMClient:
             self._log_llm("chat", elapsed, False, req_json, error=str(e))
             return None
 
+    async def classify_mention_intent(self, message_text: str) -> str:
+        """Classify whether a bot @mention is a chat/question or a moderation report.
+
+        Returns 'chat' or 'report'. Defaults to 'chat' on failure.
+        """
+        prompt = (
+            "You are classifying the intent of a Discord message that @mentioned a bot.\n"
+            "Reply with EXACTLY one word: 'chat' or 'report'.\n\n"
+            "- 'chat' = the user is talking to the bot, asking a question, joking, greeting, "
+            "or having a conversation. This includes things like 'what do you think?', "
+            "'hey bot', 'do you know...', or any general interaction.\n"
+            "- 'report' = the user is flagging bad behavior, asking the bot to check/scan "
+            "the chat, reporting toxicity, or pointing out someone being problematic. "
+            "This includes things like 'check this', 'they're being toxic', 'look at what "
+            "they said', 'scan the chat', or concerns about other users.\n\n"
+            "If unsure, say 'chat'."
+        )
+        t0 = time.monotonic()
+
+        async with self._semaphore:
+            try:
+                temp_kwargs = {"temperature": 0.0} if self._supports_temperature else {}
+                response = await self._client.chat.completions.create(
+                    model=self.model,
+                    messages=[
+                        {"role": "system", "content": prompt},
+                        {"role": "user", "content": message_text},
+                    ],
+                    **temp_kwargs,
+                    max_completion_tokens=16,
+                )
+                elapsed = int((time.monotonic() - t0) * 1000)
+                content = (response.choices[0].message.content or "").strip().lower()
+                intent = "report" if "report" in content else "chat"
+                self._log_llm("classify_intent", elapsed, True, message_text[:200], intent)
+                logger.info("Mention intent classified as '%s' for: %s", intent, message_text[:80])
+                return intent
+            except Exception as e:
+                elapsed = int((time.monotonic() - t0) * 1000)
+                logger.error("Intent classification error: %s", e)
+                self._log_llm("classify_intent", elapsed, False, message_text[:200], error=str(e))
+                return "chat"
+
     async def extract_memories(
         self,
         conversation: list[dict[str, str]],