feat: add relevance-gated proactive replies

Replace random-only proactive reply logic with LLM relevance check.
The bot now evaluates recent conversation context and user memory
before deciding to jump in, then applies reply_chance as a second
gate. Bump reply_chance values higher since the relevance filter
prevents most irrelevant replies.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-01 11:34:53 -05:00
parent 175c7ad219
commit 787b083e00
3 changed files with 109 additions and 10 deletions

View File

@@ -804,6 +804,63 @@ class LLMClient:
self._log_llm("pick_reaction", elapsed, False, message_text[:200], error=str(e))
return None
async def check_reply_relevance(
    self, recent_messages: list[str], memory_context: str = "",
) -> bool:
    """Decide whether the bot would organically want to join the chat.

    Ships the tail of the recent conversation (and any stored memory
    context) to the LLM and asks for a one-word YES/NO verdict.

    Args:
        recent_messages: Chat lines, oldest first; only the last five
            are sent to the model.
        memory_context: Optional memory blurb prepended to the chat
            excerpt when non-empty.

    Returns:
        True when the model's answer contains "yes"; False on a "no"
        verdict or on any API failure (fail-closed: stay quiet).
    """
    # Runtime strings below are reproduced verbatim — the prompt wording
    # is part of the model's observed behavior.
    prompt = (
        "You're a regular member of a Discord gaming server. You're reading chat and deciding "
        "whether you'd naturally want to jump in and say something.\n\n"
        "Say YES if:\n"
        "- Someone said something you'd have a strong reaction to\n"
        "- You know something relevant about these people (see memory context)\n"
        "- Someone is wrong or has a hot take you'd want to respond to\n"
        "- The conversation is funny or interesting enough to comment on\n"
        "- Someone mentioned something you have an opinion on\n\n"
        "Say NO if:\n"
        "- It's mundane/boring small talk\n"
        "- You'd have nothing interesting to add\n"
        "- People are just chatting normally and don't need interruption\n\n"
        "Reply with EXACTLY one word: YES or NO."
    )

    # Assemble the user payload: optional memory block, then the last
    # five chat lines. Joining sections with a blank line reproduces the
    # original concatenation exactly.
    chat_tail = "\n".join(recent_messages[-5:])
    sections = []
    if memory_context:
        sections.append(memory_context)
    sections.append(f"Recent chat:\n{chat_tail}")
    payload = "\n\n".join(sections)

    # Some backends reject a temperature parameter; only pass it when
    # this client is known to support it.
    extra = {"temperature": 0.3} if self._supports_temperature else {}

    started = time.monotonic()
    async with self._semaphore:
        try:
            completion = await self._client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": prompt},
                    # Cap the payload so an enormous memory/chat dump
                    # can't blow up the request.
                    {"role": "user", "content": payload[:1000]},
                ],
                **extra,
                max_completion_tokens=16,
            )
            elapsed = int((time.monotonic() - started) * 1000)
            verdict = (completion.choices[0].message.content or "").strip().lower()
            wants_reply = "yes" in verdict
            self._log_llm(
                "check_relevance", elapsed, True,
                payload[:300], verdict,
            )
            logger.debug("Relevance check: %s", verdict)
            return wants_reply
        except Exception as exc:
            # Fail closed: an API error means we simply don't jump in.
            elapsed = int((time.monotonic() - started) * 1000)
            logger.error("Relevance check error: %s", exc)
            self._log_llm("check_relevance", elapsed, False, payload[:300], error=str(exc))
            return False
async def extract_memories(
self,
conversation: list[dict[str, str]],