From 7417908142e1921cbd9b6507940a3490c6e90ada Mon Sep 17 00:00:00 2001
From: AJ Isaacs <ajisaacs27@gmail.com>
Date: Wed, 25 Feb 2026 15:48:02 -0500
Subject: [PATCH] fix: separate context from new messages so prior-cycle chat
 doesn't inflate scores

The conversation analysis was re-scoring old messages alongside new ones,
causing users to get penalized repeatedly for already-scored messages.
A "--- NEW MESSAGES (score only these) ---" separator now marks where the
new messages begin, and the prompt instructs the LLM to score only those.
Also fixes bot-mention detection to require an explicit @mention in the
message text: reply-pings no longer trigger a mention scan, and replies
to the bot are now scored normally (so toxic replies to bot warnings
aren't silently skipped).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 cogs/sentiment.py    | 28 +++++++++++++++-------------
 prompts/analysis.txt |  6 ++++--
 utils/llm_client.py  | 14 ++++++++++++--
 3 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/cogs/sentiment.py b/cogs/sentiment.py
index 2e90d2a..8612f51 100644
--- a/cogs/sentiment.py
+++ b/cogs/sentiment.py
@@ -87,19 +87,18 @@ class SentimentCog(commands.Cog):
         if self.bot.drama_tracker.is_immune(message.author.id):
             return
 
-        # Messages directed at the bot (mentions, replies) shouldn't be scored
-        # for toxicity — but @mentions can trigger a scan of recent chat
-        directed_at_bot = self.bot.user in message.mentions
-        if not directed_at_bot and message.reference and message.reference.message_id:
-            ref = message.reference.cached_message
-            if ref and ref.author.id == self.bot.user.id:
-                directed_at_bot = True
-        if directed_at_bot:
-            # @mention (not just reply-to-bot) triggers a mention scan
-            if self.bot.user in message.mentions:
-                mention_config = config.get("mention_scan", {})
-                if mention_config.get("enabled", True):
-                    await self._maybe_start_mention_scan(message, mention_config)
+        # Explicit @mention of the bot triggers a mention scan instead of scoring.
+        # Reply-pings (Discord auto-adds replied-to user to mentions) should NOT
+        # trigger scans — and reply-to-bot messages should still be scored normally
+        # so toxic replies to bot warnings aren't silently skipped.
+        bot_mentioned_in_text = (
+            f"<@{self.bot.user.id}>" in (message.content or "")
+            or f"<@!{self.bot.user.id}>" in (message.content or "")
+        )
+        if bot_mentioned_in_text:
+            mention_config = config.get("mention_scan", {})
+            if mention_config.get("enabled", True):
+                await self._maybe_start_mention_scan(message, mention_config)
             return
 
         # Skip if empty
@@ -166,6 +165,7 @@ class SentimentCog(commands.Cog):
         history_messages.reverse()  # chronological order
 
         # Combine: history (context) + buffered (new messages to analyze)
+        new_message_start = len(history_messages)
         all_messages = history_messages + messages
 
         # Build msg_id_to_author lookup for reply resolution
@@ -215,6 +215,7 @@ class SentimentCog(commands.Cog):
             conversation,
             channel_context=channel_context,
             user_notes_map=user_notes_map,
+            new_message_start=new_message_start,
         )
 
         if result is None:
@@ -233,6 +234,7 @@ class SentimentCog(commands.Cog):
                 conversation,
                 channel_context=channel_context,
                 user_notes_map=user_notes_map,
+                new_message_start=new_message_start,
             )
             if heavy_result is not None:
                 logger.info(
diff --git a/prompts/analysis.txt b/prompts/analysis.txt
index bf8d165..e49a7ad 100644
--- a/prompts/analysis.txt
+++ b/prompts/analysis.txt
@@ -40,8 +40,10 @@ Use the report_analysis tool to report your analysis of the TARGET MESSAGE only.
 
 CONVERSATION-LEVEL ANALYSIS (when given a CONVERSATION BLOCK instead of a single TARGET MESSAGE):
 When you receive a full conversation block with multiple users, use the report_conversation_scan tool instead:
-- Provide ONE finding per user (not per message) — aggregate their behavior across the conversation.
-- Weight their average tone and worst message equally when determining the toxicity_score.
+- The conversation block may contain a "--- NEW MESSAGES (score only these) ---" separator. Messages ABOVE the separator are CONTEXT ONLY (already scored in a prior cycle) — do NOT let them inflate scores. Messages BELOW the separator are the NEW messages to score.
+- Provide ONE finding per user who has NEW messages (not per message).
+- Score based ONLY on the user's NEW messages. Use context messages to understand tone and relationships, but do NOT penalize a user for something they said in the context section.
+- If a user's only new message is benign (e.g. "I got the 17.."), score it low regardless of what they said in context.
 - Use the same scoring bands (0.0-1.0) as for single messages.
 - Quote the worst/most problematic snippet in worst_message (max 100 chars, exact quote).
 - Flag off_topic if user's messages are primarily personal drama, not gaming.
diff --git a/utils/llm_client.py b/utils/llm_client.py
index 8108cb1..8ad5357 100644
--- a/utils/llm_client.py
+++ b/utils/llm_client.py
@@ -383,12 +383,16 @@ class LLMClient:
     def _format_conversation_block(
         messages: list[tuple[str, str, datetime, str | None]],
         now: datetime | None = None,
+        new_message_start: int | None = None,
     ) -> str:
         """Format messages as a compact timestamped chat block.
 
         Each tuple is (username, content, timestamp, reply_to_username).
         Consecutive messages from the same user collapse to indented lines.
         Replies shown as ``username → replied_to:``.
+
+        If *new_message_start* is given, a separator is inserted before that
+        index so the LLM can distinguish context from new messages.
         """
         if now is None:
             now = datetime.now(timezone.utc)
@@ -396,7 +400,12 @@ class LLMClient:
         lines = [f"[Current time: {now.strftime('%I:%M %p')}]", ""]
         last_user = None
 
-        for username, content, ts, reply_to in messages:
+        for idx, (username, content, ts, reply_to) in enumerate(messages):
+            if new_message_start is not None and idx == new_message_start:
+                lines.append("")
+                lines.append("--- NEW MESSAGES (score only these) ---")
+                lines.append("")
+                last_user = None  # reset collapse so first new msg gets full header
             delta = now - ts.replace(tzinfo=timezone.utc) if ts.tzinfo is None else now - ts
             rel = LLMClient._format_relative_time(delta)
 
@@ -425,12 +434,13 @@ class LLMClient:
         mention_context: str = "",
         channel_context: str = "",
         user_notes_map: dict[str, str] | None = None,
+        new_message_start: int | None = None,
     ) -> dict | None:
         """Analyze a conversation block in one call, returning per-user findings."""
         if not messages:
             return None
 
-        convo_block = self._format_conversation_block(messages)
+        convo_block = self._format_conversation_block(messages, new_message_start=new_message_start)
 
         user_content = f"=== CONVERSATION BLOCK ===\n{convo_block}\n\n"
         if user_notes_map: