feat: add warning expiration and exclude moderated messages from context

The warning flag now auto-expires after a configurable duration (`warning_expiration_minutes`, default 30 minutes). Once it has expired, the user must be warned again before a mute can be issued.

Messages that triggered a moderation action (a warning or a mute) are now excluded from the LLM context window in both buffered analysis and mention scans, so that content which has already been actioned does not influence future scoring. Moderated message IDs are tracked in memory (the set is capped and pruned to avoid unbounded growth); as a fallback that survives restarts, a message carrying the bot's own warning reaction (⚠️) is also treated as moderated.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 13:39:49 -05:00
parent 36df4cf5a6
commit eb7eb81621
6 changed files with 86 additions and 18 deletions
--- a/cogs/sentiment/init.py
+++ b/cogs/sentiment/init.py
@@ -66,6 +66,7 @@ class SentimentCog(commands.Cog):
        self._mention_scan_results: dict[int, str] = {}  # {trigger_message_id: findings_summary}
        self._analyzed_message_ids: set[int] = set()  # Discord message IDs already analyzed
        self._max_analyzed_ids = 500
+        self._moderated_message_ids: set[int] = set()  # Message IDs that triggered moderation


    async def cog_load(self):
@@ -206,21 +207,24 @@ class SentimentCog(commands.Cog):
        thresholds: dict,
        db_message_id: int | None,
        violated_rules: list[int] | None = None,
-    ) -> None:
-        """Issue a warning or mute based on scores and thresholds."""
+    ) -> bool:
+        """Issue a warning or mute based on scores and thresholds.
+        Returns True if any moderation action was taken."""
        rules_config = _RULES_DICT
        mute_threshold = self.bot.drama_tracker.get_mute_threshold(user_id, thresholds["mute"])
-        user_data = self.bot.drama_tracker.get_user(user_id)
        if drama_score >= mute_threshold or score >= thresholds["spike_mute"]:
            effective_score = max(drama_score, score)
-            if user_data.warned_since_reset:
+            if self.bot.drama_tracker.is_warned(user_id):
                await mute_user(self.bot, message, effective_score, categories, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config)
            else:
                logger.info("Downgrading mute to warning for %s (no prior warning)", message.author)
                await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config)
+            return True
        elif drama_score >= thresholds["warning"] or score >= thresholds["spike_warn"]:
            effective_score = max(drama_score, score)
            await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config)
+            return True
+        return False

    @staticmethod
    def _build_rules_context() -> str:
@@ -484,10 +488,14 @@ class SentimentCog(commands.Cog):

        # Moderation
        if not dry_run:
-            await self._apply_moderation(
+            acted = await self._apply_moderation(
                user_ref_msg, user_id, score, drama_score, categories, thresholds, db_message_id,
                violated_rules=violated_rules,
            )
+            if acted:
+                for m in user_msgs:
+                    self._moderated_message_ids.add(m.id)
+                self._prune_moderated_ids()

        return (username, score, drama_score, categories)

@@ -514,11 +522,13 @@ class SentimentCog(commands.Cog):
        oldest_buffered = messages[0]
        history_messages: list[discord.Message] = []
        try:
-            async for msg in channel.history(limit=context_count + 5, before=oldest_buffered):
+            async for msg in channel.history(limit=context_count + 10, before=oldest_buffered):
                if msg.author.bot:
                    continue
                if not msg.content or not msg.content.strip():
                    continue
+                if self._was_moderated(msg):
+                    continue
                history_messages.append(msg)
                if len(history_messages) >= context_count:
                    break
@@ -636,6 +646,19 @@ class SentimentCog(commands.Cog):
            sorted_ids = sorted(self._analyzed_message_ids)
            self._analyzed_message_ids = set(sorted_ids[len(sorted_ids) // 2:])

+    def _prune_moderated_ids(self):
+        """Cap the moderated message ID set to avoid unbounded growth."""
+        if len(self._moderated_message_ids) > self._max_analyzed_ids:
+            sorted_ids = sorted(self._moderated_message_ids)
+            self._moderated_message_ids = set(sorted_ids[len(sorted_ids) // 2:])
+
+    def _was_moderated(self, msg: discord.Message) -> bool:
+        """Check if a message already triggered moderation (in-memory or via reaction)."""
+        if msg.id in self._moderated_message_ids:
+            return True
+        # Fall back to checking for bot's warning reaction (survives restarts)
+        return any(str(r.emoji) == "\u26a0\ufe0f" and r.me for r in msg.reactions)
+
    async def _maybe_start_mention_scan(
        self, trigger_message: discord.Message, mention_config: dict
    ):
@@ -683,14 +706,16 @@ class SentimentCog(commands.Cog):
        sentiment_config = config.get("sentiment", {})
        game_channels = config.get("game_channels", {})

-        # Fetch recent messages (before the trigger, skip bots/empty)
+        # Fetch recent messages (before the trigger, skip bots/empty/moderated)
        raw_messages: list[discord.Message] = []
        try:
-            async for msg in channel.history(limit=scan_count + 10, before=trigger_message):
+            async for msg in channel.history(limit=scan_count + 20, before=trigger_message):
                if msg.author.bot:
                    continue
                if not msg.content or not msg.content.strip():
                    continue
+                if self._was_moderated(msg):
+                    continue
                raw_messages.append(msg)
                if len(raw_messages) >= scan_count:
                    break
--- a/cogs/sentiment/state.py
+++ b/cogs/sentiment/state.py
@@ -22,6 +22,7 @@ def save_user_state(bot, dirty_users: set[int], user_id: int) -> None:
        warned=user_data.warned_since_reset,
        last_offense_at=user_data.last_offense_time or None,
        aliases=_aliases_csv(user_data),
+        warning_expires_at=user_data.warning_expires_at or None,
    ))
    dirty_users.discard(user_id)

@@ -44,5 +45,6 @@ async def flush_dirty_states(bot, dirty_users: set[int]) -> None:
            warned=user_data.warned_since_reset,
            last_offense_at=user_data.last_offense_time or None,
            aliases=_aliases_csv(user_data),
+            warning_expires_at=user_data.warning_expires_at or None,
        )
    logger.info("Flushed %d dirty user states to DB.", len(dirty))