feat: add warning expiration and exclude moderated messages from context

The warning flag now auto-expires after a configurable duration (warning_expiration_minutes, default 30 minutes). After expiry, the user must be re-warned before a mute can be issued. Messages that triggered moderation actions (warnings/mutes) are now excluded from the LLM context window in both buffered analysis and mention scans, preventing already-actioned content from influencing future scoring. The exclusion uses in-memory tracking, with a bot-reaction fallback for post-restart coverage.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-28 13:39:49 -05:00
parent 36df4cf5a6
commit eb7eb81621
6 changed files with 86 additions and 18 deletions
@@ -144,6 +144,12 @@ class Database:
                ALTER TABLE UserState ADD Aliases NVARCHAR(500) NULL
        """)

+        # --- Schema migration for warning expiration ---
+        cursor.execute("""
+            IF COL_LENGTH('UserState', 'WarningExpiresAt') IS NULL
+                ALTER TABLE UserState ADD WarningExpiresAt FLOAT NULL
+        """)
+
        cursor.execute("""
            IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'BotSettings')
            CREATE TABLE BotSettings (
@@ -321,19 +327,20 @@ class Database:
        warned: bool = False,
        last_offense_at: float | None = None,
        aliases: str | None = None,
+        warning_expires_at: float | None = None,
    ) -> None:
-        """Upsert user state (offense count, immunity, off-topic count, coherence baseline, notes, warned, last offense time, aliases)."""
+        """Upsert user state (offense count, immunity, off-topic count, coherence baseline, notes, warned, last offense time, aliases, warning expiration)."""
        if not self._available:
            return
        try:
            await asyncio.to_thread(
                self._save_user_state_sync,
-                user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned, last_offense_at, aliases,
+                user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned, last_offense_at, aliases, warning_expires_at,
            )
        except Exception:
            logger.exception("Failed to save user state")

-    def _save_user_state_sync(self, user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned, last_offense_at, aliases):
+    def _save_user_state_sync(self, user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned, last_offense_at, aliases, warning_expires_at):
        conn = self._connect()
        try:
            cursor = conn.cursor()
@@ -344,14 +351,14 @@ class Database:
                   WHEN MATCHED THEN
                       UPDATE SET OffenseCount = ?, Immune = ?, OffTopicCount = ?,
                                  BaselineCoherence = ?, UserNotes = ?, Warned = ?,
-                                  LastOffenseAt = ?, Aliases = ?,
+                                  LastOffenseAt = ?, Aliases = ?, WarningExpiresAt = ?,
                                  UpdatedAt = SYSUTCDATETIME()
                   WHEN NOT MATCHED THEN
-                       INSERT (UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned, LastOffenseAt, Aliases)
-                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);""",
+                       INSERT (UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned, LastOffenseAt, Aliases, WarningExpiresAt)
+                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);""",
                user_id,
-                offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0, last_offense_at, aliases,
-                user_id, offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0, last_offense_at, aliases,
+                offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0, last_offense_at, aliases, warning_expires_at,
+                user_id, offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0, last_offense_at, aliases, warning_expires_at,
            )
            cursor.close()
        finally:
@@ -394,7 +401,7 @@ class Database:
        try:
            cursor = conn.cursor()
            cursor.execute(
-                "SELECT UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned, LastOffenseAt, Aliases FROM UserState"
+                "SELECT UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned, LastOffenseAt, Aliases, WarningExpiresAt FROM UserState"
            )
            rows = cursor.fetchall()
            cursor.close()
@@ -409,6 +416,7 @@ class Database:
                    "warned": bool(row[6]),
                    "last_offense_at": float(row[7]) if row[7] is not None else 0.0,
                    "aliases": row[8] or "",
+                    "warning_expires_at": float(row[9]) if row[9] is not None else 0.0,
                }
                for row in rows
            ]
@@ -19,6 +19,7 @@ class UserDrama:
    last_warning_time: float = 0.0
    last_analysis_time: float = 0.0
    warned_since_reset: bool = False
+    warning_expires_at: float = 0.0
    immune: bool = False
    # Topic drift tracking
    off_topic_count: int = 0
@@ -40,10 +41,12 @@ class DramaTracker:
        window_size: int = 10,
        window_minutes: int = 15,
        offense_reset_minutes: int = 120,
+        warning_expiration_minutes: int = 30,
    ):
        self.window_size = window_size
        self.window_seconds = window_minutes * 60
        self.offense_reset_seconds = offense_reset_minutes * 60
+        self.warning_expiration_seconds = warning_expiration_minutes * 60
        self._users: dict[int, UserDrama] = {}

    def get_user(self, user_id: int) -> UserDrama:
@@ -74,6 +77,7 @@ class DramaTracker:

    def get_drama_score(self, user_id: int, escalation_boost: float = 0.04) -> float:
        user = self.get_user(user_id)
+        self._expire_warning(user)
        now = time.time()
        self._prune_entries(user, now)

@@ -105,6 +109,7 @@ class DramaTracker:
    def get_mute_threshold(self, user_id: int, base_threshold: float) -> float:
        """Lower the mute threshold if user was already warned."""
        user = self.get_user(user_id)
+        self._expire_warning(user)
        if user.warned_since_reset:
            return base_threshold - 0.05
        return base_threshold
@@ -123,12 +128,34 @@ class DramaTracker:
        user.offense_count += 1
        user.last_offense_time = now
        user.warned_since_reset = False
+        user.warning_expires_at = 0.0
        return user.offense_count

    def record_warning(self, user_id: int) -> None:
        user = self.get_user(user_id)
-        user.last_warning_time = time.time()
+        now = time.time()
+        user.last_warning_time = now
        user.warned_since_reset = True
+        if self.warning_expiration_seconds > 0:
+            user.warning_expires_at = now + self.warning_expiration_seconds
+        else:
+            user.warning_expires_at = 0.0  # Never expires
+
+    def _expire_warning(self, user: UserDrama) -> None:
+        """Clear warned flag if the warning has expired."""
+        if (
+            user.warned_since_reset
+            and user.warning_expires_at > 0
+            and time.time() >= user.warning_expires_at
+        ):
+            user.warned_since_reset = False
+            user.warning_expires_at = 0.0
+
+    def is_warned(self, user_id: int) -> bool:
+        """Check if user is currently warned (respects expiration)."""
+        user = self.get_user(user_id)
+        self._expire_warning(user)
+        return user.warned_since_reset

    def can_warn(self, user_id: int, cooldown_minutes: int) -> bool:
        user = self.get_user(user_id)
@@ -303,12 +330,16 @@ class DramaTracker:
                user.notes = state["user_notes"]
            if state.get("warned"):
                user.warned_since_reset = True
+                user.warning_expires_at = state.get("warning_expires_at", 0.0) or 0.0
+                # Expire warning at load time if it's past due
+                self._expire_warning(user)
            if state.get("last_offense_at"):
                user.last_offense_time = state["last_offense_at"]
                # Apply time-based offense reset at load time
                if time.time() - user.last_offense_time > self.offense_reset_seconds:
                    user.offense_count = 0
                    user.warned_since_reset = False
+                    user.warning_expires_at = 0.0
                    user.last_offense_time = 0.0
            if state.get("aliases"):
                user.aliases = [a.strip() for a in state["aliases"].split(",") if a.strip()]