From eb7eb816211f75ea82b2aaac7e7b5d3dab2f2192 Mon Sep 17 00:00:00 2001 From: AJ Isaacs Date: Sat, 28 Feb 2026 13:39:49 -0500 Subject: [PATCH] feat: add warning expiration and exclude moderated messages from context Warning flag now auto-expires after a configurable duration (warning_expiration_minutes, default 30m). After expiry, the user must be re-warned before a mute can be issued. Messages that triggered moderation actions (warnings/mutes) are now excluded from the LLM context window in both buffered analysis and mention scans, preventing already-actioned content from influencing future scoring. Uses in-memory tracking plus bot reaction fallback for post-restart coverage. Co-Authored-By: Claude Opus 4.6 --- bot.py | 1 + cogs/sentiment/__init__.py | 41 ++++++++++++++++++++++++++++++-------- cogs/sentiment/state.py | 2 ++ config.yaml | 1 + utils/database.py | 26 +++++++++++++++--------- utils/drama_tracker.py | 33 +++++++++++++++++++++++++++++- 6 files changed, 86 insertions(+), 18 deletions(-) diff --git a/bot.py b/bot.py index 49c191f..2539c06 100644 --- a/bot.py +++ b/bot.py @@ -112,6 +112,7 @@ class BCSBot(commands.Bot): window_size=sentiment.get("rolling_window_size", 10), window_minutes=sentiment.get("rolling_window_minutes", 15), offense_reset_minutes=timeouts.get("offense_reset_minutes", 120), + warning_expiration_minutes=timeouts.get("warning_expiration_minutes", 30), ) def get_mode_config(self) -> dict: diff --git a/cogs/sentiment/__init__.py b/cogs/sentiment/__init__.py index 6513813..7746c4d 100644 --- a/cogs/sentiment/__init__.py +++ b/cogs/sentiment/__init__.py @@ -66,6 +66,7 @@ class SentimentCog(commands.Cog): self._mention_scan_results: dict[int, str] = {} # {trigger_message_id: findings_summary} self._analyzed_message_ids: set[int] = set() # Discord message IDs already analyzed self._max_analyzed_ids = 500 + self._moderated_message_ids: set[int] = set() # Message IDs that triggered moderation async def cog_load(self): @@ -206,21 +207,24 @@ class SentimentCog(commands.Cog): thresholds: dict, db_message_id: int | None, violated_rules: list[int] | None = None, - ) -> None: - """Issue a warning or mute based on scores and thresholds.""" + ) -> bool: + """Issue a warning or mute based on scores and thresholds. + Returns True if any moderation action was taken.""" rules_config = _RULES_DICT mute_threshold = self.bot.drama_tracker.get_mute_threshold(user_id, thresholds["mute"]) - user_data = self.bot.drama_tracker.get_user(user_id) if drama_score >= mute_threshold or score >= thresholds["spike_mute"]: effective_score = max(drama_score, score) - if user_data.warned_since_reset: + if self.bot.drama_tracker.is_warned(user_id): await mute_user(self.bot, message, effective_score, categories, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config) else: logger.info("Downgrading mute to warning for %s (no prior warning)", message.author) await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config) + return True elif drama_score >= thresholds["warning"] or score >= thresholds["spike_warn"]: effective_score = max(drama_score, score) await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config) + return True + return False @staticmethod def _build_rules_context() -> str: @@ -484,10 +488,14 @@ class SentimentCog(commands.Cog): # Moderation if not dry_run: - await self._apply_moderation( + acted = await self._apply_moderation( user_ref_msg, user_id, score, drama_score, categories, thresholds, db_message_id, violated_rules=violated_rules, ) + if acted: + for m in user_msgs: + self._moderated_message_ids.add(m.id) + self._prune_moderated_ids() return (username, score, drama_score, categories) @@ -514,11 +522,13 @@ class SentimentCog(commands.Cog): oldest_buffered = messages[0] history_messages: list[discord.Message] = [] try: - async for msg in channel.history(limit=context_count + 5, before=oldest_buffered): + async for msg in channel.history(limit=context_count + 10, before=oldest_buffered): if msg.author.bot: continue if not msg.content or not msg.content.strip(): continue + if self._was_moderated(msg): + continue history_messages.append(msg) if len(history_messages) >= context_count: break @@ -636,6 +646,19 @@ class SentimentCog(commands.Cog): sorted_ids = sorted(self._analyzed_message_ids) self._analyzed_message_ids = set(sorted_ids[len(sorted_ids) // 2:]) + def _prune_moderated_ids(self): + """Cap the moderated message ID set to avoid unbounded growth.""" + if len(self._moderated_message_ids) > self._max_analyzed_ids: + sorted_ids = sorted(self._moderated_message_ids) + self._moderated_message_ids = set(sorted_ids[len(sorted_ids) // 2:]) + + def _was_moderated(self, msg: discord.Message) -> bool: + """Check if a message already triggered moderation (in-memory or via reaction).""" + if msg.id in self._moderated_message_ids: + return True + # Fall back to checking for bot's warning reaction (survives restarts) + return any(str(r.emoji) == "\u26a0\ufe0f" and r.me for r in msg.reactions) + async def _maybe_start_mention_scan( self, trigger_message: discord.Message, mention_config: dict ): @@ -683,14 +706,16 @@ class SentimentCog(commands.Cog): sentiment_config = config.get("sentiment", {}) game_channels = config.get("game_channels", {}) - # Fetch recent messages (before the trigger, skip bots/empty) + # Fetch recent messages (before the trigger, skip bots/empty/moderated) raw_messages: list[discord.Message] = [] try: - async for msg in channel.history(limit=scan_count + 10, before=trigger_message): + async for msg in channel.history(limit=scan_count + 20, before=trigger_message): if msg.author.bot: continue if not msg.content or not msg.content.strip(): continue + if self._was_moderated(msg): + continue raw_messages.append(msg) if len(raw_messages) >= scan_count: break diff --git a/cogs/sentiment/state.py b/cogs/sentiment/state.py index 4a442ee..35a6e5b 100644 --- a/cogs/sentiment/state.py +++ b/cogs/sentiment/state.py @@ -22,6 +22,7 @@ def save_user_state(bot, dirty_users: set[int], user_id: int) -> None: warned=user_data.warned_since_reset, last_offense_at=user_data.last_offense_time or None, aliases=_aliases_csv(user_data), + warning_expires_at=user_data.warning_expires_at or None, )) dirty_users.discard(user_id) @@ -44,5 +45,6 @@ async def flush_dirty_states(bot, dirty_users: set[int]) -> None: warned=user_data.warned_since_reset, last_offense_at=user_data.last_offense_time or None, aliases=_aliases_csv(user_data), + warning_expires_at=user_data.warning_expires_at or None, ) logger.info("Flushed %d dirty user states to DB.", len(dirty)) diff --git a/config.yaml b/config.yaml index 1990f01..9b218ab 100644 --- a/config.yaml +++ b/config.yaml @@ -44,6 +44,7 @@ timeouts: escalation_minutes: [30, 60, 120, 240] # Escalating timeout durations offense_reset_minutes: 1440 # Reset offense counter after this much good behavior (24h) warning_cooldown_minutes: 5 # Don't warn same user more than once per this window + warning_expiration_minutes: 30 # Warning expires after this long — user must be re-warned before mute messages: warning: "Easy there, {username}. The Breehavior Monitor is watching. \U0001F440" diff --git a/utils/database.py b/utils/database.py index 1aeb454..b121f5e 100644 --- a/utils/database.py +++ b/utils/database.py @@ -144,6 +144,12 @@ class Database: ALTER TABLE UserState ADD Aliases NVARCHAR(500) NULL """) + # --- Schema migration for warning expiration --- + cursor.execute(""" + IF COL_LENGTH('UserState', 'WarningExpiresAt') IS NULL + ALTER TABLE UserState ADD WarningExpiresAt FLOAT NULL + """) + cursor.execute(""" IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'BotSettings') CREATE TABLE BotSettings ( @@ -321,19 +327,20 @@ class Database: warned: bool = False, last_offense_at: float | None = None, aliases: str | None = None, + warning_expires_at: float | None = None, ) -> None: - """Upsert user state (offense count, immunity, off-topic count, coherence baseline, notes, warned, last offense time, aliases).""" + """Upsert user state (offense count, immunity, off-topic count, coherence baseline, notes, warned, last offense time, aliases, warning expiration).""" if not self._available: return try: await asyncio.to_thread( self._save_user_state_sync, - user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned, last_offense_at, aliases, + user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned, last_offense_at, aliases, warning_expires_at, ) except Exception: logger.exception("Failed to save user state") - def _save_user_state_sync(self, user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned, last_offense_at, aliases): + def _save_user_state_sync(self, user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned, last_offense_at, aliases, warning_expires_at): conn = self._connect() try: cursor = conn.cursor() @@ -344,14 +351,14 @@ class Database: WHEN MATCHED THEN UPDATE SET OffenseCount = ?, Immune = ?, OffTopicCount = ?, BaselineCoherence = ?, UserNotes = ?, Warned = ?, - LastOffenseAt = ?, Aliases = ?, + LastOffenseAt = ?, Aliases = ?, WarningExpiresAt = ?, UpdatedAt = SYSUTCDATETIME() WHEN NOT MATCHED THEN - INSERT (UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned, LastOffenseAt, Aliases) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);""", + INSERT (UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned, LastOffenseAt, Aliases, WarningExpiresAt) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);""", user_id, - offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0, last_offense_at, aliases, - user_id, offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0, last_offense_at, aliases, + offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0, last_offense_at, aliases, warning_expires_at, + user_id, offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0, last_offense_at, aliases, warning_expires_at, ) cursor.close() finally: @@ -394,7 +401,7 @@ class Database: try: cursor = conn.cursor() cursor.execute( - "SELECT UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned, LastOffenseAt, Aliases FROM UserState" + "SELECT UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned, LastOffenseAt, Aliases, WarningExpiresAt FROM UserState" ) rows = cursor.fetchall() cursor.close() @@ -409,6 +416,7 @@ class Database: "warned": bool(row[6]), "last_offense_at": float(row[7]) if row[7] is not None else 0.0, "aliases": row[8] or "", + "warning_expires_at": float(row[9]) if row[9] is not None else 0.0, } for row in rows ] diff --git a/utils/drama_tracker.py b/utils/drama_tracker.py index 0d42530..778cca8 100644 --- a/utils/drama_tracker.py +++ b/utils/drama_tracker.py @@ -19,6 +19,7 @@ class UserDrama: last_warning_time: float = 0.0 last_analysis_time: float = 0.0 warned_since_reset: bool = False + warning_expires_at: float = 0.0 immune: bool = False # Topic drift tracking off_topic_count: int = 0 @@ -40,10 +41,12 @@ class DramaTracker: window_size: int = 10, window_minutes: int = 15, offense_reset_minutes: int = 120, + warning_expiration_minutes: int = 30, ): self.window_size = window_size self.window_seconds = window_minutes * 60 self.offense_reset_seconds = offense_reset_minutes * 60 + self.warning_expiration_seconds = warning_expiration_minutes * 60 self._users: dict[int, UserDrama] = {} def get_user(self, user_id: int) -> UserDrama: @@ -74,6 +77,7 @@ class DramaTracker: def get_drama_score(self, user_id: int, escalation_boost: float = 0.04) -> float: user = self.get_user(user_id) + self._expire_warning(user) now = time.time() self._prune_entries(user, now) @@ -105,6 +109,7 @@ class DramaTracker: def get_mute_threshold(self, user_id: int, base_threshold: float) -> float: """Lower the mute threshold if user was already warned.""" user = self.get_user(user_id) + self._expire_warning(user) if user.warned_since_reset: return base_threshold - 0.05 return base_threshold @@ -123,12 +128,34 @@ class DramaTracker: user.offense_count += 1 user.last_offense_time = now user.warned_since_reset = False + user.warning_expires_at = 0.0 return user.offense_count def record_warning(self, user_id: int) -> None: user = self.get_user(user_id) - user.last_warning_time = time.time() + now = time.time() + user.last_warning_time = now user.warned_since_reset = True + if self.warning_expiration_seconds > 0: + user.warning_expires_at = now + self.warning_expiration_seconds + else: + user.warning_expires_at = 0.0 # Never expires + + def _expire_warning(self, user: UserDrama) -> None: + """Clear warned flag if the warning has expired.""" + if ( + user.warned_since_reset + and user.warning_expires_at > 0 + and time.time() >= user.warning_expires_at + ): + user.warned_since_reset = False + user.warning_expires_at = 0.0 + + def is_warned(self, user_id: int) -> bool: + """Check if user is currently warned (respects expiration).""" + user = self.get_user(user_id) + self._expire_warning(user) + return user.warned_since_reset def can_warn(self, user_id: int, cooldown_minutes: int) -> bool: user = self.get_user(user_id) @@ -303,12 +330,16 @@ class DramaTracker: user.notes = state["user_notes"] if state.get("warned"): user.warned_since_reset = True + user.warning_expires_at = state.get("warning_expires_at", 0.0) or 0.0 + # Expire warning at load time if it's past due + self._expire_warning(user) if state.get("last_offense_at"): user.last_offense_time = state["last_offense_at"] # Apply time-based offense reset at load time if time.time() - user.last_offense_time > self.offense_reset_seconds: user.offense_count = 0 user.warned_since_reset = False + user.warning_expires_at = 0.0 user.last_offense_time = 0.0 if state.get("aliases"): user.aliases = [a.strip() for a in state["aliases"].split(",") if a.strip()]