Files
Breehavior-Monitor/cogs/sentiment/__init__.py
AJ Isaacs 660086a500 refactor: extract sentiment cog into package with shared _process_finding
Convert cogs/sentiment.py (1050 lines) into cogs/sentiment/ package:
- __init__.py (656 lines): core SentimentCog with new _process_finding()
  that deduplicates the per-user finding loop from _process_buffered and
  _run_mention_scan (~90 lines each → single shared method)
- actions.py: mute_user, warn_user
- topic_drift.py: handle_topic_drift
- channel_redirect.py: handle_channel_redirect, build_channel_context
- coherence.py: handle_coherence_alert
- log_utils.py: log_analysis, log_action, score_color
- state.py: save_user_state, flush_dirty_states

All extracted modules use plain async functions (not methods) receiving
bot/config as parameters. Named log_utils.py to avoid shadowing stdlib
logging. Also update CLAUDE.md with comprehensive project documentation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 17:06:27 -05:00

657 lines
26 KiB
Python

import asyncio
import logging
from datetime import datetime, timezone
import discord
from discord.ext import commands, tasks
from cogs.sentiment.actions import mute_user, warn_user
from cogs.sentiment.channel_redirect import build_channel_context, handle_channel_redirect
from cogs.sentiment.coherence import handle_coherence_alert
from cogs.sentiment.log_utils import log_analysis
from cogs.sentiment.state import flush_dirty_states
from cogs.sentiment.topic_drift import handle_topic_drift
# Module logger; child of the bot-wide "bcs" logger hierarchy.
logger = logging.getLogger("bcs.sentiment")
# How often to flush dirty user states to DB (seconds)
STATE_FLUSH_INTERVAL = 300 # 5 minutes
class SentimentCog(commands.Cog):
    """Monitors configured channels, batches messages per channel, scores them
    with LLM conversation analysis, and applies moderation (warn/mute) plus
    topic-drift, game-channel-redirect, and coherence handling.

    Two entry paths exist: the debounced per-channel buffer (normal scoring)
    and mention-triggered scans (user pings the bot to flag a conversation).
    """

    def __init__(self, bot: commands.Bot):
        self.bot = bot
        # Track which user IDs have unsaved in-memory changes
        self._dirty_users: set[int] = set()
        # Per-user redirect cooldown: {user_id: last_redirect_datetime}
        self._redirect_cooldowns: dict[int, datetime] = {}
        # Debounce buffer: keyed by channel_id, stores list of messages from ALL users
        self._message_buffer: dict[int, list[discord.Message]] = {}
        # Pending debounce timer tasks (per-channel)
        self._debounce_tasks: dict[int, asyncio.Task] = {}
        # Mention scan tasks (separate from debounce)
        self._mention_scan_tasks: dict[int, asyncio.Task] = {}
        # Mention scan state
        self._mention_scan_cooldowns: dict[int, datetime] = {} # {channel_id: last_scan_time}
        self._mention_scan_results: dict[int, str] = {} # {trigger_message_id: findings_summary}
        self._analyzed_message_ids: set[int] = set() # Discord message IDs already analyzed
        # Cap on the dedup set; _mark_analyzed prunes when exceeded.
        self._max_analyzed_ids = 500
async def cog_load(self):
    """Start the periodic dirty-state flush loop when the cog loads."""
    self._flush_states.start()
async def cog_unload(self):
    """Shut down gracefully: stop the flush loop, cancel pending timers,
    drain any buffered messages, then flush remaining dirty state to DB.
    """
    self._flush_states.cancel()
    # Cancel all pending debounce timers and process remaining buffers
    for task in self._debounce_tasks.values():
        task.cancel()
    self._debounce_tasks.clear()
    for task in self._mention_scan_tasks.values():
        task.cancel()
    self._mention_scan_tasks.clear()
    # Analyze whatever was still buffered so shutdown loses no messages.
    for channel_id in list(self._message_buffer):
        await self._process_buffered(channel_id)
    # Final flush on shutdown
    await flush_dirty_states(self.bot, self._dirty_users)
@commands.Cog.listener()
async def on_message(self, message: discord.Message):
    """Gate incoming messages, then either start a mention scan or buffer
    the message for debounced conversation analysis.

    Guard order matters: bot/DM/config/immunity checks first, then the
    explicit-mention branch, then buffering with a per-channel debounce.
    """
    logger.info("MSG from %s in #%s: %s", message.author, getattr(message.channel, 'name', 'DM'), message.content[:80] if message.content else "(empty)")
    # Ignore bots (including ourselves)
    if message.author.bot:
        return
    # Ignore DMs
    if not message.guild:
        return
    config = self.bot.config
    monitoring = config.get("monitoring", {})
    if not monitoring.get("enabled", True):
        return
    # Check if channel is monitored (empty list means "all channels")
    monitored_channels = monitoring.get("channels", [])
    if monitored_channels and message.channel.id not in monitored_channels:
        return
    # Check ignored users
    if message.author.id in monitoring.get("ignored_users", []):
        return
    # Check immune roles
    immune_roles = set(monitoring.get("immune_roles", []))
    if immune_roles and any(
        r.id in immune_roles for r in message.author.roles
    ):
        return
    # Check per-user immunity
    if self.bot.drama_tracker.is_immune(message.author.id):
        return
    # Explicit @mention of the bot triggers a mention scan instead of scoring.
    # Reply-pings (Discord auto-adds replied-to user to mentions) should NOT
    # trigger scans — and reply-to-bot messages should still be scored normally
    # so toxic replies to bot warnings aren't silently skipped.
    bot_mentioned_in_text = (
        f"<@{self.bot.user.id}>" in (message.content or "")
        or f"<@!{self.bot.user.id}>" in (message.content or "")
    )
    if bot_mentioned_in_text:
        mention_config = config.get("mention_scan", {})
        if mention_config.get("enabled", True):
            await self._maybe_start_mention_scan(message, mention_config)
        return
    # Skip if empty
    if not message.content or not message.content.strip():
        return
    # Buffer the message and start/reset debounce timer (per-channel)
    channel_id = message.channel.id
    if channel_id not in self._message_buffer:
        self._message_buffer[channel_id] = []
    self._message_buffer[channel_id].append(message)
    # Cancel existing debounce timer for this channel
    existing_task = self._debounce_tasks.get(channel_id)
    if existing_task and not existing_task.done():
        existing_task.cancel()
    batch_window = config.get("sentiment", {}).get("batch_window_seconds", 3)
    self._debounce_tasks[channel_id] = asyncio.create_task(
        self._debounce_then_process(channel_id, batch_window)
    )
async def _debounce_then_process(self, channel_id: int, delay: float):
"""Sleep for the debounce window, then process the buffered messages."""
try:
await asyncio.sleep(delay)
await self._process_buffered(channel_id)
except asyncio.CancelledError:
pass # Timer was reset by a new message — expected
def _resolve_thresholds(self) -> dict:
"""Resolve effective moderation thresholds based on current mode."""
config = self.bot.config
sentiment_config = config.get("sentiment", {})
mode_config = self.bot.get_mode_config()
moderation_level = mode_config.get("moderation", "full")
if moderation_level == "relaxed" and "relaxed_thresholds" in mode_config:
rt = mode_config["relaxed_thresholds"]
return {
"warning": rt.get("warning_threshold", 0.80),
"mute": rt.get("mute_threshold", 0.85),
"spike_warn": rt.get("spike_warning_threshold", 0.70),
"spike_mute": rt.get("spike_mute_threshold", 0.85),
}
return {
"warning": sentiment_config.get("warning_threshold", 0.6),
"mute": sentiment_config.get("mute_threshold", 0.75),
"spike_warn": sentiment_config.get("spike_warning_threshold", 0.5),
"spike_mute": sentiment_config.get("spike_mute_threshold", 0.8),
}
async def _apply_moderation(
    self,
    message: discord.Message,
    user_id: int,
    score: float,
    drama_score: float,
    categories: list[str],
    thresholds: dict,
    db_message_id: int | None,
) -> None:
    """Issue a warning or mute when the user's scores cross the thresholds.

    Mutes are only issued to users who have already been warned since
    their last reset; otherwise the mute is downgraded to a warning.
    """
    tracker = self.bot.drama_tracker
    per_user_mute = tracker.get_mute_threshold(user_id, thresholds["mute"])
    user_data = tracker.get_user(user_id)
    effective_score = max(drama_score, score)
    mute_worthy = drama_score >= per_user_mute or score >= thresholds["spike_mute"]
    warn_worthy = drama_score >= thresholds["warning"] or score >= thresholds["spike_warn"]
    if mute_worthy:
        if user_data.warned_since_reset:
            await mute_user(self.bot, message, effective_score, categories, db_message_id, self._dirty_users)
        else:
            logger.info("Downgrading mute to warning for %s (no prior warning)", message.author)
            await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users)
    elif warn_worthy:
        await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users)
@staticmethod
def _build_user_lookup(messages: list[discord.Message]) -> dict[str, tuple[int, discord.Message, list[discord.Message]]]:
"""Build username -> (user_id, ref_msg, [messages]) mapping."""
lookup: dict[str, tuple[int, discord.Message, list[discord.Message]]] = {}
for msg in messages:
name = msg.author.display_name
if name not in lookup:
lookup[name] = (msg.author.id, msg, [])
lookup[name][2].append(msg)
return lookup
def _build_user_notes_map(self, messages: list[discord.Message]) -> dict[str, str]:
"""Build username -> LLM notes mapping for users in the message list."""
notes_map: dict[str, str] = {}
for msg in messages:
name = msg.author.display_name
if name not in notes_map:
notes = self.bot.drama_tracker.get_user_notes(msg.author.id)
if notes:
notes_map[name] = notes
return notes_map
@staticmethod
def _build_conversation(
messages: list[discord.Message],
) -> list[tuple[str, str, datetime, str | None]]:
"""Convert a list of Discord messages to conversation tuples with reply resolution."""
msg_id_to_author = {m.id: m.author.display_name for m in messages}
conversation = []
for msg in messages:
reply_to = None
if msg.reference and msg.reference.message_id:
reply_to = msg_id_to_author.get(msg.reference.message_id)
if not reply_to:
ref = msg.reference.cached_message
if ref:
reply_to = ref.author.display_name
conversation.append((
msg.author.display_name,
msg.content,
msg.created_at,
reply_to,
))
return conversation
# -- Shared finding processor --
async def _process_finding(
    self,
    finding: dict,
    user_lookup: dict,
    *,
    sentiment_config: dict,
    dry_run: bool,
    thresholds: dict,
    db_content: str,
    db_topic_category: str,
    db_topic_reasoning: str,
    db_coherence_score: float | None,
    db_coherence_flag: str | None,
    game_channels: dict | None = None,
    coherence_config: dict | None = None,
) -> tuple[str, float, float, list[str]] | None:
    """Process a single user finding.

    Shared by buffered analysis and mention scans: tracks the scores in the
    DramaTracker, persists the analysis row, runs feature handlers, logs,
    and applies moderation.

    Returns (username, score, drama_score, categories) or None if skipped
    (the LLM named a user not present in user_lookup).
    When game_channels is not None, topic drift, game redirect, and coherence
    handlers are active (buffered analysis mode). When None, they are skipped
    (mention scan mode).
    """
    username = finding["username"]
    lookup = user_lookup.get(username)
    if not lookup:
        return None
    # Per-user message list is unused here; callers extract it themselves.
    user_id, user_ref_msg, _ = lookup
    score = finding["toxicity_score"]
    categories = finding["categories"]
    reasoning = finding["reasoning"]
    off_topic = finding.get("off_topic", False)
    note_update = finding.get("note_update")
    # Track in DramaTracker
    self.bot.drama_tracker.add_entry(user_id, score, categories, reasoning)
    escalation_boost = sentiment_config.get("escalation_boost", 0.04)
    drama_score = self.bot.drama_tracker.get_drama_score(user_id, escalation_boost=escalation_boost)
    logger.info(
        "User %s (%d) | msg_score=%.2f | drama_score=%.2f | categories=%s | %s",
        username, user_id, score, drama_score, categories, reasoning,
    )
    # Save to DB
    db_message_id = await self.bot.db.save_message_and_analysis(
        guild_id=user_ref_msg.guild.id,
        channel_id=user_ref_msg.channel.id,
        user_id=user_id,
        username=username,
        content=db_content,
        message_ts=user_ref_msg.created_at.replace(tzinfo=timezone.utc),
        toxicity_score=score,
        drama_score=drama_score,
        categories=categories,
        reasoning=reasoning,
        off_topic=off_topic,
        topic_category=db_topic_category,
        topic_reasoning=db_topic_reasoning,
        coherence_score=db_coherence_score,
        coherence_flag=db_coherence_flag,
    )
    # Feature handlers — only active during buffered analysis (game_channels set)
    if game_channels is not None:
        if off_topic:
            await handle_topic_drift(
                self.bot, user_ref_msg, db_topic_category, db_topic_reasoning,
                db_message_id, self._dirty_users,
            )
        detected_game = finding.get("detected_game")
        if detected_game and game_channels and not dry_run:
            await handle_channel_redirect(
                self.bot, user_ref_msg, detected_game, game_channels,
                db_message_id, self._redirect_cooldowns,
            )
        if coherence_config is not None and coherence_config.get("enabled", True):
            coherence_score = finding.get("coherence_score", 0.85)
            coherence_flag = finding.get("coherence_flag", "normal")
            degradation = self.bot.drama_tracker.update_coherence(
                user_id=user_id,
                score=coherence_score,
                flag=coherence_flag,
                drop_threshold=coherence_config.get("drop_threshold", 0.3),
                absolute_floor=coherence_config.get("absolute_floor", 0.5),
                cooldown_minutes=coherence_config.get("cooldown_minutes", 30),
            )
            if degradation and not dry_run:
                await handle_coherence_alert(
                    self.bot, user_ref_msg, degradation, coherence_config,
                    db_message_id, self._dirty_users,
                )
    # Note update
    if note_update:
        self.bot.drama_tracker.update_user_notes(user_id, note_update)
    # add_entry (and any note update) changed in-memory state, so the user is
    # always marked dirty. (Previously this add was duplicated inside the
    # note_update branch — one unconditional add suffices.)
    self._dirty_users.add(user_id)
    # Log analysis
    await log_analysis(
        user_ref_msg, score, drama_score, categories, reasoning,
        off_topic, db_topic_category,
    )
    # Moderation
    if not dry_run:
        await self._apply_moderation(
            user_ref_msg, user_id, score, drama_score, categories, thresholds, db_message_id,
        )
    return (username, score, drama_score, categories)
# -- Buffered analysis --
async def _process_buffered(self, channel_id: int):
    """Collect buffered messages, build conversation block, and run analysis.

    Pipeline: drain the channel buffer -> prepend recent channel history as
    context -> triage with the lightweight model -> optionally escalate to
    the heavy model -> process each per-user finding via _process_finding.
    """
    messages = self._message_buffer.pop(channel_id, [])
    self._debounce_tasks.pop(channel_id, None)
    if not messages:
        return
    # Use the last message as reference for channel/guild
    ref_message = messages[-1]
    channel = ref_message.channel
    config = self.bot.config
    sentiment_config = config.get("sentiment", {})
    game_channels = config.get("game_channels", {})
    # Fetch some history before the buffered messages for leading context
    context_count = sentiment_config.get("context_messages", 8)
    oldest_buffered = messages[0]
    history_messages: list[discord.Message] = []
    try:
        # Over-fetch (+5) because bot/empty messages are filtered out below.
        async for msg in channel.history(limit=context_count + 5, before=oldest_buffered):
            if msg.author.bot:
                continue
            if not msg.content or not msg.content.strip():
                continue
            history_messages.append(msg)
            if len(history_messages) >= context_count:
                break
    except discord.HTTPException:
        # Best-effort: proceed without leading context on fetch failure.
        pass
    history_messages.reverse() # chronological order
    # Combine: history (context) + buffered (new messages to analyze)
    new_message_start = len(history_messages)
    all_messages = history_messages + messages
    conversation = self._build_conversation(all_messages)
    if not conversation:
        return
    user_notes_map = self._build_user_notes_map(messages)
    channel_context = build_channel_context(ref_message, game_channels)
    logger.info(
        "Channel analysis: %d new messages (+%d context) in #%s",
        len(messages), len(history_messages),
        getattr(channel, 'name', 'unknown'),
    )
    # TRIAGE: Lightweight model — conversation-level analysis
    result = await self.bot.llm.analyze_conversation(
        conversation,
        channel_context=channel_context,
        user_notes_map=user_notes_map,
        new_message_start=new_message_start,
    )
    if result is None:
        return
    # ESCALATION: Re-analyze with heavy model if any finding warrants it
    escalation_threshold = sentiment_config.get("escalation_threshold", 0.25)
    needs_escalation = any(
        f["toxicity_score"] >= escalation_threshold
        or f.get("off_topic", False)
        or f.get("coherence_score", 1.0) < 0.6
        for f in result.get("user_findings", [])
    )
    if needs_escalation:
        heavy_result = await self.bot.llm_heavy.analyze_conversation(
            conversation,
            channel_context=channel_context,
            user_notes_map=user_notes_map,
            new_message_start=new_message_start,
        )
        # Fall back to the triage result if the heavy call fails.
        if heavy_result is not None:
            logger.info(
                "Escalated to heavy model for #%s",
                getattr(channel, 'name', 'unknown'),
            )
            result = heavy_result
    user_lookup = self._build_user_lookup(messages)
    # Mark all buffered messages as analyzed (for mention scan dedup)
    for m in messages:
        self._mark_analyzed(m.id)
    dry_run = config.get("monitoring", {}).get("dry_run", False)
    thresholds = self._resolve_thresholds()
    coherence_config = config.get("coherence", {})
    # Process per-user findings
    for finding in result.get("user_findings", []):
        username = finding["username"]
        lookup = user_lookup.get(username)
        if not lookup:
            # LLM named a user with no buffered messages; nothing to store.
            continue
        _, _, user_msgs = lookup
        # DB row stores this user's combined batch content, capped at 4000 chars.
        combined_content = "\n".join(
            m.content for m in user_msgs if m.content and m.content.strip()
        )[:4000]
        await self._process_finding(
            finding, user_lookup,
            sentiment_config=sentiment_config,
            dry_run=dry_run,
            thresholds=thresholds,
            db_content=combined_content,
            db_topic_category=finding.get("topic_category", "general_chat"),
            db_topic_reasoning=finding.get("topic_reasoning", ""),
            db_coherence_score=finding.get("coherence_score", 0.85),
            db_coherence_flag=finding.get("coherence_flag", "normal"),
            game_channels=game_channels,
            coherence_config=coherence_config,
        )
# -- Mention scan methods --
def _mark_analyzed(self, discord_message_id: int):
"""Track a Discord message ID as already analyzed."""
self._analyzed_message_ids.add(discord_message_id)
if len(self._analyzed_message_ids) > self._max_analyzed_ids:
sorted_ids = sorted(self._analyzed_message_ids)
self._analyzed_message_ids = set(sorted_ids[len(sorted_ids) // 2:])
async def _maybe_start_mention_scan(
    self, trigger_message: discord.Message, mention_config: dict
):
    """Launch a mention-triggered scan unless the per-channel cooldown blocks it."""
    chan_id = trigger_message.channel.id
    now = datetime.now(timezone.utc)
    window = mention_config.get("cooldown_seconds", 60)
    previous = self._mention_scan_cooldowns.get(chan_id)
    if previous and (now - previous).total_seconds() < window:
        logger.info(
            "Mention scan cooldown active for #%s, skipping.",
            getattr(trigger_message.channel, "name", "unknown"),
        )
        return
    self._mention_scan_cooldowns[chan_id] = now
    # Extract the user's concern: strip both ping formats (<@id> and <@!id>).
    bot_id = self.bot.user.id
    concern = (
        trigger_message.content
        .replace(f"<@{bot_id}>", "")
        .replace(f"<@!{bot_id}>", "")
        .strip()
    ) or "(user pinged bot without specific concern)"
    # Replace any in-flight scan for this channel with the new one.
    running = self._mention_scan_tasks.get(chan_id)
    if running and not running.done():
        running.cancel()
    self._mention_scan_tasks[chan_id] = asyncio.create_task(
        self._run_mention_scan(trigger_message, concern, mention_config)
    )
async def _run_mention_scan(
    self,
    trigger_message: discord.Message,
    mention_text: str,
    mention_config: dict,
):
    """Scan recent channel messages with ONE conversation-level LLM call.

    Results are summarized into self._mention_scan_results keyed by the
    trigger message ID (consumed elsewhere, presumably by a chat cog —
    TODO confirm against the ChatCog reader). Users whose messages were all
    analyzed already are skipped to avoid double-scoring.
    """
    channel = trigger_message.channel
    scan_count = mention_config.get("scan_messages", 30)
    config = self.bot.config
    sentiment_config = config.get("sentiment", {})
    game_channels = config.get("game_channels", {})
    # Fetch recent messages (before the trigger, skip bots/empty)
    raw_messages: list[discord.Message] = []
    try:
        # Over-fetch (+10) because bot/empty messages are filtered below.
        async for msg in channel.history(limit=scan_count + 10, before=trigger_message):
            if msg.author.bot:
                continue
            if not msg.content or not msg.content.strip():
                continue
            raw_messages.append(msg)
            if len(raw_messages) >= scan_count:
                break
    except discord.HTTPException:
        logger.warning("Failed to fetch history for mention scan in #%s",
                       getattr(channel, "name", "unknown"))
        return
    raw_messages.reverse() # chronological order
    if not raw_messages:
        self._mention_scan_results[trigger_message.id] = "No recent messages found to analyze."
        return
    logger.info(
        "Mention scan triggered by %s in #%s: %d messages (single LLM call). Focus: %s",
        trigger_message.author.display_name,
        getattr(channel, "name", "unknown"),
        len(raw_messages),
        mention_text[:80],
    )
    conversation = self._build_conversation(raw_messages)
    user_notes_map = self._build_user_notes_map(raw_messages)
    channel_context = build_channel_context(raw_messages[0], game_channels)
    # Tell the LLM why this scan was requested so it can weigh the concern.
    mention_context = (
        f"A user flagged this conversation and said: \"{mention_text}\"\n"
        f"Pay special attention to whether this concern is valid."
    )
    # Single LLM call
    result = await self.bot.llm.analyze_conversation(
        conversation,
        mention_context=mention_context,
        channel_context=channel_context,
        user_notes_map=user_notes_map,
    )
    if result is None:
        logger.warning("Conversation analysis failed for mention scan.")
        self._mention_scan_results[trigger_message.id] = "Analysis failed."
        return
    user_lookup = self._build_user_lookup(raw_messages)
    findings: list[str] = []
    dry_run = config.get("monitoring", {}).get("dry_run", False)
    thresholds = self._resolve_thresholds()
    for finding in result.get("user_findings", []):
        username = finding["username"]
        lookup = user_lookup.get(username)
        if not lookup:
            logger.warning("Mention scan: LLM returned unknown user '%s', skipping.", username)
            continue
        user_id, ref_msg, user_msgs = lookup
        # Skip if all their messages were already analyzed
        if all(m.id in self._analyzed_message_ids for m in user_msgs):
            continue
        # Mark their messages as analyzed
        for m in user_msgs:
            self._mark_analyzed(m.id)
        worst_msg = finding.get("worst_message")
        content = f"[Mention scan] {worst_msg}" if worst_msg else "[Mention scan] See conversation"
        off_topic = finding.get("off_topic", False)
        # Coherence is not evaluated in mention scans (db fields left None).
        result_tuple = await self._process_finding(
            finding, user_lookup,
            sentiment_config=sentiment_config,
            dry_run=dry_run,
            thresholds=thresholds,
            db_content=content,
            db_topic_category="personal_drama" if off_topic else "gaming",
            db_topic_reasoning=finding.get("reasoning", ""),
            db_coherence_score=None,
            db_coherence_flag=None,
        )
        if result_tuple:
            _, score, _, categories = result_tuple
            # Only surface notable users (score >= 0.3) in the summary.
            if score >= 0.3:
                cat_str = ", ".join(c for c in categories if c != "none") or "none"
                findings.append(f"{username}: {score:.2f} ({cat_str})")
    # Build summary for ChatCog
    convo_summary = result.get("conversation_summary", "")
    if findings:
        summary = f"Scanned {len(raw_messages)} msgs. {convo_summary} Notable: " + "; ".join(findings[:5])
    else:
        summary = f"Scanned {len(raw_messages)} msgs. {convo_summary}"
    # Prune old scan results (keep roughly the 10 newest trigger IDs)
    if len(self._mention_scan_results) > 20:
        oldest = sorted(self._mention_scan_results.keys())[:len(self._mention_scan_results) - 10]
        for k in oldest:
            del self._mention_scan_results[k]
    self._mention_scan_results[trigger_message.id] = summary
    logger.info(
        "Mention scan complete in #%s: 1 LLM call, %d messages, %d users flagged",
        getattr(channel, "name", "unknown"),
        len(raw_messages),
        len(findings),
    )
# -- State flush loop --
@tasks.loop(seconds=STATE_FLUSH_INTERVAL)
async def _flush_states(self):
    """Periodically persist all dirty user states to the database."""
    await flush_dirty_states(self.bot, self._dirty_users)
@_flush_states.before_loop
async def _before_flush(self):
    """Delay the flush loop until the bot's Discord connection is ready."""
    await self.bot.wait_until_ready()
async def setup(bot: commands.Bot):
    """discord.py extension entry point: register the sentiment cog."""
    cog = SentimentCog(bot)
    await bot.add_cog(cog)