feat: require warning before mute + sustained toxicity escalation
Gate mutes behind a prior warning: a first offense always gets a warning, and a mute only fires if warned_since_reset is True. The warned flag is persisted to the DB (new Warned column on UserState) and survives restarts.

Add a post-warning escalation boost to drama_score: each high-scoring message (toxicity_score >= 0.5) after a warning adds +0.04 (configurable via sentiment.escalation_boost), so sustained bad behavior ramps toward the mute threshold instead of plateauing.

Also lower batch_window_seconds from 10 to 4.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -292,7 +292,8 @@ class SentimentCog(commands.Cog):
|
|||||||
|
|
||||||
# Track the result in DramaTracker
|
# Track the result in DramaTracker
|
||||||
self.bot.drama_tracker.add_entry(user_id, score, categories, reasoning)
|
self.bot.drama_tracker.add_entry(user_id, score, categories, reasoning)
|
||||||
drama_score = self.bot.drama_tracker.get_drama_score(user_id)
|
escalation_boost = sentiment_config.get("escalation_boost", 0.04)
|
||||||
|
drama_score = self.bot.drama_tracker.get_drama_score(user_id, escalation_boost=escalation_boost)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"User %s (%d) | msg_score=%.2f | drama_score=%.2f | categories=%s | %s",
|
"User %s (%d) | msg_score=%.2f | drama_score=%.2f | categories=%s | %s",
|
||||||
@@ -358,10 +359,16 @@ class SentimentCog(commands.Cog):
|
|||||||
mute_threshold = self.bot.drama_tracker.get_mute_threshold(
|
mute_threshold = self.bot.drama_tracker.get_mute_threshold(
|
||||||
user_id, base_mute_threshold
|
user_id, base_mute_threshold
|
||||||
)
|
)
|
||||||
|
user_data = self.bot.drama_tracker.get_user(user_id)
|
||||||
# Mute: rolling average OR single message spike
|
# Mute: rolling average OR single message spike
|
||||||
if drama_score >= mute_threshold or score >= spike_mute:
|
if drama_score >= mute_threshold or score >= spike_mute:
|
||||||
effective_score = max(drama_score, score)
|
effective_score = max(drama_score, score)
|
||||||
await self._mute_user(user_ref_msg, effective_score, categories, db_message_id)
|
if user_data.warned_since_reset:
|
||||||
|
await self._mute_user(user_ref_msg, effective_score, categories, db_message_id)
|
||||||
|
else:
|
||||||
|
# Downgrade to warning — require a warning before muting
|
||||||
|
logger.info("Downgrading mute to warning for %s (no prior warning)", user_ref_msg.author)
|
||||||
|
await self._warn_user(user_ref_msg, effective_score, db_message_id)
|
||||||
# Warn: rolling average OR single message spike
|
# Warn: rolling average OR single message spike
|
||||||
elif drama_score >= warning_threshold or score >= spike_warn:
|
elif drama_score >= warning_threshold or score >= spike_warn:
|
||||||
effective_score = max(drama_score, score)
|
effective_score = max(drama_score, score)
|
||||||
@@ -556,7 +563,8 @@ class SentimentCog(commands.Cog):
|
|||||||
self._mark_analyzed(m.id)
|
self._mark_analyzed(m.id)
|
||||||
|
|
||||||
self.bot.drama_tracker.add_entry(user_id, score, categories, reasoning)
|
self.bot.drama_tracker.add_entry(user_id, score, categories, reasoning)
|
||||||
drama_score = self.bot.drama_tracker.get_drama_score(user_id)
|
escalation_boost = sentiment_config.get("escalation_boost", 0.04)
|
||||||
|
drama_score = self.bot.drama_tracker.get_drama_score(user_id, escalation_boost=escalation_boost)
|
||||||
|
|
||||||
# Save to DB
|
# Save to DB
|
||||||
content_summary = f"[Mention scan] {worst_msg}" if worst_msg else "[Mention scan] See conversation"
|
content_summary = f"[Mention scan] {worst_msg}" if worst_msg else "[Mention scan] See conversation"
|
||||||
@@ -599,9 +607,14 @@ class SentimentCog(commands.Cog):
|
|||||||
mute_threshold = self.bot.drama_tracker.get_mute_threshold(
|
mute_threshold = self.bot.drama_tracker.get_mute_threshold(
|
||||||
user_id, base_mute_threshold
|
user_id, base_mute_threshold
|
||||||
)
|
)
|
||||||
|
user_data = self.bot.drama_tracker.get_user(user_id)
|
||||||
if drama_score >= mute_threshold or score >= spike_mute:
|
if drama_score >= mute_threshold or score >= spike_mute:
|
||||||
effective_score = max(drama_score, score)
|
effective_score = max(drama_score, score)
|
||||||
await self._mute_user(ref_msg, effective_score, categories, db_message_id)
|
if user_data.warned_since_reset:
|
||||||
|
await self._mute_user(ref_msg, effective_score, categories, db_message_id)
|
||||||
|
else:
|
||||||
|
logger.info("Downgrading mute to warning for %s (no prior warning)", ref_msg.author)
|
||||||
|
await self._warn_user(ref_msg, effective_score, db_message_id)
|
||||||
elif drama_score >= warning_threshold or score >= spike_warn:
|
elif drama_score >= warning_threshold or score >= spike_warn:
|
||||||
effective_score = max(drama_score, score)
|
effective_score = max(drama_score, score)
|
||||||
await self._warn_user(ref_msg, effective_score, db_message_id)
|
await self._warn_user(ref_msg, effective_score, db_message_id)
|
||||||
@@ -747,6 +760,8 @@ class SentimentCog(commands.Cog):
|
|||||||
message_id=db_message_id,
|
message_id=db_message_id,
|
||||||
details=f"score={score:.2f}",
|
details=f"score={score:.2f}",
|
||||||
))
|
))
|
||||||
|
# Persist warned flag immediately so it survives restarts
|
||||||
|
self._save_user_state(message.author.id)
|
||||||
|
|
||||||
async def _handle_topic_drift(
|
async def _handle_topic_drift(
|
||||||
self, message: discord.Message, topic_category: str, topic_reasoning: str,
|
self, message: discord.Message, topic_category: str, topic_reasoning: str,
|
||||||
@@ -897,6 +912,7 @@ class SentimentCog(commands.Cog):
|
|||||||
off_topic_count=user_data.off_topic_count,
|
off_topic_count=user_data.off_topic_count,
|
||||||
baseline_coherence=user_data.baseline_coherence,
|
baseline_coherence=user_data.baseline_coherence,
|
||||||
user_notes=user_data.notes or None,
|
user_notes=user_data.notes or None,
|
||||||
|
warned=user_data.warned_since_reset,
|
||||||
))
|
))
|
||||||
self._dirty_users.discard(user_id)
|
self._dirty_users.discard(user_id)
|
||||||
|
|
||||||
@@ -923,6 +939,7 @@ class SentimentCog(commands.Cog):
|
|||||||
off_topic_count=user_data.off_topic_count,
|
off_topic_count=user_data.off_topic_count,
|
||||||
baseline_coherence=user_data.baseline_coherence,
|
baseline_coherence=user_data.baseline_coherence,
|
||||||
user_notes=user_data.notes or None,
|
user_notes=user_data.notes or None,
|
||||||
|
warned=user_data.warned_since_reset,
|
||||||
)
|
)
|
||||||
logger.info("Flushed %d dirty user states to DB.", len(dirty))
|
logger.info("Flushed %d dirty user states to DB.", len(dirty))
|
||||||
|
|
||||||
|
|||||||
@@ -17,8 +17,9 @@ sentiment:
|
|||||||
context_messages: 8 # Number of previous messages to include as context
|
context_messages: 8 # Number of previous messages to include as context
|
||||||
rolling_window_size: 10 # Number of messages to track per user
|
rolling_window_size: 10 # Number of messages to track per user
|
||||||
rolling_window_minutes: 15 # Time window for tracking
|
rolling_window_minutes: 15 # Time window for tracking
|
||||||
batch_window_seconds: 10 # Wait this long for more messages before analyzing (debounce)
|
batch_window_seconds: 4 # Wait this long for more messages before analyzing (debounce)
|
||||||
escalation_threshold: 0.25 # Triage toxicity score that triggers re-analysis with heavy model
|
escalation_threshold: 0.25 # Triage toxicity score that triggers re-analysis with heavy model
|
||||||
|
escalation_boost: 0.04 # Per-message drama boost after warning (sustained toxicity ramps toward mute)
|
||||||
|
|
||||||
game_channels:
|
game_channels:
|
||||||
gta-online: "GTA Online"
|
gta-online: "GTA Online"
|
||||||
|
|||||||
@@ -126,6 +126,12 @@ class Database:
|
|||||||
ALTER TABLE UserState ADD UserNotes NVARCHAR(MAX) NULL
|
ALTER TABLE UserState ADD UserNotes NVARCHAR(MAX) NULL
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
# --- Schema migration for warned flag (require warning before mute) ---
|
||||||
|
cursor.execute("""
|
||||||
|
IF COL_LENGTH('UserState', 'Warned') IS NULL
|
||||||
|
ALTER TABLE UserState ADD Warned BIT NOT NULL DEFAULT 0
|
||||||
|
""")
|
||||||
|
|
||||||
cursor.execute("""
|
cursor.execute("""
|
||||||
IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'BotSettings')
|
IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'BotSettings')
|
||||||
CREATE TABLE BotSettings (
|
CREATE TABLE BotSettings (
|
||||||
@@ -284,19 +290,20 @@ class Database:
|
|||||||
off_topic_count: int,
|
off_topic_count: int,
|
||||||
baseline_coherence: float = 0.85,
|
baseline_coherence: float = 0.85,
|
||||||
user_notes: str | None = None,
|
user_notes: str | None = None,
|
||||||
|
warned: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Upsert user state (offense count, immunity, off-topic count, coherence baseline, notes)."""
|
"""Upsert user state (offense count, immunity, off-topic count, coherence baseline, notes, warned)."""
|
||||||
if not self._available:
|
if not self._available:
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
await asyncio.to_thread(
|
await asyncio.to_thread(
|
||||||
self._save_user_state_sync,
|
self._save_user_state_sync,
|
||||||
user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes,
|
user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned,
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Failed to save user state")
|
logger.exception("Failed to save user state")
|
||||||
|
|
||||||
def _save_user_state_sync(self, user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes):
|
def _save_user_state_sync(self, user_id, offense_count, immune, off_topic_count, baseline_coherence, user_notes, warned):
|
||||||
conn = self._connect()
|
conn = self._connect()
|
||||||
try:
|
try:
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
@@ -306,14 +313,14 @@ class Database:
|
|||||||
ON target.UserId = source.UserId
|
ON target.UserId = source.UserId
|
||||||
WHEN MATCHED THEN
|
WHEN MATCHED THEN
|
||||||
UPDATE SET OffenseCount = ?, Immune = ?, OffTopicCount = ?,
|
UPDATE SET OffenseCount = ?, Immune = ?, OffTopicCount = ?,
|
||||||
BaselineCoherence = ?, UserNotes = ?,
|
BaselineCoherence = ?, UserNotes = ?, Warned = ?,
|
||||||
UpdatedAt = SYSUTCDATETIME()
|
UpdatedAt = SYSUTCDATETIME()
|
||||||
WHEN NOT MATCHED THEN
|
WHEN NOT MATCHED THEN
|
||||||
INSERT (UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes)
|
INSERT (UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned)
|
||||||
VALUES (?, ?, ?, ?, ?, ?);""",
|
VALUES (?, ?, ?, ?, ?, ?, ?);""",
|
||||||
user_id,
|
user_id,
|
||||||
offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes,
|
offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0,
|
||||||
user_id, offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes,
|
user_id, offense_count, 1 if immune else 0, off_topic_count, baseline_coherence, user_notes, 1 if warned else 0,
|
||||||
)
|
)
|
||||||
cursor.close()
|
cursor.close()
|
||||||
finally:
|
finally:
|
||||||
@@ -356,7 +363,7 @@ class Database:
|
|||||||
try:
|
try:
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
"SELECT UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes FROM UserState"
|
"SELECT UserId, OffenseCount, Immune, OffTopicCount, BaselineCoherence, UserNotes, Warned FROM UserState"
|
||||||
)
|
)
|
||||||
rows = cursor.fetchall()
|
rows = cursor.fetchall()
|
||||||
cursor.close()
|
cursor.close()
|
||||||
@@ -368,6 +375,7 @@ class Database:
|
|||||||
"off_topic_count": row[3],
|
"off_topic_count": row[3],
|
||||||
"baseline_coherence": float(row[4]),
|
"baseline_coherence": float(row[4]),
|
||||||
"user_notes": row[5] or "",
|
"user_notes": row[5] or "",
|
||||||
|
"warned": bool(row[6]),
|
||||||
}
|
}
|
||||||
for row in rows
|
for row in rows
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ class DramaTracker:
|
|||||||
user.last_analysis_time = now
|
user.last_analysis_time = now
|
||||||
self._prune_entries(user, now)
|
self._prune_entries(user, now)
|
||||||
|
|
||||||
def get_drama_score(self, user_id: int) -> float:
|
def get_drama_score(self, user_id: int, escalation_boost: float = 0.04) -> float:
|
||||||
user = self.get_user(user_id)
|
user = self.get_user(user_id)
|
||||||
now = time.time()
|
now = time.time()
|
||||||
self._prune_entries(user, now)
|
self._prune_entries(user, now)
|
||||||
@@ -86,7 +86,19 @@ class DramaTracker:
|
|||||||
weighted_sum += entry.toxicity_score * weight
|
weighted_sum += entry.toxicity_score * weight
|
||||||
total_weight += weight
|
total_weight += weight
|
||||||
|
|
||||||
return weighted_sum / total_weight if total_weight > 0 else 0.0
|
base_score = weighted_sum / total_weight if total_weight > 0 else 0.0
|
||||||
|
|
||||||
|
# Escalation: if warned, each high-scoring message AFTER the warning
|
||||||
|
# adds a boost so sustained bad behavior ramps toward mute threshold
|
||||||
|
if user.warned_since_reset and user.last_warning_time > 0:
|
||||||
|
post_warn_high = sum(
|
||||||
|
1 for e in user.entries
|
||||||
|
if e.timestamp > user.last_warning_time and e.toxicity_score >= 0.5
|
||||||
|
)
|
||||||
|
if post_warn_high > 0:
|
||||||
|
base_score += escalation_boost * post_warn_high
|
||||||
|
|
||||||
|
return min(base_score, 1.0)
|
||||||
|
|
||||||
def get_mute_threshold(self, user_id: int, base_threshold: float) -> float:
|
def get_mute_threshold(self, user_id: int, base_threshold: float) -> float:
|
||||||
"""Lower the mute threshold if user was already warned."""
|
"""Lower the mute threshold if user was already warned."""
|
||||||
@@ -272,6 +284,8 @@ class DramaTracker:
|
|||||||
user.baseline_coherence = state["baseline_coherence"]
|
user.baseline_coherence = state["baseline_coherence"]
|
||||||
if "user_notes" in state and state["user_notes"]:
|
if "user_notes" in state and state["user_notes"]:
|
||||||
user.notes = state["user_notes"]
|
user.notes = state["user_notes"]
|
||||||
|
if state.get("warned"):
|
||||||
|
user.warned_since_reset = True
|
||||||
count += 1
|
count += 1
|
||||||
return count
|
return count
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user