feat: require warning before mute + sustained toxicity escalation
Gate mutes behind a prior warning: a first offense always gets a warning, and a mute only fires if warned_since_reset is True. The warned flag is persisted to the DB (new Warned column on UserState) and survives restarts. Add a post-warning escalation boost to drama_score: each high-scoring message after a warning adds +0.04 (configurable), so sustained bad behavior ramps toward the mute threshold instead of plateauing. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+16
-2
@@ -70,7 +70,7 @@ class DramaTracker:
|
||||
user.last_analysis_time = now
|
||||
self._prune_entries(user, now)
|
||||
|
||||
def get_drama_score(self, user_id: int) -> float:
|
||||
def get_drama_score(self, user_id: int, escalation_boost: float = 0.04) -> float:
|
||||
user = self.get_user(user_id)
|
||||
now = time.time()
|
||||
self._prune_entries(user, now)
|
||||
@@ -86,7 +86,19 @@ class DramaTracker:
|
||||
weighted_sum += entry.toxicity_score * weight
|
||||
total_weight += weight
|
||||
|
||||
return weighted_sum / total_weight if total_weight > 0 else 0.0
|
||||
base_score = weighted_sum / total_weight if total_weight > 0 else 0.0
|
||||
|
||||
# Escalation: if warned, each high-scoring message AFTER the warning
|
||||
# adds a boost so sustained bad behavior ramps toward mute threshold
|
||||
if user.warned_since_reset and user.last_warning_time > 0:
|
||||
post_warn_high = sum(
|
||||
1 for e in user.entries
|
||||
if e.timestamp > user.last_warning_time and e.toxicity_score >= 0.5
|
||||
)
|
||||
if post_warn_high > 0:
|
||||
base_score += escalation_boost * post_warn_high
|
||||
|
||||
return min(base_score, 1.0)
|
||||
|
||||
def get_mute_threshold(self, user_id: int, base_threshold: float) -> float:
|
||||
"""Lower the mute threshold if user was already warned."""
|
||||
@@ -272,6 +284,8 @@ class DramaTracker:
|
||||
user.baseline_coherence = state["baseline_coherence"]
|
||||
if "user_notes" in state and state["user_notes"]:
|
||||
user.notes = state["user_notes"]
|
||||
if state.get("warned"):
|
||||
user.warned_since_reset = True
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
Reference in New Issue
Block a user