feat: add server rule violation detection and compress prompts
- LLM now evaluates messages against numbered server rules and reports violated_rules in analysis output
- Warnings and mutes cite the specific rule(s) broken
- Rules extracted to prompts/rules.txt for injection into prompts
- Personality prompts moved to prompts/personalities/ and compressed (~63% reduction across all prompt files)
- All prompt files tightened: removed redundancy, consolidated Do NOT sections, trimmed examples while preserving behavioral instructions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -85,7 +85,7 @@ class ChatCog(commands.Cog):
|
||||
def _get_active_prompt(self) -> str:
|
||||
"""Load the chat prompt for the current mode."""
|
||||
mode_config = self.bot.get_mode_config()
|
||||
prompt_file = mode_config.get("prompt_file", "chat_personality.txt")
|
||||
prompt_file = mode_config.get("prompt_file", "personalities/chat_personality.txt")
|
||||
return _load_prompt(prompt_file)
|
||||
|
||||
async def _build_memory_context(self, user_id: int, message_text: str, channel_name: str) -> str:
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
import discord
|
||||
@@ -18,6 +19,34 @@ logger = logging.getLogger("bcs.sentiment")
|
||||
# How often to flush dirty user states to DB (seconds)
|
||||
STATE_FLUSH_INTERVAL = 300 # 5 minutes
|
||||
|
||||
# Load server rules from prompt file (cached at import time)
|
||||
_PROMPTS_DIR = Path(__file__).resolve().parent.parent.parent / "prompts"
|
||||
|
||||
|
||||
def _load_rules() -> tuple[str, dict[int, str]]:
    """Load the server rules from ``prompts/rules.txt``.

    Returns:
        A ``(raw_text, rules_by_number)`` pair. ``raw_text`` is the stripped
        file content. ``rules_by_number`` maps the rule number to the rule
        text for every line shaped like ``"<int>. <text>"``; lines that do
        not have that shape stay in ``raw_text`` but are left out of the
        mapping. ``("", {})`` is returned when the file is missing, empty,
        or cannot be read.
    """
    path = _PROMPTS_DIR / "rules.txt"
    try:
        text = path.read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        # No rules file: nothing to load, same result as an empty file.
        return "", {}
    except (OSError, UnicodeDecodeError) as exc:
        # This function is called at module import time, so an unreadable
        # file (a directory, bad permissions, invalid UTF-8) must not stop
        # the module from loading. Report it and continue without rules.
        logging.getLogger("bcs.sentiment").warning(
            "Could not read server rules from %s: %s", path, exc
        )
        return "", {}
    if not text:
        return "", {}

    rules_dict: dict[int, str] = {}
    for raw_line in text.splitlines():
        # Split on the first ". " only, so rule text may itself contain ". ".
        number, separator, rule_text = raw_line.strip().partition(". ")
        if not separator:
            # Blank line or a line without the "<number>. " prefix.
            continue
        try:
            rules_dict[int(number)] = rule_text
        except ValueError:
            # The prefix before ". " is not an integer; not a numbered rule.
            continue
    return text, rules_dict
|
||||
|
||||
|
||||
_RULES_TEXT, _RULES_DICT = _load_rules()
|
||||
|
||||
|
||||
class SentimentCog(commands.Cog):
|
||||
def __init__(self, bot: commands.Bot):
|
||||
@@ -176,20 +205,27 @@ class SentimentCog(commands.Cog):
|
||||
categories: list[str],
|
||||
thresholds: dict,
|
||||
db_message_id: int | None,
|
||||
violated_rules: list[int] | None = None,
|
||||
) -> None:
|
||||
"""Issue a warning or mute based on scores and thresholds."""
|
||||
rules_config = _RULES_DICT
|
||||
mute_threshold = self.bot.drama_tracker.get_mute_threshold(user_id, thresholds["mute"])
|
||||
user_data = self.bot.drama_tracker.get_user(user_id)
|
||||
if drama_score >= mute_threshold or score >= thresholds["spike_mute"]:
|
||||
effective_score = max(drama_score, score)
|
||||
if user_data.warned_since_reset:
|
||||
await mute_user(self.bot, message, effective_score, categories, db_message_id, self._dirty_users)
|
||||
await mute_user(self.bot, message, effective_score, categories, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config)
|
||||
else:
|
||||
logger.info("Downgrading mute to warning for %s (no prior warning)", message.author)
|
||||
await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users)
|
||||
await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config)
|
||||
elif drama_score >= thresholds["warning"] or score >= thresholds["spike_warn"]:
|
||||
effective_score = max(drama_score, score)
|
||||
await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users)
|
||||
await warn_user(self.bot, message, effective_score, db_message_id, self._dirty_users, violated_rules=violated_rules, rules_config=rules_config)
|
||||
|
||||
@staticmethod
def _build_rules_context() -> str:
    """Return the server rules text held in the module-level ``_RULES_TEXT``.

    The value is handed back unchanged.
    """
    rules_text = _RULES_TEXT
    return rules_text
|
||||
|
||||
@staticmethod
|
||||
def _build_user_lookup(messages: list[discord.Message]) -> dict[str, tuple[int, discord.Message, list[discord.Message]]]:
|
||||
@@ -359,6 +395,7 @@ class SentimentCog(commands.Cog):
|
||||
categories = finding["categories"]
|
||||
reasoning = finding["reasoning"]
|
||||
off_topic = finding.get("off_topic", False)
|
||||
violated_rules = finding.get("violated_rules", [])
|
||||
note_update = finding.get("note_update")
|
||||
|
||||
# Track in DramaTracker
|
||||
@@ -449,6 +486,7 @@ class SentimentCog(commands.Cog):
|
||||
if not dry_run:
|
||||
await self._apply_moderation(
|
||||
user_ref_msg, user_id, score, drama_score, categories, thresholds, db_message_id,
|
||||
violated_rules=violated_rules,
|
||||
)
|
||||
|
||||
return (username, score, drama_score, categories)
|
||||
@@ -507,6 +545,7 @@ class SentimentCog(commands.Cog):
|
||||
alias_context = self._build_alias_context(all_messages, anon_map)
|
||||
|
||||
channel_context = build_channel_context(ref_message, game_channels)
|
||||
rules_context = self._build_rules_context()
|
||||
|
||||
logger.info(
|
||||
"Channel analysis: %d new messages (+%d context) in #%s",
|
||||
@@ -521,6 +560,7 @@ class SentimentCog(commands.Cog):
|
||||
user_notes_map=anon_notes,
|
||||
new_message_start=new_message_start,
|
||||
user_aliases=alias_context,
|
||||
rules_context=rules_context,
|
||||
)
|
||||
|
||||
if result is None:
|
||||
@@ -541,6 +581,7 @@ class SentimentCog(commands.Cog):
|
||||
user_notes_map=anon_notes,
|
||||
new_message_start=new_message_start,
|
||||
user_aliases=alias_context,
|
||||
rules_context=rules_context,
|
||||
)
|
||||
if heavy_result is not None:
|
||||
logger.info(
|
||||
@@ -683,6 +724,7 @@ class SentimentCog(commands.Cog):
|
||||
alias_context = self._build_alias_context(raw_messages, anon_map)
|
||||
|
||||
channel_context = build_channel_context(raw_messages[0], game_channels)
|
||||
rules_context = self._build_rules_context()
|
||||
mention_context = (
|
||||
f"A user flagged this conversation and said: \"{mention_text}\"\n"
|
||||
f"Pay special attention to whether this concern is valid."
|
||||
@@ -695,6 +737,7 @@ class SentimentCog(commands.Cog):
|
||||
channel_context=channel_context,
|
||||
user_notes_map=anon_notes,
|
||||
user_aliases=alias_context,
|
||||
rules_context=rules_context,
|
||||
)
|
||||
|
||||
if result is None:
|
||||
|
||||
@@ -13,6 +13,7 @@ logger = logging.getLogger("bcs.sentiment")
|
||||
async def mute_user(
|
||||
bot, message: discord.Message, score: float,
|
||||
categories: list[str], db_message_id: int | None, dirty_users: set[int],
|
||||
violated_rules: list[int] | None = None, rules_config: dict | None = None,
|
||||
):
|
||||
member = message.author
|
||||
if not isinstance(member, discord.Member):
|
||||
@@ -43,14 +44,25 @@ async def mute_user(
|
||||
messages_config = bot.config.get("messages", {})
|
||||
cat_str = ", ".join(c for c in categories if c != "none") or "general negativity"
|
||||
|
||||
# Build rule citation text
|
||||
rules_text = ""
|
||||
if violated_rules and rules_config:
|
||||
rule_lines = [f"Rule {r}: {rules_config[r]}" for r in violated_rules if r in rules_config]
|
||||
if rule_lines:
|
||||
rules_text = "\n".join(rule_lines)
|
||||
|
||||
description = messages_config.get("mute_description", "").format(
|
||||
username=member.display_name,
|
||||
duration=f"{duration_minutes} minutes",
|
||||
score=f"{score:.2f}",
|
||||
categories=cat_str,
|
||||
)
|
||||
if rules_text:
|
||||
description += f"\n\nRules violated:\n{rules_text}"
|
||||
|
||||
embed = discord.Embed(
|
||||
title=messages_config.get("mute_title", "BREEHAVIOR ALERT"),
|
||||
description=messages_config.get("mute_description", "").format(
|
||||
username=member.display_name,
|
||||
duration=f"{duration_minutes} minutes",
|
||||
score=f"{score:.2f}",
|
||||
categories=cat_str,
|
||||
),
|
||||
description=description,
|
||||
color=discord.Color.red(),
|
||||
)
|
||||
embed.set_footer(
|
||||
@@ -58,25 +70,29 @@ async def mute_user(
|
||||
)
|
||||
|
||||
await message.channel.send(embed=embed)
|
||||
|
||||
rules_log = f" | Rules: {','.join(str(r) for r in violated_rules)}" if violated_rules else ""
|
||||
await log_action(
|
||||
message.guild,
|
||||
f"**MUTE** | {member.mention} | Score: {score:.2f} | "
|
||||
f"Duration: {duration_minutes}m | Offense #{offense_num} | "
|
||||
f"Categories: {cat_str}",
|
||||
f"Categories: {cat_str}{rules_log}",
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Muted %s for %d minutes (offense #%d, score %.2f)",
|
||||
"Muted %s for %d minutes (offense #%d, score %.2f, rules=%s)",
|
||||
member, duration_minutes, offense_num, score,
|
||||
violated_rules or [],
|
||||
)
|
||||
|
||||
rules_detail = f" rules={','.join(str(r) for r in violated_rules)}" if violated_rules else ""
|
||||
asyncio.create_task(bot.db.save_action(
|
||||
guild_id=message.guild.id,
|
||||
user_id=member.id,
|
||||
username=member.display_name,
|
||||
action_type="mute",
|
||||
message_id=db_message_id,
|
||||
details=f"duration={duration_minutes}m offense={offense_num} score={score:.2f} categories={cat_str}",
|
||||
details=f"duration={duration_minutes}m offense={offense_num} score={score:.2f} categories={cat_str}{rules_detail}",
|
||||
))
|
||||
save_user_state(bot, dirty_users, member.id)
|
||||
|
||||
@@ -84,6 +100,7 @@ async def mute_user(
|
||||
async def warn_user(
|
||||
bot, message: discord.Message, score: float,
|
||||
db_message_id: int | None, dirty_users: set[int],
|
||||
violated_rules: list[int] | None = None, rules_config: dict | None = None,
|
||||
):
|
||||
timeout_config = bot.config.get("timeouts", {})
|
||||
cooldown = timeout_config.get("warning_cooldown_minutes", 5)
|
||||
@@ -104,20 +121,29 @@ async def warn_user(
|
||||
"Easy there, {username}. The Breehavior Monitor is watching.",
|
||||
).format(username=message.author.display_name)
|
||||
|
||||
# Append rule citation if rules were violated
|
||||
if violated_rules and rules_config:
|
||||
rule_lines = [f"Rule {r}: {rules_config[r]}" for r in violated_rules if r in rules_config]
|
||||
if rule_lines:
|
||||
warning_text += "\n" + " | ".join(rule_lines)
|
||||
|
||||
await message.channel.send(warning_text)
|
||||
|
||||
rules_log = f" | Rules: {','.join(str(r) for r in violated_rules)}" if violated_rules else ""
|
||||
await log_action(
|
||||
message.guild,
|
||||
f"**WARNING** | {message.author.mention} | Score: {score:.2f}",
|
||||
f"**WARNING** | {message.author.mention} | Score: {score:.2f}{rules_log}",
|
||||
)
|
||||
|
||||
logger.info("Warned %s (score %.2f)", message.author, score)
|
||||
logger.info("Warned %s (score %.2f, rules=%s)", message.author, score, violated_rules or [])
|
||||
|
||||
rules_detail = f" rules={','.join(str(r) for r in violated_rules)}" if violated_rules else ""
|
||||
asyncio.create_task(bot.db.save_action(
|
||||
guild_id=message.guild.id,
|
||||
user_id=message.author.id,
|
||||
username=message.author.display_name,
|
||||
action_type="warning",
|
||||
message_id=db_message_id,
|
||||
details=f"score={score:.2f}",
|
||||
details=f"score={score:.2f}{rules_detail}",
|
||||
))
|
||||
save_user_state(bot, dirty_users, message.author.id)
|
||||
|
||||
Reference in New Issue
Block a user