feat: add server rule violation detection and compress prompts

- LLM now evaluates messages against numbered server rules and reports
  violated_rules in analysis output
- Warnings and mutes cite the specific rule(s) broken
- Rules extracted to prompts/rules.txt so they can be interpolated into
  LLM prompts at analysis time
- Personality prompts moved to prompts/personalities/ and compressed
  (~63% reduction across all prompt files)
- All prompt files tightened: removed redundancy, consolidated Do NOT
  sections, trimmed examples while preserving behavioral instructions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-27 22:14:35 -05:00
parent ed51db527c
commit bf32a9536a
22 changed files with 230 additions and 293 deletions

View File

@@ -92,6 +92,11 @@ ANALYSIS_TOOL = {
"type": ["string", "null"],
"description": "The game channel name this message is about (e.g. 'gta-online', 'warzone'), or null if not game-specific.",
},
"violated_rules": {
"type": "array",
"items": {"type": "integer"},
"description": "Rule numbers violated (empty array if none).",
},
},
"required": ["toxicity_score", "categories", "reasoning", "off_topic", "topic_category", "topic_reasoning", "coherence_score", "coherence_flag"],
},
@@ -190,6 +195,11 @@ CONVERSATION_TOOL = {
"type": ["string", "null"],
"description": "The game channel name this user's messages are about, or null.",
},
"violated_rules": {
"type": "array",
"items": {"type": "integer"},
"description": "Rule numbers violated (empty array if none).",
},
},
"required": ["username", "toxicity_score", "categories", "reasoning", "off_topic", "topic_category", "topic_reasoning", "coherence_score", "coherence_flag"],
},
@@ -299,12 +309,15 @@ class LLMClient:
async def analyze_message(
self, message: str, context: str = "", user_notes: str = "",
channel_context: str = "", mention_context: str = "",
rules_context: str = "",
) -> dict | None:
user_content = f"=== RECENT CHANNEL MESSAGES (for background context only) ===\n{context}\n\n"
if user_notes:
user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
if channel_context:
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
if rules_context:
user_content += f"=== SERVER RULES ===\n{rules_context}\n\n"
if mention_context:
user_content += f"=== USER REPORT (a user flagged this conversation — focus on this concern) ===\n{mention_context}\n\n"
user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}"
@@ -382,6 +395,8 @@ class LLMClient:
result.setdefault("note_update", None)
result.setdefault("detected_game", None)
if not isinstance(result.get("violated_rules"), list):
result["violated_rules"] = []
return result
@@ -490,6 +505,7 @@ class LLMClient:
user_notes_map: dict[str, str] | None = None,
new_message_start: int | None = None,
user_aliases: str = "",
rules_context: str = "",
) -> dict | None:
"""Analyze a conversation block in one call, returning per-user findings."""
if not messages:
@@ -506,6 +522,8 @@ class LLMClient:
user_content += "=== USER NOTES (from prior analysis) ===\n" + "\n".join(notes_lines) + "\n\n"
if channel_context:
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
if rules_context:
user_content += f"=== SERVER RULES ===\n{rules_context}\n\n"
if mention_context:
user_content += f"=== USER REPORT (a user flagged this conversation — focus on this concern) ===\n{mention_context}\n\n"
user_content += "Analyze the conversation block above and report findings for each user."
@@ -587,6 +605,8 @@ class LLMClient:
finding.setdefault("coherence_flag", "normal")
finding.setdefault("note_update", None)
finding.setdefault("detected_game", None)
if not isinstance(finding.get("violated_rules"), list):
finding["violated_rules"] = []
result["user_findings"] = findings
result.setdefault("conversation_summary", "")
return result