feat: add server rule violation detection and compress prompts

- The LLM now evaluates messages against the numbered server rules and
  reports violated_rules in its analysis output
- Warnings and mutes cite the specific rule(s) broken
- Rules extracted to prompts/rules.txt so they can be inserted into prompts
- Personality prompts moved to prompts/personalities/ and compressed
  (~63% reduction across all prompt files)
- All prompt files tightened: removed redundancy, consolidated "Do NOT"
  sections, and trimmed examples while preserving behavioral instructions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-27 22:14:35 -05:00
parent ed51db527c
commit bf32a9536a
22 changed files with 230 additions and 293 deletions

View File

@@ -72,7 +72,7 @@ modes:
default:
label: "Default"
description: "Hall-monitor moderation mode"
prompt_file: "chat_personality.txt"
prompt_file: "personalities/chat_personality.txt"
proactive_replies: false
reply_chance: 0.0
moderation: full
@@ -80,7 +80,7 @@ modes:
chatty:
label: "Chatty"
description: "Friendly chat participant"
prompt_file: "chat_chatty.txt"
prompt_file: "personalities/chat_chatty.txt"
proactive_replies: true
reply_chance: 0.10
moderation: relaxed
@@ -93,7 +93,7 @@ modes:
roast:
label: "Roast"
description: "Savage roast mode"
prompt_file: "chat_roast.txt"
prompt_file: "personalities/chat_roast.txt"
proactive_replies: true
reply_chance: 0.20
moderation: relaxed
@@ -106,7 +106,7 @@ modes:
hype:
label: "Hype"
description: "Your biggest fan"
prompt_file: "chat_hype.txt"
prompt_file: "personalities/chat_hype.txt"
proactive_replies: true
reply_chance: 0.15
moderation: relaxed
@@ -119,7 +119,7 @@ modes:
drunk:
label: "Drunk"
description: "Had a few too many"
prompt_file: "chat_drunk.txt"
prompt_file: "personalities/chat_drunk.txt"
proactive_replies: true
reply_chance: 0.20
moderation: relaxed
@@ -132,7 +132,7 @@ modes:
english_teacher:
label: "English Teacher"
description: "Insufferable grammar nerd mode"
prompt_file: "chat_english_teacher.txt"
prompt_file: "personalities/chat_english_teacher.txt"
proactive_replies: true
reply_chance: 0.20
moderation: relaxed