feat: add server rule violation detection and compress prompts
- LLM now evaluates messages against numbered server rules and reports violated_rules in analysis output
- Warnings and mutes cite the specific rule(s) broken
- Rules extracted to prompts/rules.txt for prompt injection
- Personality prompts moved to prompts/personalities/ and compressed (~63% reduction across all prompt files)
- All prompt files tightened: removed redundancy, consolidated Do NOT sections, trimmed examples while preserving behavioral instructions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -92,6 +92,11 @@ ANALYSIS_TOOL = {
|
||||
"type": ["string", "null"],
|
||||
"description": "The game channel name this message is about (e.g. 'gta-online', 'warzone'), or null if not game-specific.",
|
||||
},
|
||||
"violated_rules": {
|
||||
"type": "array",
|
||||
"items": {"type": "integer"},
|
||||
"description": "Rule numbers violated (empty array if none).",
|
||||
},
|
||||
},
|
||||
"required": ["toxicity_score", "categories", "reasoning", "off_topic", "topic_category", "topic_reasoning", "coherence_score", "coherence_flag"],
|
||||
},
|
||||
@@ -190,6 +195,11 @@ CONVERSATION_TOOL = {
|
||||
"type": ["string", "null"],
|
||||
"description": "The game channel name this user's messages are about, or null.",
|
||||
},
|
||||
"violated_rules": {
|
||||
"type": "array",
|
||||
"items": {"type": "integer"},
|
||||
"description": "Rule numbers violated (empty array if none).",
|
||||
},
|
||||
},
|
||||
"required": ["username", "toxicity_score", "categories", "reasoning", "off_topic", "topic_category", "topic_reasoning", "coherence_score", "coherence_flag"],
|
||||
},
|
||||
@@ -299,12 +309,15 @@ class LLMClient:
|
||||
async def analyze_message(
|
||||
self, message: str, context: str = "", user_notes: str = "",
|
||||
channel_context: str = "", mention_context: str = "",
|
||||
rules_context: str = "",
|
||||
) -> dict | None:
|
||||
user_content = f"=== RECENT CHANNEL MESSAGES (for background context only) ===\n{context}\n\n"
|
||||
if user_notes:
|
||||
user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
|
||||
if channel_context:
|
||||
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
|
||||
if rules_context:
|
||||
user_content += f"=== SERVER RULES ===\n{rules_context}\n\n"
|
||||
if mention_context:
|
||||
user_content += f"=== USER REPORT (a user flagged this conversation — focus on this concern) ===\n{mention_context}\n\n"
|
||||
user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}"
|
||||
@@ -382,6 +395,8 @@ class LLMClient:
|
||||
|
||||
result.setdefault("note_update", None)
|
||||
result.setdefault("detected_game", None)
|
||||
if not isinstance(result.get("violated_rules"), list):
|
||||
result["violated_rules"] = []
|
||||
|
||||
return result
|
||||
|
||||
@@ -490,6 +505,7 @@ class LLMClient:
|
||||
user_notes_map: dict[str, str] | None = None,
|
||||
new_message_start: int | None = None,
|
||||
user_aliases: str = "",
|
||||
rules_context: str = "",
|
||||
) -> dict | None:
|
||||
"""Analyze a conversation block in one call, returning per-user findings."""
|
||||
if not messages:
|
||||
@@ -506,6 +522,8 @@ class LLMClient:
|
||||
user_content += "=== USER NOTES (from prior analysis) ===\n" + "\n".join(notes_lines) + "\n\n"
|
||||
if channel_context:
|
||||
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
|
||||
if rules_context:
|
||||
user_content += f"=== SERVER RULES ===\n{rules_context}\n\n"
|
||||
if mention_context:
|
||||
user_content += f"=== USER REPORT (a user flagged this conversation — focus on this concern) ===\n{mention_context}\n\n"
|
||||
user_content += "Analyze the conversation block above and report findings for each user."
|
||||
@@ -587,6 +605,8 @@ class LLMClient:
|
||||
finding.setdefault("coherence_flag", "normal")
|
||||
finding.setdefault("note_update", None)
|
||||
finding.setdefault("detected_game", None)
|
||||
if not isinstance(finding.get("violated_rules"), list):
|
||||
finding["violated_rules"] = []
|
||||
result["user_findings"] = findings
|
||||
result.setdefault("conversation_summary", "")
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user