Compare commits

4 Commits: 09f83f8c2f ... 53803d920f

| Author | SHA1 | Date |
|---|---|---|
| | 53803d920f | |
| | b7076dffe2 | |
| | c5316b98d1 | |
| | f75a3ca3f4 | |
```diff
@@ -171,6 +171,8 @@ class ChatCog(commands.Cog):
         # Update profile if warranted
         profile_update = result.get("profile_update")
         if profile_update:
+            # Sanitize before storing — strips any quoted toxic language
+            profile_update = await self.bot.llm.sanitize_notes(profile_update)
             self.bot.drama_tracker.set_user_profile(user_id, profile_update)
             self._dirty_users.add(user_id)
 
```
```diff
@@ -469,13 +469,14 @@ class SentimentCog(commands.Cog):
 
         # Note update — route to memory system
         if note_update:
-            # Still update the legacy notes for backward compat with analysis prompt
-            self.bot.drama_tracker.update_user_notes(user_id, note_update)
+            # Sanitize before storing — strips any quoted toxic language
+            sanitized = await self.bot.llm.sanitize_notes(note_update)
+            self.bot.drama_tracker.update_user_notes(user_id, sanitized)
             self._dirty_users.add(user_id)
             # Also save as an expiring memory (7d default for passive observations)
             asyncio.create_task(self.bot.db.save_memory(
                 user_id=user_id,
-                memory=note_update[:500],
+                memory=sanitized[:500],
                 topics=db_topic_category or "general",
                 importance="medium",
                 expires_at=datetime.now(timezone.utc) + timedelta(days=7),
```
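Both cog changes follow the same shape: sanitize once, then reuse the cleaned string for every store (legacy notes and the expiring memory) so the raw text never reaches persistence. Below is a minimal, self-contained sketch of that flow; `StubLLM`, `legacy_notes`, and `expiring_memories` are illustrative stand-ins, not names from this codebase:

```python
import asyncio
from datetime import datetime, timedelta, timezone

legacy_notes: dict[int, str] = {}
expiring_memories: list[dict] = []

class StubLLM:
    """Stand-in for LLMClient; the real sanitize_notes is added later in this diff."""
    async def sanitize_notes(self, notes: str) -> str:
        return notes.replace('called them a "<slur>"',
                             "directed a personal insult at another user")

async def store_note(llm: StubLLM, user_id: int, note_update: str) -> None:
    sanitized = await llm.sanitize_notes(note_update)  # sanitize exactly once
    legacy_notes[user_id] = sanitized                  # reuse for legacy notes...
    expiring_memories.append({                         # ...and for the expiring memory
        "user_id": user_id,
        "memory": sanitized[:500],
        "expires_at": datetime.now(timezone.utc) + timedelta(days=7),
    })

asyncio.run(store_note(StubLLM(), 42, 'called them a "<slur>" during an argument'))
assert "<slur>" not in legacy_notes[42]
```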
```diff
@@ -26,7 +26,7 @@ TOPIC: Flag off_topic if the message is personal drama (relationship issues, feu
 
 GAME DETECTION: If CHANNEL INFO is provided, set detected_game to the matching channel name from that list, or null if unsure/not game-specific.
 
-USER NOTES: If provided, use to calibrate (e.g. if notes say "uses heavy profanity casually", profanity alone should score lower). Add a note_update only for genuinely new behavioral observations; null otherwise.
+USER NOTES: If provided, use to calibrate (e.g. if notes say "uses heavy profanity casually", profanity alone should score lower). Add a note_update only for genuinely new behavioral observations; null otherwise. NEVER quote or repeat toxic/offensive language in note_update — describe patterns abstractly (e.g. "directed a personal insult at another user", NOT "called someone a [slur]").
 
 RULE ENFORCEMENT: If SERVER RULES are provided, report clearly violated rule numbers in violated_rules. Only flag clear violations, not borderline.
 
```
+47 −2
```diff
@@ -86,7 +86,7 @@ ANALYSIS_TOOL = {
         },
         "note_update": {
             "type": ["string", "null"],
-            "description": "Brief new observation about this user's style/behavior for future reference, or null if nothing new.",
+            "description": "Brief new observation about this user's style/behavior for future reference, or null if nothing new. NEVER quote toxic language — describe patterns abstractly (e.g. 'uses personal insults when frustrated').",
        },
        "detected_game": {
            "type": ["string", "null"],
```
```diff
@@ -189,7 +189,7 @@ CONVERSATION_TOOL = {
         },
         "note_update": {
             "type": ["string", "null"],
-            "description": "New observation about this user's pattern, or null.",
+            "description": "New observation about this user's pattern, or null. NEVER quote toxic language — describe patterns abstractly.",
        },
        "detected_game": {
            "type": ["string", "null"],
```
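The same NEVER-quote rule now lives in three places: the analysis prompt and both tool descriptions. If that duplication ever becomes a maintenance concern, one option is to hoist the shared sentence into a module-level constant so future wording tweaks stay in sync. A sketch of that refactor; `NO_QUOTE_RULE` and the field names are hypothetical, not something in this diff:

```python
# Hypothetical refactor: this constant does not exist in the diff above,
# where the rule text is embedded inline in each description.
NO_QUOTE_RULE = (
    "NEVER quote toxic language — describe patterns abstractly "
    "(e.g. 'uses personal insults when frustrated')."
)

ANALYSIS_NOTE_UPDATE = {
    "type": ["string", "null"],
    "description": "Brief new observation about this user's style/behavior "
                   "for future reference, or null if nothing new. " + NO_QUOTE_RULE,
}

CONVERSATION_NOTE_UPDATE = {
    "type": ["string", "null"],
    "description": "New observation about this user's pattern, or null. " + NO_QUOTE_RULE,
}
```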
```diff
@@ -977,6 +977,51 @@ class LLMClient:
             "profile_update": profile_update,
         }
 
+    async def sanitize_notes(self, notes: str) -> str:
+        """Rewrite user notes to remove any quoted toxic/offensive language.
+
+        Returns the sanitized notes string, or the original on failure.
+        """
+        if not notes or len(notes.strip()) == 0:
+            return notes
+
+        system_prompt = (
+            "Rewrite the following user behavior notes. Remove any quoted offensive language, "
+            "slurs, or profanity. Replace toxic quotes with abstract descriptions of the behavior "
+            "(e.g. 'directed a personal insult at another user' instead of quoting the insult). "
+            "Preserve all non-toxic observations, timestamps, and behavioral patterns exactly. "
+            "Return ONLY the rewritten notes, nothing else."
+        )
+        user_content = notes
+        if self._no_think:
+            user_content += "\n/no_think"
+
+        t0 = time.monotonic()
+        async with self._semaphore:
+            try:
+                temp_kwargs = {"temperature": 0.1} if self._supports_temperature else {}
+                response = await self._client.chat.completions.create(
+                    model=self.model,
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": user_content},
+                    ],
+                    **temp_kwargs,
+                    max_completion_tokens=1024,
+                )
+                elapsed = int((time.monotonic() - t0) * 1000)
+                result = response.choices[0].message.content
+                if result and result.strip():
+                    self._log_llm("sanitize_notes", elapsed, True, notes[:300], result[:300])
+                    return result.strip()
+                self._log_llm("sanitize_notes", elapsed, False, notes[:300], error="Empty response")
+                return notes
+            except Exception as e:
+                elapsed = int((time.monotonic() - t0) * 1000)
+                logger.error("LLM sanitize_notes error: %s", e)
+                self._log_llm("sanitize_notes", elapsed, False, notes[:300], error=str(e))
+                return notes
+
     async def analyze_image(
         self,
         image_bytes: bytes,
```
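The new method is fail-open by design: empty input, an empty completion, and a raised exception all return the caller's original string, so a sanitizer outage degrades to the previous unsanitized behavior rather than dropping notes. A condensed, runnable sketch of just that control flow, with the API call stubbed out (`sanitize_sketch` and the stub coroutines are illustrative, not part of the codebase):

```python
import asyncio

async def sanitize_sketch(notes: str, call_llm) -> str:
    """Mirrors sanitize_notes' error handling: every failure path
    falls back to the original notes rather than raising."""
    if not notes or not notes.strip():
        return notes
    try:
        result = await call_llm(notes)
    except Exception:
        return notes                        # API error -> fail open
    return result.strip() if result and result.strip() else notes

async def demo():
    async def ok(n):    return " directed a personal insult at another user "
    async def empty(n): return ""
    async def boom(n):  raise RuntimeError("backend down")

    assert await sanitize_sketch("raw note", ok) == "directed a personal insult at another user"
    assert await sanitize_sketch("raw note", empty) == "raw note"   # empty reply -> original
    assert await sanitize_sketch("raw note", boom) == "raw note"    # exception -> original
    assert await sanitize_sketch("   ", ok) == "   "                # blank input passthrough

asyncio.run(demo())
```

The trade-off of failing open is that callers still persist the raw note when the backend is down, which matches the pre-change behavior rather than losing data.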