Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 53803d920f | |||
| b7076dffe2 | |||
| c5316b98d1 | |||
| f75a3ca3f4 |
@@ -171,6 +171,8 @@ class ChatCog(commands.Cog):
|
|||||||
# Update profile if warranted
|
# Update profile if warranted
|
||||||
profile_update = result.get("profile_update")
|
profile_update = result.get("profile_update")
|
||||||
if profile_update:
|
if profile_update:
|
||||||
|
# Sanitize before storing — strips any quoted toxic language
|
||||||
|
profile_update = await self.bot.llm.sanitize_notes(profile_update)
|
||||||
self.bot.drama_tracker.set_user_profile(user_id, profile_update)
|
self.bot.drama_tracker.set_user_profile(user_id, profile_update)
|
||||||
self._dirty_users.add(user_id)
|
self._dirty_users.add(user_id)
|
||||||
|
|
||||||
|
|||||||
@@ -469,13 +469,14 @@ class SentimentCog(commands.Cog):
|
|||||||
|
|
||||||
# Note update — route to memory system
|
# Note update — route to memory system
|
||||||
if note_update:
|
if note_update:
|
||||||
# Still update the legacy notes for backward compat with analysis prompt
|
# Sanitize before storing — strips any quoted toxic language
|
||||||
self.bot.drama_tracker.update_user_notes(user_id, note_update)
|
sanitized = await self.bot.llm.sanitize_notes(note_update)
|
||||||
|
self.bot.drama_tracker.update_user_notes(user_id, sanitized)
|
||||||
self._dirty_users.add(user_id)
|
self._dirty_users.add(user_id)
|
||||||
# Also save as an expiring memory (7d default for passive observations)
|
# Also save as an expiring memory (7d default for passive observations)
|
||||||
asyncio.create_task(self.bot.db.save_memory(
|
asyncio.create_task(self.bot.db.save_memory(
|
||||||
user_id=user_id,
|
user_id=user_id,
|
||||||
memory=note_update[:500],
|
memory=sanitized[:500],
|
||||||
topics=db_topic_category or "general",
|
topics=db_topic_category or "general",
|
||||||
importance="medium",
|
importance="medium",
|
||||||
expires_at=datetime.now(timezone.utc) + timedelta(days=7),
|
expires_at=datetime.now(timezone.utc) + timedelta(days=7),
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ TOPIC: Flag off_topic if the message is personal drama (relationship issues, feu
|
|||||||
|
|
||||||
GAME DETECTION: If CHANNEL INFO is provided, set detected_game to the matching channel name from that list, or null if unsure/not game-specific.
|
GAME DETECTION: If CHANNEL INFO is provided, set detected_game to the matching channel name from that list, or null if unsure/not game-specific.
|
||||||
|
|
||||||
USER NOTES: If provided, use to calibrate (e.g. if notes say "uses heavy profanity casually", profanity alone should score lower). Add a note_update only for genuinely new behavioral observations; null otherwise.
|
USER NOTES: If provided, use to calibrate (e.g. if notes say "uses heavy profanity casually", profanity alone should score lower). Add a note_update only for genuinely new behavioral observations; null otherwise. NEVER quote or repeat toxic/offensive language in note_update — describe patterns abstractly (e.g. "directed a personal insult at another user", NOT "called someone a [slur]").
|
||||||
|
|
||||||
RULE ENFORCEMENT: If SERVER RULES are provided, report clearly violated rule numbers in violated_rules. Only flag clear violations, not borderline.
|
RULE ENFORCEMENT: If SERVER RULES are provided, report clearly violated rule numbers in violated_rules. Only flag clear violations, not borderline.
|
||||||
|
|
||||||
|
|||||||
+47
-2
@@ -86,7 +86,7 @@ ANALYSIS_TOOL = {
|
|||||||
},
|
},
|
||||||
"note_update": {
|
"note_update": {
|
||||||
"type": ["string", "null"],
|
"type": ["string", "null"],
|
||||||
"description": "Brief new observation about this user's style/behavior for future reference, or null if nothing new.",
|
"description": "Brief new observation about this user's style/behavior for future reference, or null if nothing new. NEVER quote toxic language — describe patterns abstractly (e.g. 'uses personal insults when frustrated').",
|
||||||
},
|
},
|
||||||
"detected_game": {
|
"detected_game": {
|
||||||
"type": ["string", "null"],
|
"type": ["string", "null"],
|
||||||
@@ -189,7 +189,7 @@ CONVERSATION_TOOL = {
|
|||||||
},
|
},
|
||||||
"note_update": {
|
"note_update": {
|
||||||
"type": ["string", "null"],
|
"type": ["string", "null"],
|
||||||
"description": "New observation about this user's pattern, or null.",
|
"description": "New observation about this user's pattern, or null. NEVER quote toxic language — describe patterns abstractly.",
|
||||||
},
|
},
|
||||||
"detected_game": {
|
"detected_game": {
|
||||||
"type": ["string", "null"],
|
"type": ["string", "null"],
|
||||||
@@ -977,6 +977,51 @@ class LLMClient:
|
|||||||
"profile_update": profile_update,
|
"profile_update": profile_update,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def sanitize_notes(self, notes: str) -> str:
    """Rewrite user notes to remove any quoted toxic/offensive language.

    Sends ``notes`` to the chat-completions endpoint with a system prompt
    asking for quoted insults, slurs and profanity to be replaced by
    abstract descriptions of the behavior.

    Args:
        notes: Free-form behavior notes about a user. Empty, whitespace-only
            or non-string values are returned unchanged without calling the
            model.

    Returns:
        The stripped, rewritten notes on success. On an empty model response
        or any exception the ORIGINAL ``notes`` value is returned.

    NOTE(review): this method fails open — when the model call fails the
    caller receives (and may persist) the unsanitized text. Callers that
    must never store raw quotes need their own fallback.
    """
    # Guard before touching the model: nothing to sanitize for empty input,
    # and a non-string (e.g. malformed structured output) must not raise
    # here, since the contract is "return the original on failure".
    if not isinstance(notes, str) or not notes.strip():
        return notes

    system_prompt = (
        "Rewrite the following user behavior notes. Remove any quoted offensive language, "
        "slurs, or profanity. Replace toxic quotes with abstract descriptions of the behavior "
        "(e.g. 'directed a personal insult at another user' instead of quoting the insult). "
        "Preserve all non-toxic observations, timestamps, and behavioral patterns exactly. "
        "Return ONLY the rewritten notes, nothing else."
    )
    user_content = notes
    if self._no_think:
        # Appended to the user message when the client is configured with
        # _no_think; the logged input below still uses the raw notes.
        user_content += "\n/no_think"

    # Timer starts before the semaphore is acquired, so the logged latency
    # includes any time spent waiting for a free slot.
    t0 = time.monotonic()
    async with self._semaphore:
        try:
            # Only pass temperature when the client says it is supported.
            temp_kwargs = {"temperature": 0.1} if self._supports_temperature else {}
            response = await self._client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_content},
                ],
                **temp_kwargs,
                max_completion_tokens=1024,
            )
            elapsed = int((time.monotonic() - t0) * 1000)
            result = response.choices[0].message.content
            if result and result.strip():
                # Log only the first 300 characters of input and output.
                self._log_llm("sanitize_notes", elapsed, True, notes[:300], result[:300])
                return result.strip()
            self._log_llm("sanitize_notes", elapsed, False, notes[:300], error="Empty response")
            return notes
        except Exception as e:
            # Best-effort by design: log the failure and fall back to the
            # original notes rather than propagating to the caller.
            elapsed = int((time.monotonic() - t0) * 1000)
            logger.error("LLM sanitize_notes error: %s", e)
            self._log_llm("sanitize_notes", elapsed, False, notes[:300], error=str(e))
            return notes
|
||||||
|
|
||||||
async def analyze_image(
|
async def analyze_image(
|
||||||
self,
|
self,
|
||||||
image_bytes: bytes,
|
image_bytes: bytes,
|
||||||
|
|||||||
Reference in New Issue
Block a user