feat: add sanitize_notes() method to LLMClient

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 21:59:06 -05:00
parent f75a3ca3f4
commit c5316b98d1
1 changed files with 45 additions and 0 deletions
@@ -977,6 +977,51 @@ class LLMClient:
            "profile_update": profile_update,
        }

+    async def sanitize_notes(self, notes: str) -> str:
+        """Rewrite user notes to remove any quoted toxic/offensive language.
+
+        Returns the sanitized notes string, or the original on failure.
+        """
+        if not notes or len(notes.strip()) == 0:
+            return notes
+
+        system_prompt = (
+            "Rewrite the following user behavior notes. Remove any quoted offensive language, "
+            "slurs, or profanity. Replace toxic quotes with abstract descriptions of the behavior "
+            "(e.g. 'directed a personal insult at another user' instead of quoting the insult). "
+            "Preserve all non-toxic observations, timestamps, and behavioral patterns exactly. "
+            "Return ONLY the rewritten notes, nothing else."
+        )
+        user_content = notes
+        if self._no_think:
+            user_content += "\n/no_think"
+
+        t0 = time.monotonic()
+        async with self._semaphore:
+            try:
+                temp_kwargs = {"temperature": 0.1} if self._supports_temperature else {}
+                response = await self._client.chat.completions.create(
+                    model=self.model,
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": user_content},
+                    ],
+                    **temp_kwargs,
+                    max_completion_tokens=1024,
+                )
+                elapsed = int((time.monotonic() - t0) * 1000)
+                result = response.choices[0].message.content
+                if result and result.strip():
+                    self._log_llm("sanitize_notes", elapsed, True, notes[:300], result[:300])
+                    return result.strip()
+                self._log_llm("sanitize_notes", elapsed, False, notes[:300], error="Empty response")
+                return notes
+            except Exception as e:
+                elapsed = int((time.monotonic() - t0) * 1000)
+                logger.error("LLM sanitize_notes error: %s", e)
+                self._log_llm("sanitize_notes", elapsed, False, notes[:300], error=str(e))
+                return notes
+
    async def analyze_image(
        self,
        image_bytes: bytes,