diff --git a/utils/llm_client.py b/utils/llm_client.py
index f67898a..c5033ee 100644
--- a/utils/llm_client.py
+++ b/utils/llm_client.py
@@ -977,6 +977,54 @@ class LLMClient:
             "profile_update": profile_update,
         }
 
+    async def sanitize_notes(self, notes: str) -> str:
+        """Rewrite user notes to remove any quoted toxic/offensive language.
+
+        Returns the sanitized notes string, or the original on failure.
+        """
+        if not notes or not notes.strip():
+            return notes
+
+        system_prompt = (
+            "Rewrite the following user behavior notes. Remove any quoted offensive language, "
+            "slurs, or profanity. Replace toxic quotes with abstract descriptions of the behavior "
+            "(e.g. 'directed a personal insult at another user' instead of quoting the insult). "
+            "Preserve all non-toxic observations, timestamps, and behavioral patterns exactly. "
+            "Return ONLY the rewritten notes, nothing else."
+        )
+        user_content = notes
+        if self._no_think:
+            user_content += "\n/no_think"
+
+        # NOTE: the timer starts before the semaphore, so elapsed includes queue wait.
+        t0 = time.monotonic()
+        async with self._semaphore:
+            try:
+                temp_kwargs = {"temperature": 0.1} if self._supports_temperature else {}
+                response = await self._client.chat.completions.create(
+                    model=self.model,
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": user_content},
+                    ],
+                    **temp_kwargs,
+                    max_completion_tokens=1024,
+                )
+                elapsed = int((time.monotonic() - t0) * 1000)
+                result = response.choices[0].message.content
+                if result and result.strip():
+                    self._log_llm("sanitize_notes", elapsed, True, notes[:300], result[:300])
+                    return result.strip()
+                # Empty completion: log the failure and fall back to the original notes.
+                self._log_llm("sanitize_notes", elapsed, False, notes[:300], error="Empty response")
+                return notes
+            except Exception as e:
+                elapsed = int((time.monotonic() - t0) * 1000)
+                # logger.exception keeps the traceback that logger.error would drop.
+                logger.exception("LLM sanitize_notes error: %s", e)
+                self._log_llm("sanitize_notes", elapsed, False, notes[:300], error=str(e))
+                return notes
+
     async def analyze_image(
         self,
         image_bytes: bytes,