fix: address multiple bugs found in code review

- Fix dirty-user flush race: discard IDs individually after successful save
- Escape LIKE wildcards in LLM-generated topic keywords for DB queries
- Anonymize absent-member aliases to prevent LLM de-anonymization
- Pass correct MIME type to vision model based on image file extension
- Use enumerate instead of list.index() in bcs-scan loop
- Allow bot @mentions with non-report intent to fall through to moderation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-01 01:16:38 -05:00
parent eb7eb81621
commit 2ec9b16b99
6 changed files with 35 additions and 22 deletions
+3
View File
@@ -273,11 +273,14 @@ class ChatCog(commands.Cog):
                     image_attachment.filename,
                     user_text[:80],
                 )
+                ext = image_attachment.filename.rsplit(".", 1)[-1].lower() if "." in image_attachment.filename else "png"
+                mime = f"image/{'jpeg' if ext == 'jpg' else ext}"
                 response = await self.bot.llm_heavy.analyze_image(
                     image_bytes,
                     IMAGE_ROAST,
                     user_text=user_text,
                     on_first_token=start_typing,
+                    media_type=mime,
                 )
             else:
                 # --- Text-only path: normal chat ---
+1 -2
View File
@@ -320,9 +320,8 @@ class CommandsCog(commands.Cog):
             f"Scanning {len(messages)} messages... (first request may be slow while model loads)"
         )
-        for msg in messages:
+        for idx, msg in enumerate(messages):
             # Build context from the messages before this one
-            idx = messages.index(msg)
             ctx_msgs = messages[max(0, idx - 3):idx]
             context = (
                 " | ".join(f"{m.author.display_name}: {m.content}" for m in ctx_msgs)
+8 -4
View File
@@ -145,7 +145,9 @@ class SentimentCog(commands.Cog):
             mention_config = config.get("mention_scan", {})
             if mention_config.get("enabled", True):
                 await self._maybe_start_mention_scan(message, mention_config)
             return
+        # For non-report intents, fall through to buffer the message
+        # so it still gets scored for toxicity

         # Skip if empty
         if not message.content or not message.content.strip():
@@ -317,11 +319,13 @@ class SentimentCog(commands.Cog):
             if aliases:
                 anon_key = anon_map.get(msg.author.display_name, msg.author.display_name)
                 lines.append(f" {anon_key} is also known as: {', '.join(aliases)}")
-        # Also include aliases for members NOT in the conversation (so the LLM
-        # can recognize name-drops of absent members)
+        # Include aliases for members NOT in the conversation (so the LLM
+        # can recognize name-drops of absent members), using anonymized keys
+        absent_idx = 0
         for uid, aliases in all_aliases.items():
             if uid not in seen_ids:
-                lines.append(f" (not in chat) also known as: {', '.join(aliases)}")
+                absent_idx += 1
+                lines.append(f" Absent_{absent_idx} is also known as: {', '.join(aliases)}")
         return "\n".join(lines) if lines else ""

     @staticmethod
+19 -14
View File
@@ -32,19 +32,24 @@ async def flush_dirty_states(bot, dirty_users: set[int]) -> None:
     if not dirty_users:
         return
     dirty = list(dirty_users)
-    dirty_users.clear()
+    saved = 0
     for user_id in dirty:
         user_data = bot.drama_tracker.get_user(user_id)
-        await bot.db.save_user_state(
-            user_id=user_id,
-            offense_count=user_data.offense_count,
-            immune=user_data.immune,
-            off_topic_count=user_data.off_topic_count,
-            baseline_coherence=user_data.baseline_coherence,
-            user_notes=user_data.notes or None,
-            warned=user_data.warned_since_reset,
-            last_offense_at=user_data.last_offense_time or None,
-            aliases=_aliases_csv(user_data),
-            warning_expires_at=user_data.warning_expires_at or None,
-        )
-    logger.info("Flushed %d dirty user states to DB.", len(dirty))
+        try:
+            await bot.db.save_user_state(
+                user_id=user_id,
+                offense_count=user_data.offense_count,
+                immune=user_data.immune,
+                off_topic_count=user_data.off_topic_count,
+                baseline_coherence=user_data.baseline_coherence,
+                user_notes=user_data.notes or None,
+                warned=user_data.warned_since_reset,
+                last_offense_at=user_data.last_offense_time or None,
+                aliases=_aliases_csv(user_data),
+                warning_expires_at=user_data.warning_expires_at or None,
+            )
+            dirty_users.discard(user_id)
+            saved += 1
+        except Exception:
+            logger.exception("Failed to flush state for user %d", user_id)
+    logger.info("Flushed %d/%d dirty user states to DB.", saved, len(dirty))
+2 -1
View File
@@ -628,7 +628,8 @@ class Database:
             return []
         # Build OR conditions for each keyword
         conditions = " OR ".join(["Topics LIKE ?" for _ in topic_keywords])
-        params = [limit, user_id] + [f"%{kw}%" for kw in topic_keywords]
+        escaped = [kw.replace("%", "[%]").replace("_", "[_]") for kw in topic_keywords]
+        params = [limit, user_id] + [f"%{kw}%" for kw in escaped]
        cursor.execute(
            f"""SELECT TOP (?) Memory, Topics, Importance, CreatedAt
            FROM UserMemory
+2 -1
View File
@@ -865,13 +865,14 @@ class LLMClient:
         system_prompt: str,
         user_text: str = "",
         on_first_token=None,
+        media_type: str = "image/png",
     ) -> str | None:
         """Send an image to the vision model with a system prompt.

         Returns the generated text response, or None on failure.
         """
         b64 = base64.b64encode(image_bytes).decode()
-        data_url = f"data:image/png;base64,{b64}"
+        data_url = f"data:{media_type};base64,{b64}"
         user_content: list[dict] = [
             {"type": "image_url", "image_url": {"url": data_url}},