Add scoreboard roast feature via image analysis

When @mentioned with an image attachment, the bot now roasts players based on scoreboard screenshots using the vision model. Text-only mentions continue to work as before.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 16:30:26 -05:00
parent cf88f003ba
commit e41845de02
3 changed files with 117 additions and 22 deletions
@@ -9,6 +9,9 @@ logger = logging.getLogger("bcs.chat")
 # Directory holding the prompt text files: "prompts" under the parent of this module's directory.
 _PROMPTS_DIR = Path(__file__).resolve().parent.parent / "prompts"
 # System prompts are read once, at import time, as UTF-8 text.
 CHAT_PERSONALITY = (_PROMPTS_DIR / "chat_personality.txt").read_text(encoding="utf-8")
 SCOREBOARD_ROAST = (_PROMPTS_DIR / "scoreboard_roast.txt").read_text(encoding="utf-8")
 # Lower-case attachment filename extensions (without the dot) that are treated as images.
 _IMAGE_TYPES = {"png", "jpg", "jpeg", "gif", "webp"}
 class ChatCog(commands.Cog):
@@ -54,6 +57,41 @@ class ChatCog(commands.Cog):
        # Clean the mention out of the message content
        content = message.content.replace(f"<@{self.bot.user.id}>", "").strip()
        # Check for image attachments
        image_attachment = None
        for att in message.attachments:
            ext = att.filename.rsplit(".", 1)[-1].lower() if "." in att.filename else ""
            if ext in _IMAGE_TYPES:
                image_attachment = att
                break
        typing_ctx = None
        async def start_typing():
            """Enter the channel's typing context manager and keep it in ``typing_ctx``.

            Handed to the LLM call as its ``on_first_token`` callback. The context
            is entered by hand rather than with ``async with``, so the enclosing
            code is responsible for exiting it.
            NOTE(review): the matching ``__aexit__`` call is not visible in this
            hunk -- confirm it exists on every path.
            """
            nonlocal typing_ctx
            typing_ctx = message.channel.typing()
            await typing_ctx.__aenter__()
        if image_attachment:
            # --- Image path: scoreboard roast ---
            image_bytes = await image_attachment.read()
            user_text = content if content else "Roast this scoreboard."
            logger.info(
                "Image roast request in #%s from %s (%s, %s)",
                message.channel.name,
                message.author.display_name,
                image_attachment.filename,
                user_text[:80],
            )
            response = await self.bot.llm.analyze_image(
                image_bytes,
                SCOREBOARD_ROAST,
                user_text=user_text,
                on_first_token=start_typing,
            )
        else:
            # --- Text-only path: normal chat ---
            if not content:
                content = "(just pinged me)"
@@ -70,13 +108,6 @@ class ChatCog(commands.Cog):
                {"role": "user", "content": f"{score_context}\n{message.author.display_name}: {content}"}
            )
        typing_ctx = None
        async def start_typing():
            nonlocal typing_ctx
            typing_ctx = message.channel.typing()
            await typing_ctx.__aenter__()
            response = await self.bot.llm.chat(
                list(self._chat_history[ch_id]),
                CHAT_PERSONALITY,
@@ -89,6 +120,7 @@ class ChatCog(commands.Cog):
        if response is None:
            response = "I'd roast you but my brain is offline. Try again later."
        if not image_attachment:
            self._chat_history[ch_id].append(
                {"role": "assistant", "content": response}
            )
@@ -0,0 +1,13 @@
 You are the Breehavior Monitor, a sassy hall-monitor bot in a gaming Discord server called "Skill Issue Support Group".
 Someone just sent you a scoreboard screenshot. Your job: read it, identify players and their stats, and roast them based on their performance.
 Guidelines:
 - Call out specific players by name and reference their actual stats (kills, deaths, K/D, score, placement)
 - Bottom-fraggers and negative K/D ratios deserve the most heat
 - Top players can get backhanded compliments ("wow you carried harder than a pack mule and still almost lost")
 - Keep it to 4-6 sentences max — punchy, not a wall of text
 - You're sassy and judgmental but always playful, never genuinely hurtful
 - Use gaming terminology naturally (diff, skill issue, carried, bot, touched grass, etc.)
 - If you can't read the scoreboard clearly, roast them for their screenshot quality instead
 - Do NOT break character or mention being an AI
@@ -1,4 +1,5 @@
 import asyncio
 import base64
 import json
 import logging
 from pathlib import Path
@@ -238,6 +239,55 @@ class LLMClient:
                logger.error("LLM chat error: %s", e)
                return None
    async def analyze_image(
        self,
        image_bytes: bytes,
        system_prompt: str,
        user_text: str = "",
        on_first_token=None,
    ) -> str | None:
        """Send an image to the vision model with a system prompt.

        The image is embedded in the user message as a base64 ``data:`` URL.
        Its media type is sniffed from the leading bytes (PNG, JPEG, GIF, WebP)
        so the URL does not mislabel non-PNG uploads; unrecognised data falls
        back to ``image/png``.

        Args:
            image_bytes: Raw bytes of the image to analyse.
            system_prompt: System message that steers the model.
            user_text: Optional text sent alongside the image in the user turn.
            on_first_token: Optional zero-argument coroutine function, awaited
                once when the first non-empty content chunk arrives.

        Returns:
            The stripped, concatenated streamed text, or ``None`` when the
            image is empty, the model produced no text, or the request failed.
        """
        if not image_bytes:
            # Nothing to analyse; skip the API call instead of sending an
            # empty data URL.
            logger.error("LLM image analysis error: %s", "empty image payload")
            return None

        # Sniff the real format from its magic number so the data URL's media
        # type matches the payload (callers accept more than just PNG).
        if image_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
            mime = "image/png"
        elif image_bytes.startswith(b"\xff\xd8\xff"):
            mime = "image/jpeg"
        elif image_bytes.startswith((b"GIF87a", b"GIF89a")):
            mime = "image/gif"
        elif image_bytes[:4] == b"RIFF" and image_bytes[8:12] == b"WEBP":
            mime = "image/webp"
        else:
            # Unknown signature: keep the historical default.
            mime = "image/png"

        b64 = base64.b64encode(image_bytes).decode("ascii")
        data_url = f"data:{mime};base64,{b64}"

        user_content: list[dict] = [
            {"type": "image_url", "image_url": {"url": data_url}},
        ]
        if user_text:
            user_content.append({"type": "text", "text": user_text})

        # The semaphore bounds how many LLM requests run at the same time.
        async with self._semaphore:
            try:
                stream = await self._client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": user_content},
                    ],
                    temperature=0.8,
                    max_tokens=500,
                    stream=True,
                )

                chunks: list[str] = []
                notified = False
                async for chunk in stream:
                    # Some stream events carry no choices; skip those.
                    delta = chunk.choices[0].delta if chunk.choices else None
                    if delta and delta.content:
                        if not notified and on_first_token:
                            await on_first_token()
                            notified = True
                        chunks.append(delta.content)

                content = "".join(chunks).strip()
                return content if content else None
            except Exception as e:
                # Best-effort boundary: callers treat None as "no response".
                logger.error("LLM image analysis error: %s", e)
                return None
    async def raw_analyze(self, message: str, context: str = "", user_notes: str = "") -> tuple[str, dict | None]:
        """Return the raw LLM response string AND parsed result for /bcs-test (single LLM call)."""
        user_content = f"=== CONTEXT (other users' recent messages, for background only) ===\n{context}\n\n"