Breehavior-Monitor/utils/ollama_client.py
AJ Isaacs a35705d3f1 Initial commit: Breehavior Monitor Discord bot
Discord bot for monitoring chat sentiment and tracking drama using
Ollama LLM on athena.lan. Includes sentiment analysis, slash commands,
drama tracking, and SQL Server persistence via Docker Compose.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 22:39:40 -05:00

import json
import logging
import re

from openai import AsyncOpenAI

logger = logging.getLogger("bcs.llm")

SYSTEM_PROMPT = """You are a Discord chat moderator AI for a gaming server. You will be given a TARGET MESSAGE to analyze, plus recent channel context for background.
CRITICAL: Only score the TARGET MESSAGE. The context is ONLY for understanding tone and conversation flow. Do NOT score the context messages — they belong to other users and are already being analyzed separately.
CONTEXT — This is a friend group who use crude nicknames (e.g. "tits" is someone's nickname). A nickname alone is NOT toxic. However, you must still flag genuinely aggressive language.
SCORING GUIDE — Be precise with scores:
- 0.0-0.1: Completely harmless. Casual chat, jokes, "lmao", greetings, game talk, nicknames.
- 0.2-0.3: Mildly edgy. Light trash-talk that's clearly playful ("you suck at this game lol").
- 0.4-0.5: Moderate. Frustrated or heated language that could go either way. Insults without "lol/lmao" softening them.
- 0.6-0.7: Aggressive. Direct insults, hostility, telling someone off. Even among friends, "fuck you, you piece of shit" is aggressive language that should score here regardless of context.
- 0.8-1.0: Severely toxic. Threats, targeted harassment, telling someone to leave, attacking insecurities, sustained personal attacks.
IMPORTANT RULES:
- "Tits" as a nickname = 0.0, not toxic.
- Profanity ALONE (just "fuck" or "shit" with no target) = low score (0.0-0.1).
- Profanity DIRECTED AT someone ("fuck you", "you piece of shit") = moderate-to-high score (0.5-0.7) even among friends.
- Do NOT let friendly context excuse clearly aggressive language. Friends can still cross lines.
- If a message contains BOTH a nickname AND an insult ("fuck you tits you piece of shit"), score the insult, not the nickname.
- If the target message is just "lmao", "lol", an emoji, or a short neutral reaction, it is ALWAYS 0.0 regardless of what other people said before it.
Also determine if the message is on-topic (gaming, games, matches, strategy, LFG, etc.) or off-topic personal drama (relationship issues, personal feuds, venting about real-life problems, gossip about people outside the server).
Also assess the message's coherence — how well-formed, readable, and grammatically correct it is.
- 0.9-1.0: Clear, well-written, normal for this user
- 0.6-0.8: Some errors but still understandable (normal texting shortcuts like "u" and "ur" are fine — don't penalize those)
- 0.3-0.5: Noticeably degraded — garbled words, missing letters, broken sentences beyond normal shorthand
- 0.0-0.2: Nearly incoherent — can barely understand what they're trying to say
You may also be given NOTES about this user from prior interactions. Use these to calibrate your scoring — for example, if notes say "uses heavy profanity casually" then profanity alone should score lower for this user.
If you notice something noteworthy about this user's communication style, behavior, or patterns that would help future analysis, include it as a note_update. Only add genuinely useful observations — don't repeat what's already in the notes. If nothing new, leave note_update as null.
Use the report_analysis tool to report your analysis of the TARGET MESSAGE only."""

ANALYSIS_TOOL = {
    "type": "function",
    "function": {
        "name": "report_analysis",
        "description": "Report the toxicity and topic analysis of a Discord message.",
        "parameters": {
            "type": "object",
            "properties": {
                "toxicity_score": {
                    "type": "number",
                    "description": "Toxicity rating from 0.0 (completely harmless) to 1.0 (extremely toxic).",
                },
                "categories": {
                    "type": "array",
                    "items": {
                        "type": "string",
                        "enum": [
                            "aggressive",
                            "passive_aggressive",
                            "instigating",
                            "hostile",
                            "manipulative",
                            "none",
                        ],
                    },
                    "description": "Detected toxicity behavior categories.",
                },
                "reasoning": {
                    "type": "string",
                    "description": "Brief explanation of the toxicity analysis.",
                },
                "off_topic": {
                    "type": "boolean",
                    "description": "True if the message is off-topic personal drama rather than gaming-related conversation.",
                },
                "topic_category": {
                    "type": "string",
                    "enum": [
                        "gaming",
                        "personal_drama",
                        "relationship_issues",
                        "real_life_venting",
                        "gossip",
                        "general_chat",
                        "meta",
                    ],
                    "description": "What topic category the message falls into.",
                },
                "topic_reasoning": {
                    "type": "string",
                    "description": "Brief explanation of the topic classification.",
                },
                "coherence_score": {
                    "type": "number",
                    "description": "Coherence rating from 0.0 (incoherent gibberish) to 1.0 (clear and well-written). Normal texting shortcuts are fine.",
                },
                "coherence_flag": {
                    "type": "string",
                    "enum": [
                        "normal",
                        "intoxicated",
                        "tired",
                        "angry_typing",
                        "mobile_keyboard",
                        "language_barrier",
                    ],
                    "description": "Best guess at why coherence is low, if applicable.",
                },
                "note_update": {
                    "type": ["string", "null"],
                    "description": "Brief new observation about this user's style/behavior for future reference, or null if nothing new.",
                },
            },
            "required": [
                "toxicity_score",
                "categories",
                "reasoning",
                "off_topic",
                "topic_category",
                "topic_reasoning",
                "coherence_score",
                "coherence_flag",
            ],
        },
    },
}
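
# Illustrative example (not taken from a real model run): the shape of arguments
# a conforming report_analysis tool call is expected to produce. The field values
# below are made up for demonstration only.
#
# {
#     "toxicity_score": 0.05,
#     "categories": ["none"],
#     "reasoning": "Casual gaming banter with no target.",
#     "off_topic": false,
#     "topic_category": "gaming",
#     "topic_reasoning": "Talking about last night's match.",
#     "coherence_score": 0.95,
#     "coherence_flag": "normal",
#     "note_update": null
# }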


class LLMClient:
    """Async client for the Ollama server's OpenAI-compatible API, used for message analysis."""

    def __init__(self, base_url: str, model: str, api_key: str = "not-needed"):
        self.model = model
        self.host = base_url.rstrip("/")
        self._client = AsyncOpenAI(
            base_url=f"{self.host}/v1",
            api_key=api_key,
            timeout=300.0,  # 5 min — first request loads model into VRAM
        )

    async def close(self):
        await self._client.close()

    async def analyze_message(
        self, message: str, context: str = "", user_notes: str = ""
    ) -> dict | None:
        """Analyze a single target message via the report_analysis tool; return the parsed result or None."""
        user_content = f"=== CONTEXT (other users' recent messages, for background only) ===\n{context}\n\n"
        if user_notes:
            user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
        user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}"
        try:
            response = await self._client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": user_content},
                ],
                tools=[ANALYSIS_TOOL],
                tool_choice={"type": "function", "function": {"name": "report_analysis"}},
                temperature=0.1,
            )
            choice = response.choices[0]
            # Extract tool call arguments
            if choice.message.tool_calls:
                tool_call = choice.message.tool_calls[0]
                args = json.loads(tool_call.function.arguments)
                return self._validate_result(args)
            # Fallback: try parsing the message content as JSON
            if choice.message.content:
                return self._parse_content_fallback(choice.message.content)
            logger.warning("No tool call or content in LLM response.")
            return None
        except Exception as e:
            logger.error("LLM analysis error: %s", e)
            return None

    def _validate_result(self, result: dict) -> dict:
        """Clamp scores to [0.0, 1.0] and fill safe defaults for any missing or malformed fields."""
        score = float(result.get("toxicity_score", 0.0))
        result["toxicity_score"] = min(max(score, 0.0), 1.0)
        if not isinstance(result.get("categories"), list):
            result["categories"] = ["none"]
        if not isinstance(result.get("reasoning"), str):
            result["reasoning"] = ""
        result["off_topic"] = bool(result.get("off_topic", False))
        result.setdefault("topic_category", "general_chat")
        result.setdefault("topic_reasoning", "")
        coherence = float(result.get("coherence_score", 0.85))
        result["coherence_score"] = min(max(coherence, 0.0), 1.0)
        result.setdefault("coherence_flag", "normal")
        result.setdefault("note_update", None)
        return result

    def _parse_content_fallback(self, text: str) -> dict | None:
        """Try to parse plain-text content as JSON if tool calling didn't work."""
        # Try direct JSON
        try:
            result = json.loads(text.strip())
            return self._validate_result(result)
        except (json.JSONDecodeError, ValueError):
            pass
        # Try extracting from code block
        match = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
        if match:
            try:
                result = json.loads(match.group(1))
                return self._validate_result(result)
            except (json.JSONDecodeError, ValueError):
                pass
        # Regex fallback for toxicity_score
        score_match = re.search(r'"toxicity_score"\s*:\s*([\d.]+)', text)
        if score_match:
            return {
                "toxicity_score": min(max(float(score_match.group(1)), 0.0), 1.0),
                "categories": ["unknown"],
                "reasoning": "Parsed via fallback regex",
            }
        logger.warning("Could not parse LLM content fallback: %s", text[:200])
        return None

    async def chat(
        self, messages: list[dict[str, str]], system_prompt: str
    ) -> str | None:
        """Send a conversational chat request (no tools)."""
        try:
            response = await self._client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    *messages,
                ],
                temperature=0.8,
                max_tokens=300,
            )
            content = response.choices[0].message.content
            return content.strip() if content else None
        except Exception as e:
            logger.error("LLM chat error: %s", e)
            return None

    async def raw_analyze(
        self, message: str, context: str = "", user_notes: str = ""
    ) -> tuple[str, dict | None]:
        """Return the raw LLM response string AND parsed result for /bcs-test (single LLM call)."""
        user_content = f"=== CONTEXT (other users' recent messages, for background only) ===\n{context}\n\n"
        if user_notes:
            user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
        user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}"
        try:
            response = await self._client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": user_content},
                ],
                tools=[ANALYSIS_TOOL],
                tool_choice={"type": "function", "function": {"name": "report_analysis"}},
                temperature=0.1,
            )
            choice = response.choices[0]
            parts = []
            parsed = None
            if choice.message.content:
                parts.append(f"Content: {choice.message.content}")
            if choice.message.tool_calls:
                for tc in choice.message.tool_calls:
                    parts.append(
                        f"Tool call: {tc.function.name}({tc.function.arguments})"
                    )
                # Parse the first tool call
                args = json.loads(choice.message.tool_calls[0].function.arguments)
                parsed = self._validate_result(args)
            elif choice.message.content:
                parsed = self._parse_content_fallback(choice.message.content)
            raw = "\n".join(parts) or "(empty response)"
            return raw, parsed
        except Exception as e:
            return f"Error: {e}", None