Files
Breehavior-Monitor/cogs/chat.py
AJ Isaacs 1151b705c0 Add LLM request queue, streaming chat, and rename ollama_client to llm_client
- Serialize all LLM requests through an asyncio semaphore to prevent
  overloading athena with concurrent requests
- Switch chat() to streaming so the typing indicator only appears once
  the model starts generating (not during thinking/loading)
- Increase LLM timeout from 5 to 10 minutes for slow first loads
- Rename ollama_client.py to llm_client.py and self.ollama to self.llm
  since the bot uses a generic OpenAI-compatible API
- Update embed labels from "Ollama" to "LLM"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 13:45:12 -05:00

107 lines
3.4 KiB
Python

import logging
from collections import deque
from pathlib import Path
import discord
from discord.ext import commands
# Module-level logger, registered under the fixed name "bcs.chat".
logger = logging.getLogger("bcs.chat")
# "prompts" directory located two levels above this file's resolved path.
_PROMPTS_DIR = Path(__file__).resolve().parent.parent / "prompts"
# Text passed to the LLM alongside every chat history. It is read once, at import
# time, so a missing or unreadable chat_personality.txt makes the import itself fail.
CHAT_PERSONALITY = (_PROMPTS_DIR / "chat_personality.txt").read_text(encoding="utf-8")
class ChatCog(commands.Cog):
    """Cog that answers through the LLM when the bot is mentioned or replied to.

    A short rolling conversation history is kept per channel and sent with
    every request to ``bot.llm.chat``.
    """

    # Maximum number of history entries (user and assistant turns) per channel.
    _HISTORY_LENGTH = 10
    # Discord rejects message content longer than this many characters.
    _MAX_REPLY_CHARS = 2000

    def __init__(self, bot: commands.Bot):
        self.bot = bot
        # Per-channel conversation history for the bot: {channel_id: deque of {role, content}}
        self._chat_history: dict[int, deque] = {}

    async def _is_addressed_to_bot(self, message: discord.Message) -> bool:
        """Return True if ``message`` mentions the bot or replies to one of its messages."""
        if self.bot.user in message.mentions:
            # Already decided; skip the (possibly remote) referenced-message lookup.
            return True

        reference = message.reference
        if not (reference and reference.message_id):
            return False

        try:
            ref_msg = reference.cached_message
            if ref_msg is None:
                ref_msg = await message.channel.fetch_message(reference.message_id)
        except discord.HTTPException:
            # Best effort: if the referenced message cannot be fetched, treat the
            # message as not being a reply to the bot.
            logger.debug(
                "Could not fetch referenced message %s",
                reference.message_id,
                exc_info=True,
            )
            return False
        return ref_msg.author.id == self.bot.user.id

    def _strip_bot_mention(self, text: str) -> str:
        """Remove the bot's own mention markup from ``text`` and trim whitespace.

        Both the plain ``<@id>`` form and the nickname ``<@!id>`` form are removed.
        """
        bot_id = self.bot.user.id
        for mention in (f"<@{bot_id}>", f"<@!{bot_id}>"):
            text = text.replace(mention, "")
        return text.strip()

    @commands.Cog.listener()
    async def on_message(self, message: discord.Message):
        """Reply via the LLM to guild messages that mention or reply to the bot.

        Messages from bots, messages outside a guild, and messages not addressed
        to the bot are ignored. The user's message (prefixed with drama-score
        context) and the reply that is produced are appended to the channel's
        history.
        """
        if message.author.bot:
            return
        if not message.guild:
            return
        if not await self._is_addressed_to_bot(message):
            return

        # Build conversation context
        ch_id = message.channel.id
        if ch_id not in self._chat_history:
            self._chat_history[ch_id] = deque(maxlen=self._HISTORY_LENGTH)
        history = self._chat_history[ch_id]

        # Clean the mention out of the message content
        content = self._strip_bot_mention(message.content)
        if not content:
            content = "(just pinged me)"

        # Add drama score context to the user message
        drama_score = self.bot.drama_tracker.get_drama_score(message.author.id)
        user_data = self.bot.drama_tracker.get_user(message.author.id)
        score_context = (
            f"[Server context: {message.author.display_name} has a drama score of "
            f"{drama_score:.2f}/1.0 and {user_data.offense_count} offenses. "
            f"They are talking in #{message.channel.name}.]"
        )
        history.append(
            {"role": "user", "content": f"{score_context}\n{message.author.display_name}: {content}"}
        )

        typing_ctx = None

        async def start_typing():
            # Handed to the LLM client as ``on_first_token``; the typing indicator
            # starts only if and when the client invokes this callback.
            nonlocal typing_ctx
            ctx = message.channel.typing()
            await ctx.__aenter__()
            # Record the context only after it was entered successfully, so the
            # cleanup below never exits a context that was never entered.
            typing_ctx = ctx

        try:
            response = await self.bot.llm.chat(
                list(history),
                CHAT_PERSONALITY,
                on_first_token=start_typing,
            )
        finally:
            # Always leave the typing context, even when the LLM call raises;
            # otherwise the manually-entered context is never exited.
            if typing_ctx is not None:
                await typing_ctx.__aexit__(None, None, None)

        # An empty reply cannot be sent either, so treat it like a missing one.
        if not response:
            response = "I'd roast you but my brain is offline. Try again later."

        history.append({"role": "assistant", "content": response})

        # Clip only the text that is sent; the history keeps the full response.
        reply_text = response
        if len(reply_text) > self._MAX_REPLY_CHARS:
            reply_text = reply_text[: self._MAX_REPLY_CHARS - 3] + "..."

        await message.reply(reply_text, mention_author=False)
        logger.info(
            "Chat reply in #%s to %s: %s",
            message.channel.name,
            message.author.display_name,
            response[:100],
        )
async def setup(bot: commands.Bot):
    """Create a ChatCog bound to ``bot`` and register it on the bot."""
    chat_cog = ChatCog(bot)
    await bot.add_cog(chat_cog)