- Serialize all LLM requests through an asyncio semaphore to prevent overloading athena with concurrent requests
- Switch chat() to streaming so the typing indicator only appears once the model starts generating (not during thinking/loading)
- Increase LLM timeout from 5 to 10 minutes for slow first loads
- Rename ollama_client.py to llm_client.py and self.ollama to self.llm since the bot uses a generic OpenAI-compatible API
- Update embed labels from "Ollama" to "LLM"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
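The file shown below is only the calling side of these changes. As a rough, non-authoritative sketch, the reworked llm_client.py described above might look something like the following; the class name LLMClient, the use of the openai package's AsyncOpenAI client, and the constructor arguments are assumptions (that file is not part of this view), while the semaphore serialization, streaming with an on_first_token callback, the 10-minute timeout, and returning None on failure come from the commit message and the cog below:

import asyncio
import logging

from openai import AsyncOpenAI

logger = logging.getLogger("bcs.llm")


class LLMClient:
    def __init__(self, base_url: str, model: str):
        # 10-minute timeout so slow first loads of the model don't abort the request.
        self._client = AsyncOpenAI(base_url=base_url, api_key="unused", timeout=600.0)
        self._model = model
        # Single-slot semaphore: all LLM requests are serialized so concurrent
        # chats can't pile up on the backend.
        self._semaphore = asyncio.Semaphore(1)

    async def chat(self, messages, system_prompt, on_first_token=None):
        """Stream a chat completion and return the full text, or None on failure."""
        payload = [{"role": "system", "content": system_prompt}, *messages]
        try:
            async with self._semaphore:
                stream = await self._client.chat.completions.create(
                    model=self._model, messages=payload, stream=True
                )
                chunks: list[str] = []
                async for chunk in stream:
                    if not chunk.choices:
                        continue
                    delta = chunk.choices[0].delta.content
                    if delta:
                        # Fire the callback once, on the first generated token.
                        if not chunks and on_first_token is not None:
                            await on_first_token()
                        chunks.append(delta)
                return "".join(chunks) or None
        except Exception:
            logger.exception("LLM request failed")
            return None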
107 lines · 3.4 KiB · Python
import logging
from collections import deque
from pathlib import Path

import discord
from discord.ext import commands

logger = logging.getLogger("bcs.chat")

_PROMPTS_DIR = Path(__file__).resolve().parent.parent / "prompts"
CHAT_PERSONALITY = (_PROMPTS_DIR / "chat_personality.txt").read_text(encoding="utf-8")


class ChatCog(commands.Cog):
    def __init__(self, bot: commands.Bot):
        self.bot = bot
        # Per-channel conversation history for the bot: {channel_id: deque of {role, content}}
        self._chat_history: dict[int, deque] = {}

    @commands.Cog.listener()
    async def on_message(self, message: discord.Message):
        if message.author.bot:
            return

        if not message.guild:
            return

        should_reply = False

        # Check if bot is @mentioned
        if self.bot.user in message.mentions:
            should_reply = True

        # Check if replying to one of the bot's messages
        if message.reference and message.reference.message_id:
            try:
                ref_msg = message.reference.cached_message
                if ref_msg is None:
                    ref_msg = await message.channel.fetch_message(
                        message.reference.message_id
                    )
                if ref_msg.author.id == self.bot.user.id:
                    should_reply = True
            except discord.HTTPException:
                pass

        if not should_reply:
            return

        # Build conversation context
        ch_id = message.channel.id
        if ch_id not in self._chat_history:
            self._chat_history[ch_id] = deque(maxlen=10)

        # Clean the mention out of the message content (covering both the
        # plain <@id> form and the nickname form <@!id>)
        content = (
            message.content.replace(f"<@!{self.bot.user.id}>", "")
            .replace(f"<@{self.bot.user.id}>", "")
            .strip()
        )
        if not content:
            content = "(just pinged me)"

        # Add drama score context to the user message
        drama_score = self.bot.drama_tracker.get_drama_score(message.author.id)
        user_data = self.bot.drama_tracker.get_user(message.author.id)
        score_context = (
            f"[Server context: {message.author.display_name} has a drama score of "
            f"{drama_score:.2f}/1.0 and {user_data.offense_count} offenses. "
            f"They are talking in #{message.channel.name}.]"
        )

        self._chat_history[ch_id].append(
            {"role": "user", "content": f"{score_context}\n{message.author.display_name}: {content}"}
        )

        # Defer the typing indicator: it is only started from the streaming
        # callback once the model emits its first token, so the bot does not
        # look "typing" while the model is still loading or thinking.
        typing_ctx = None

        async def start_typing():
            nonlocal typing_ctx
            typing_ctx = message.channel.typing()
            await typing_ctx.__aenter__()

        response = await self.bot.llm.chat(
            list(self._chat_history[ch_id]),
            CHAT_PERSONALITY,
            on_first_token=start_typing,
        )

        if typing_ctx:
            await typing_ctx.__aexit__(None, None, None)

        if response is None:
            response = "I'd roast you but my brain is offline. Try again later."

        self._chat_history[ch_id].append(
            {"role": "assistant", "content": response}
        )

        await message.reply(response, mention_author=False)
        logger.info(
            "Chat reply in #%s to %s: %s",
            message.channel.name,
            message.author.display_name,
            response[:100],
        )


async def setup(bot: commands.Bot):
    await bot.add_cog(ChatCog(bot))
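
discord.py picks up setup() through its extension loader. Assuming this file lives at cogs/chat.py (the module path is a guess, not shown in this view), the bot would load it with roughly:

    await bot.load_extension("cogs.chat")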
|