Add LLM request queue, streaming chat, and rename ollama_client to llm_client

- Serialize all LLM requests through an asyncio semaphore to prevent overloading athena with concurrent requests
- Switch chat() to streaming so the typing indicator only appears once the model starts generating (not during thinking/loading)
- Increase LLM timeout from 5 to 10 minutes for slow first loads
- Rename ollama_client.py to llm_client.py and self.ollama to self.llm since the bot uses a generic OpenAI-compatible API
- Update embed labels from "Ollama" to "LLM"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 13:45:12 -05:00
parent 645b924011
commit 1151b705c0
5 changed files with 120 additions and 87 deletions
--- a/cogs/commands.py
+++ b/cogs/commands.py
@@ -126,8 +126,8 @@ class CommandsCog(commands.Cog):
            inline=True,
        )
        embed.add_field(
-            name="Ollama",
-            value=f"`{self.bot.ollama.model}` @ `{self.bot.ollama.host}`",
+            name="LLM",
+            value=f"`{self.bot.llm.model}` @ `{self.bot.llm.host}`",
            inline=False,
        )

@@ -301,7 +301,7 @@ class CommandsCog(commands.Cog):
                else "(no prior context)"
            )

-            result = await self.bot.ollama.analyze_message(msg.content, context)
+            result = await self.bot.llm.analyze_message(msg.content, context)
            if result is None:
                embed = discord.Embed(
                    title=f"Analysis: {msg.author.display_name}",
@@ -359,7 +359,7 @@ class CommandsCog(commands.Cog):
        await interaction.response.defer(ephemeral=True)

        user_notes = self.bot.drama_tracker.get_user_notes(interaction.user.id)
-        raw, parsed = await self.bot.ollama.raw_analyze(message, user_notes=user_notes)
+        raw, parsed = await self.bot.llm.raw_analyze(message, user_notes=user_notes)

        embed = discord.Embed(
            title="BCS Test Analysis", color=discord.Color.blue()
@@ -368,7 +368,7 @@ class CommandsCog(commands.Cog):
            name="Input Message", value=message[:1024], inline=False
        )
        embed.add_field(
-            name="Raw Ollama Response",
+            name="Raw LLM Response",
            value=f"```json\n{raw[:1000]}\n```",
            inline=False,
        )