Add LLM request queue, streaming chat, and rename ollama_client to llm_client
- Serialize all LLM requests through an asyncio semaphore to prevent overloading athena with concurrent requests
- Switch chat() to streaming so the typing indicator appears only once the model starts generating (not during thinking/loading)
- Increase the LLM timeout from 5 to 10 minutes to accommodate slow first loads
- Rename ollama_client.py to llm_client.py and self.ollama to self.llm, since the bot uses a generic OpenAI-compatible API
- Update embed labels from "Ollama" to "LLM"

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -126,8 +126,8 @@ class CommandsCog(commands.Cog):
|
||||
inline=True,
|
||||
)
|
||||
embed.add_field(
|
||||
name="Ollama",
|
||||
value=f"`{self.bot.ollama.model}` @ `{self.bot.ollama.host}`",
|
||||
name="LLM",
|
||||
value=f"`{self.bot.llm.model}` @ `{self.bot.llm.host}`",
|
||||
inline=False,
|
||||
)
|
||||
|
||||
@@ -301,7 +301,7 @@ class CommandsCog(commands.Cog):
|
||||
else "(no prior context)"
|
||||
)
|
||||
|
||||
result = await self.bot.ollama.analyze_message(msg.content, context)
|
||||
result = await self.bot.llm.analyze_message(msg.content, context)
|
||||
if result is None:
|
||||
embed = discord.Embed(
|
||||
title=f"Analysis: {msg.author.display_name}",
|
||||
@@ -359,7 +359,7 @@ class CommandsCog(commands.Cog):
|
||||
await interaction.response.defer(ephemeral=True)
|
||||
|
||||
user_notes = self.bot.drama_tracker.get_user_notes(interaction.user.id)
|
||||
raw, parsed = await self.bot.ollama.raw_analyze(message, user_notes=user_notes)
|
||||
raw, parsed = await self.bot.llm.raw_analyze(message, user_notes=user_notes)
|
||||
|
||||
embed = discord.Embed(
|
||||
title="BCS Test Analysis", color=discord.Color.blue()
|
||||
@@ -368,7 +368,7 @@ class CommandsCog(commands.Cog):
|
||||
name="Input Message", value=message[:1024], inline=False
|
||||
)
|
||||
embed.add_field(
|
||||
name="Raw Ollama Response",
|
||||
name="Raw LLM Response",
|
||||
value=f"```json\n{raw[:1000]}\n```",
|
||||
inline=False,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user