diff --git a/bot.py b/bot.py index d9c88c1..0dbeb9d 100644 --- a/bot.py +++ b/bot.py @@ -78,7 +78,7 @@ class BCSBot(commands.Bot): no_think=is_local, concurrency=1 if is_local else 4, ) - # Heavy/escalation LLM (OpenAI for re-analysis, chat, image roasts, commands) + # Heavy/escalation LLM (OpenAI for re-analysis, image roasts, commands) esc_base_url = os.getenv("LLM_ESCALATION_BASE_URL", "") esc_model = os.getenv("LLM_ESCALATION_MODEL", "gpt-4o") esc_api_key = os.getenv("LLM_ESCALATION_API_KEY", llm_api_key) @@ -88,6 +88,19 @@ class BCSBot(commands.Bot): no_think=esc_is_local, concurrency=1 if esc_is_local else 4, ) + # Chat LLM (dedicated model for chat/roasts — defaults to llm_heavy) + chat_model = os.getenv("LLM_CHAT_MODEL", "") + chat_api_key = os.getenv("LLM_CHAT_API_KEY", esc_api_key) + chat_base_url = os.getenv("LLM_CHAT_BASE_URL", esc_base_url) + if chat_model: + chat_is_local = bool(chat_base_url) + self.llm_chat = LLMClient( + chat_base_url, chat_model, chat_api_key, db=self.db, + no_think=chat_is_local, concurrency=4, + ) + else: + self.llm_chat = self.llm_heavy + # Active mode (server-wide) modes_config = config.get("modes", {}) self.current_mode = modes_config.get("default_mode", "default") @@ -199,6 +212,8 @@ class BCSBot(commands.Bot): await self.db.close() await self.llm.close() await self.llm_heavy.close() + if self.llm_chat is not self.llm_heavy: + await self.llm_chat.close() await super().close() diff --git a/cogs/chat.py b/cogs/chat.py index 93f1e74..e917548 100644 --- a/cogs/chat.py +++ b/cogs/chat.py @@ -194,7 +194,7 @@ class ChatCog(commands.Cog): if m["role"] == "assistant" ][-5:] - response = await self.bot.llm.chat( + response = await self.bot.llm_chat.chat( list(self._chat_history[ch_id]), active_prompt, on_first_token=start_typing, @@ -312,7 +312,7 @@ class ChatCog(commands.Cog): if m["role"] == "assistant" ][-5:] - response = await self.bot.llm.chat( + response = await self.bot.llm_chat.chat( list(self._chat_history[ch_id]), active_prompt, recent_bot_replies=recent_bot_replies,