Use gpt-4o-mini for chat/roasts via dedicated LLM_CHAT_MODEL
Add a separate llm_chat client so that chat responses use a smarter model (gpt-4o-mini), while analysis stays on the cheap local Qwen3-8B. If LLM_CHAT_MODEL is not set, llm_chat falls back to llm_heavy.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
17
bot.py
17
bot.py
@@ -78,7 +78,7 @@ class BCSBot(commands.Bot):
|
|||||||
no_think=is_local, concurrency=1 if is_local else 4,
|
no_think=is_local, concurrency=1 if is_local else 4,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Heavy/escalation LLM (OpenAI for re-analysis, chat, image roasts, commands)
|
# Heavy/escalation LLM (OpenAI for re-analysis, image roasts, commands)
|
||||||
esc_base_url = os.getenv("LLM_ESCALATION_BASE_URL", "")
|
esc_base_url = os.getenv("LLM_ESCALATION_BASE_URL", "")
|
||||||
esc_model = os.getenv("LLM_ESCALATION_MODEL", "gpt-4o")
|
esc_model = os.getenv("LLM_ESCALATION_MODEL", "gpt-4o")
|
||||||
esc_api_key = os.getenv("LLM_ESCALATION_API_KEY", llm_api_key)
|
esc_api_key = os.getenv("LLM_ESCALATION_API_KEY", llm_api_key)
|
||||||
@@ -88,6 +88,19 @@ class BCSBot(commands.Bot):
|
|||||||
no_think=esc_is_local, concurrency=1 if esc_is_local else 4,
|
no_think=esc_is_local, concurrency=1 if esc_is_local else 4,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Chat LLM (dedicated model for chat/roasts — defaults to llm_heavy)
|
||||||
|
chat_model = os.getenv("LLM_CHAT_MODEL", "")
|
||||||
|
chat_api_key = os.getenv("LLM_CHAT_API_KEY", esc_api_key)
|
||||||
|
chat_base_url = os.getenv("LLM_CHAT_BASE_URL", esc_base_url)
|
||||||
|
if chat_model:
|
||||||
|
chat_is_local = bool(chat_base_url)
|
||||||
|
self.llm_chat = LLMClient(
|
||||||
|
chat_base_url, chat_model, chat_api_key, db=self.db,
|
||||||
|
no_think=chat_is_local, concurrency=4,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.llm_chat = self.llm_heavy
|
||||||
|
|
||||||
# Active mode (server-wide)
|
# Active mode (server-wide)
|
||||||
modes_config = config.get("modes", {})
|
modes_config = config.get("modes", {})
|
||||||
self.current_mode = modes_config.get("default_mode", "default")
|
self.current_mode = modes_config.get("default_mode", "default")
|
||||||
@@ -199,6 +212,8 @@ class BCSBot(commands.Bot):
|
|||||||
await self.db.close()
|
await self.db.close()
|
||||||
await self.llm.close()
|
await self.llm.close()
|
||||||
await self.llm_heavy.close()
|
await self.llm_heavy.close()
|
||||||
|
if self.llm_chat is not self.llm_heavy:
|
||||||
|
await self.llm_chat.close()
|
||||||
await super().close()
|
await super().close()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -194,7 +194,7 @@ class ChatCog(commands.Cog):
|
|||||||
if m["role"] == "assistant"
|
if m["role"] == "assistant"
|
||||||
][-5:]
|
][-5:]
|
||||||
|
|
||||||
response = await self.bot.llm.chat(
|
response = await self.bot.llm_chat.chat(
|
||||||
list(self._chat_history[ch_id]),
|
list(self._chat_history[ch_id]),
|
||||||
active_prompt,
|
active_prompt,
|
||||||
on_first_token=start_typing,
|
on_first_token=start_typing,
|
||||||
@@ -312,7 +312,7 @@ class ChatCog(commands.Cog):
|
|||||||
if m["role"] == "assistant"
|
if m["role"] == "assistant"
|
||||||
][-5:]
|
][-5:]
|
||||||
|
|
||||||
response = await self.bot.llm.chat(
|
response = await self.bot.llm_chat.chat(
|
||||||
list(self._chat_history[ch_id]),
|
list(self._chat_history[ch_id]),
|
||||||
active_prompt,
|
active_prompt,
|
||||||
recent_bot_replies=recent_bot_replies,
|
recent_bot_replies=recent_bot_replies,
|
||||||
|
|||||||
Reference in New Issue
Block a user