Add LLM warm-up request at startup to preload model into VRAM
Sends a minimal 1-token completion during setup_hook so the model is ready before Discord messages start arriving, avoiding connection errors and slow first responses after a restart.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:

Changed files:
  bot.py  (+12 lines)
@@ -97,6 +97,18 @@ class BCSBot(commands.Bot):
|
|||||||
await self.tree.sync()
|
await self.tree.sync()
|
||||||
logger.info("Slash commands synced.")
|
logger.info("Slash commands synced.")
|
||||||
|
|
||||||
|
# Warm up the LLM so the model is loaded into VRAM before messages arrive
|
||||||
|
logger.info("Warming up LLM model...")
|
||||||
|
try:
|
||||||
|
resp = await self.llm._client.chat.completions.create(
|
||||||
|
model=self.llm.model,
|
||||||
|
messages=[{"role": "user", "content": "hi"}],
|
||||||
|
max_tokens=1,
|
||||||
|
)
|
||||||
|
logger.info("LLM warm-up complete.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("LLM warm-up failed — first messages may be slow: %s", e)
|
||||||
|
|
||||||
async def on_message(self, message: discord.Message):
|
async def on_message(self, message: discord.Message):
|
||||||
logger.info(
|
logger.info(
|
||||||
"EVENT on_message from %s in #%s: %s",
|
"EVENT on_message from %s in #%s: %s",
|
||||||
|
|||||||
Reference in New Issue
Block a user