Support hybrid LLM: local Qwen triage + OpenAI escalation
Triage analysis runs on Qwen 8B (athena.lan) as a free first pass. Escalation, chat, image roasts, and commands use GPT-4o via OpenAI. Each tier gets its own base URL, API key, and concurrency settings. Local models get /no_think and serialized requests automatically. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
22
bot.py
22
bot.py
@@ -68,15 +68,25 @@ class BCSBot(commands.Bot):
|
||||
# Database (initialized async in setup_hook)
|
||||
self.db = Database()
|
||||
|
||||
# LLM clients (OpenAI — set LLM_BASE_URL to override for local models)
|
||||
# Triage LLM (local Qwen on athena for cheap first-pass analysis)
|
||||
llm_base_url = os.getenv("LLM_BASE_URL", "")
|
||||
llm_model = os.getenv("LLM_MODEL", "gpt-4o-mini")
|
||||
llm_api_key = os.getenv("LLM_API_KEY", "")
|
||||
self.llm = LLMClient(llm_base_url, llm_model, llm_api_key, db=self.db)
|
||||
llm_api_key = os.getenv("LLM_API_KEY", "not-needed")
|
||||
is_local = bool(llm_base_url)
|
||||
self.llm = LLMClient(
|
||||
llm_base_url, llm_model, llm_api_key, db=self.db,
|
||||
no_think=is_local, concurrency=1 if is_local else 4,
|
||||
)
|
||||
|
||||
# Heavy/escalation model for re-analysis, chat, and manual commands
|
||||
llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", "gpt-4o")
|
||||
self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key, db=self.db)
|
||||
# Heavy/escalation LLM (OpenAI for re-analysis, chat, image roasts, commands)
|
||||
esc_base_url = os.getenv("LLM_ESCALATION_BASE_URL", "")
|
||||
esc_model = os.getenv("LLM_ESCALATION_MODEL", "gpt-4o")
|
||||
esc_api_key = os.getenv("LLM_ESCALATION_API_KEY", llm_api_key)
|
||||
esc_is_local = bool(esc_base_url)
|
||||
self.llm_heavy = LLMClient(
|
||||
esc_base_url, esc_model, esc_api_key, db=self.db,
|
||||
no_think=esc_is_local, concurrency=1 if esc_is_local else 4,
|
||||
)
|
||||
|
||||
# Active mode (server-wide)
|
||||
modes_config = config.get("modes", {})
|
||||
|
||||
Reference in New Issue
Block a user