Support hybrid LLM: local Qwen triage + OpenAI escalation

Triage analysis runs on Qwen 8B (athena.lan) as a free first pass.
Escalation, chat, image roasts, and commands use GPT-4o via OpenAI.

Each tier gets its own base URL, API key, and concurrency settings.
Local models automatically get /no_think appended and have their requests serialized.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 12:20:07 -05:00
parent b5e401f036
commit 8a06ddbd6e
3 changed files with 49 additions and 18 deletions

22
bot.py
View File

@@ -68,15 +68,25 @@ class BCSBot(commands.Bot):
# Database (initialized async in setup_hook)
self.db = Database()
# LLM clients (OpenAI — set LLM_BASE_URL to override for local models)
# Triage LLM (local Qwen on athena for cheap first-pass analysis)
llm_base_url = os.getenv("LLM_BASE_URL", "")
llm_model = os.getenv("LLM_MODEL", "gpt-4o-mini")
llm_api_key = os.getenv("LLM_API_KEY", "")
self.llm = LLMClient(llm_base_url, llm_model, llm_api_key, db=self.db)
llm_api_key = os.getenv("LLM_API_KEY", "not-needed")
is_local = bool(llm_base_url)
self.llm = LLMClient(
llm_base_url, llm_model, llm_api_key, db=self.db,
no_think=is_local, concurrency=1 if is_local else 4,
)
# Heavy/escalation model for re-analysis, chat, and manual commands
llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", "gpt-4o")
self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key, db=self.db)
# Heavy/escalation LLM (OpenAI for re-analysis, chat, image roasts, commands)
esc_base_url = os.getenv("LLM_ESCALATION_BASE_URL", "")
esc_model = os.getenv("LLM_ESCALATION_MODEL", "gpt-4o")
esc_api_key = os.getenv("LLM_ESCALATION_API_KEY", llm_api_key)
esc_is_local = bool(esc_base_url)
self.llm_heavy = LLMClient(
esc_base_url, esc_model, esc_api_key, db=self.db,
no_think=esc_is_local, concurrency=1 if esc_is_local else 4,
)
# Active mode (server-wide)
modes_config = config.get("modes", {})