Add two-tier LLM analysis with triage/escalation
The triage model (LLM_MODEL) handles every message cheaply. If the triage result has toxicity >= 0.25, is flagged off_topic, or has coherence < 0.6, the message is re-analyzed with the heavy model (LLM_ESCALATION_MODEL). Chat, image analysis, /bcs-test, and /bcs-scan always use the heavy model.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
7
bot.py
7
bot.py
@@ -65,12 +65,16 @@ class BCSBot(commands.Bot):
|
||||
|
||||
self.config = config
|
||||
|
||||
# LLM client (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI)
|
||||
# LLM clients (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI)
|
||||
llm_base_url = os.getenv("LLM_BASE_URL", "http://athena.lan:11434")
|
||||
llm_model = os.getenv("LLM_MODEL", "Qwen3-VL-32B-Thinking-Q8_0")
|
||||
llm_api_key = os.getenv("LLM_API_KEY", "not-needed")
|
||||
self.llm = LLMClient(llm_base_url, llm_model, llm_api_key)
|
||||
|
||||
# Heavy/escalation model for re-analysis, chat, and manual commands
|
||||
llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", llm_model)
|
||||
self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key)
|
||||
|
||||
# Drama tracker
|
||||
sentiment = config.get("sentiment", {})
|
||||
timeouts = config.get("timeouts", {})
|
||||
@@ -167,6 +171,7 @@ class BCSBot(commands.Bot):
|
||||
async def close(self):
|
||||
await self.db.close()
|
||||
await self.llm.close()
|
||||
await self.llm_heavy.close()
|
||||
await super().close()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user