Add LLM request/response logging to database
Log every LLM call (analysis, chat, image, raw_analyze) to a new LlmLog table with request type, model, token counts, duration, success/failure, and truncated request/response payloads. Enables debugging prompt issues and tracking usage. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -65,15 +65,18 @@ class BCSBot(commands.Bot):
|
|||||||
|
|
||||||
self.config = config
|
self.config = config
|
||||||
|
|
||||||
|
# Database (initialized async in setup_hook)
|
||||||
|
self.db = Database()
|
||||||
|
|
||||||
# LLM clients (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI)
|
# LLM clients (OpenAI-compatible — works with llama.cpp, Ollama, or OpenAI)
|
||||||
llm_base_url = os.getenv("LLM_BASE_URL", "http://athena.lan:11434")
|
llm_base_url = os.getenv("LLM_BASE_URL", "http://athena.lan:11434")
|
||||||
llm_model = os.getenv("LLM_MODEL", "Qwen3-VL-32B-Thinking-Q8_0")
|
llm_model = os.getenv("LLM_MODEL", "Qwen3-VL-32B-Thinking-Q8_0")
|
||||||
llm_api_key = os.getenv("LLM_API_KEY", "not-needed")
|
llm_api_key = os.getenv("LLM_API_KEY", "not-needed")
|
||||||
self.llm = LLMClient(llm_base_url, llm_model, llm_api_key)
|
self.llm = LLMClient(llm_base_url, llm_model, llm_api_key, db=self.db)
|
||||||
|
|
||||||
# Heavy/escalation model for re-analysis, chat, and manual commands
|
# Heavy/escalation model for re-analysis, chat, and manual commands
|
||||||
llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", llm_model)
|
llm_heavy_model = os.getenv("LLM_ESCALATION_MODEL", llm_model)
|
||||||
self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key)
|
self.llm_heavy = LLMClient(llm_base_url, llm_heavy_model, llm_api_key, db=self.db)
|
||||||
|
|
||||||
# Drama tracker
|
# Drama tracker
|
||||||
sentiment = config.get("sentiment", {})
|
sentiment = config.get("sentiment", {})
|
||||||
@@ -84,9 +87,6 @@ class BCSBot(commands.Bot):
|
|||||||
offense_reset_minutes=timeouts.get("offense_reset_minutes", 120),
|
offense_reset_minutes=timeouts.get("offense_reset_minutes", 120),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Database (initialized async in setup_hook)
|
|
||||||
self.db = Database()
|
|
||||||
|
|
||||||
async def setup_hook(self):
|
async def setup_hook(self):
|
||||||
# Initialize database and hydrate DramaTracker
|
# Initialize database and hydrate DramaTracker
|
||||||
db_ok = await self.db.init()
|
db_ok = await self.db.init()
|
||||||
|
|||||||
@@ -126,6 +126,23 @@ class Database:
|
|||||||
ALTER TABLE UserState ADD UserNotes NVARCHAR(MAX) NULL
|
ALTER TABLE UserState ADD UserNotes NVARCHAR(MAX) NULL
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
cursor.execute("""
|
||||||
|
IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = 'LlmLog')
|
||||||
|
CREATE TABLE LlmLog (
|
||||||
|
Id BIGINT IDENTITY(1,1) PRIMARY KEY,
|
||||||
|
RequestType NVARCHAR(50) NOT NULL,
|
||||||
|
Model NVARCHAR(100) NOT NULL,
|
||||||
|
InputTokens INT NULL,
|
||||||
|
OutputTokens INT NULL,
|
||||||
|
DurationMs INT NOT NULL,
|
||||||
|
Success BIT NOT NULL,
|
||||||
|
Request NVARCHAR(MAX) NOT NULL,
|
||||||
|
Response NVARCHAR(MAX) NULL,
|
||||||
|
Error NVARCHAR(MAX) NULL,
|
||||||
|
CreatedAt DATETIME2 NOT NULL DEFAULT SYSUTCDATETIME()
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
cursor.close()
|
cursor.close()
|
||||||
|
|
||||||
def _parse_database_name(self) -> str:
|
def _parse_database_name(self) -> str:
|
||||||
@@ -348,6 +365,55 @@ class Database:
|
|||||||
finally:
|
finally:
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# LLM Log (fire-and-forget via asyncio.create_task)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
async def save_llm_log(
|
||||||
|
self,
|
||||||
|
request_type: str,
|
||||||
|
model: str,
|
||||||
|
duration_ms: int,
|
||||||
|
success: bool,
|
||||||
|
request: str,
|
||||||
|
response: str | None = None,
|
||||||
|
error: str | None = None,
|
||||||
|
input_tokens: int | None = None,
|
||||||
|
output_tokens: int | None = None,
|
||||||
|
) -> None:
|
||||||
|
"""Save an LLM request/response log entry."""
|
||||||
|
if not self._available:
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
await asyncio.to_thread(
|
||||||
|
self._save_llm_log_sync,
|
||||||
|
request_type, model, duration_ms, success, request,
|
||||||
|
response, error, input_tokens, output_tokens,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
logger.exception("Failed to save LLM log")
|
||||||
|
|
||||||
|
def _save_llm_log_sync(
|
||||||
|
self, request_type, model, duration_ms, success, request,
|
||||||
|
response, error, input_tokens, output_tokens,
|
||||||
|
):
|
||||||
|
conn = self._connect()
|
||||||
|
try:
|
||||||
|
cursor = conn.cursor()
|
||||||
|
cursor.execute(
|
||||||
|
"""INSERT INTO LlmLog
|
||||||
|
(RequestType, Model, InputTokens, OutputTokens, DurationMs,
|
||||||
|
Success, Request, Response, Error)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
||||||
|
request_type, model, input_tokens, output_tokens, duration_ms,
|
||||||
|
1 if success else 0,
|
||||||
|
request[:4000] if request else "",
|
||||||
|
response[:4000] if response else None,
|
||||||
|
error[:4000] if error else None,
|
||||||
|
)
|
||||||
|
cursor.close()
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
"""No persistent connection to close (connections are per-operation)."""
|
"""No persistent connection to close (connections are per-operation)."""
|
||||||
pass
|
pass
|
||||||
|
|||||||
+88
-11
@@ -2,6 +2,7 @@ import asyncio
|
|||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from openai import AsyncOpenAI
|
from openai import AsyncOpenAI
|
||||||
@@ -97,9 +98,10 @@ ANALYSIS_TOOL = {
|
|||||||
|
|
||||||
|
|
||||||
class LLMClient:
|
class LLMClient:
|
||||||
def __init__(self, base_url: str, model: str, api_key: str = "not-needed"):
|
def __init__(self, base_url: str, model: str, api_key: str = "not-needed", db=None):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.host = base_url.rstrip("/")
|
self.host = base_url.rstrip("/")
|
||||||
|
self._db = db
|
||||||
self._client = AsyncOpenAI(
|
self._client = AsyncOpenAI(
|
||||||
base_url=f"{self.host}/v1",
|
base_url=f"{self.host}/v1",
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
@@ -107,6 +109,24 @@ class LLMClient:
|
|||||||
)
|
)
|
||||||
self._semaphore = asyncio.Semaphore(1) # serialize requests to avoid overloading
|
self._semaphore = asyncio.Semaphore(1) # serialize requests to avoid overloading
|
||||||
|
|
||||||
|
def _log_llm(self, request_type: str, duration_ms: int, success: bool,
|
||||||
|
request: str, response: str | None = None, error: str | None = None,
|
||||||
|
input_tokens: int | None = None, output_tokens: int | None = None):
|
||||||
|
"""Fire-and-forget LLM log entry to the database."""
|
||||||
|
if not self._db:
|
||||||
|
return
|
||||||
|
asyncio.create_task(self._db.save_llm_log(
|
||||||
|
request_type=request_type,
|
||||||
|
model=self.model,
|
||||||
|
duration_ms=duration_ms,
|
||||||
|
success=success,
|
||||||
|
request=request,
|
||||||
|
response=response,
|
||||||
|
error=error,
|
||||||
|
input_tokens=input_tokens,
|
||||||
|
output_tokens=output_tokens,
|
||||||
|
))
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self._client.close()
|
await self._client.close()
|
||||||
|
|
||||||
@@ -119,7 +139,13 @@ class LLMClient:
|
|||||||
user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
|
user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
|
||||||
if channel_context:
|
if channel_context:
|
||||||
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
|
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
|
||||||
user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}"
|
user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}\n/no_think"
|
||||||
|
|
||||||
|
req_json = json.dumps([
|
||||||
|
{"role": "system", "content": SYSTEM_PROMPT[:500]},
|
||||||
|
{"role": "user", "content": user_content},
|
||||||
|
], default=str)
|
||||||
|
t0 = time.monotonic()
|
||||||
|
|
||||||
async with self._semaphore:
|
async with self._semaphore:
|
||||||
try:
|
try:
|
||||||
@@ -132,26 +158,38 @@ class LLMClient:
|
|||||||
tools=[ANALYSIS_TOOL],
|
tools=[ANALYSIS_TOOL],
|
||||||
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
|
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=1024,
|
max_tokens=2048,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
elapsed = int((time.monotonic() - t0) * 1000)
|
||||||
choice = response.choices[0]
|
choice = response.choices[0]
|
||||||
|
usage = response.usage
|
||||||
|
|
||||||
# Extract tool call arguments
|
# Extract tool call arguments
|
||||||
if choice.message.tool_calls:
|
if choice.message.tool_calls:
|
||||||
tool_call = choice.message.tool_calls[0]
|
tool_call = choice.message.tool_calls[0]
|
||||||
args = json.loads(tool_call.function.arguments)
|
resp_text = tool_call.function.arguments
|
||||||
|
args = json.loads(resp_text)
|
||||||
|
self._log_llm("analysis", elapsed, True, req_json, resp_text,
|
||||||
|
input_tokens=usage.prompt_tokens if usage else None,
|
||||||
|
output_tokens=usage.completion_tokens if usage else None)
|
||||||
return self._validate_result(args)
|
return self._validate_result(args)
|
||||||
|
|
||||||
# Fallback: try parsing the message content as JSON
|
# Fallback: try parsing the message content as JSON
|
||||||
if choice.message.content:
|
if choice.message.content:
|
||||||
|
self._log_llm("analysis", elapsed, True, req_json, choice.message.content,
|
||||||
|
input_tokens=usage.prompt_tokens if usage else None,
|
||||||
|
output_tokens=usage.completion_tokens if usage else None)
|
||||||
return self._parse_content_fallback(choice.message.content)
|
return self._parse_content_fallback(choice.message.content)
|
||||||
|
|
||||||
logger.warning("No tool call or content in LLM response.")
|
logger.warning("No tool call or content in LLM response.")
|
||||||
|
self._log_llm("analysis", elapsed, False, req_json, error="Empty response")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
elapsed = int((time.monotonic() - t0) * 1000)
|
||||||
logger.error("LLM analysis error: %s", e)
|
logger.error("LLM analysis error: %s", e)
|
||||||
|
self._log_llm("analysis", elapsed, False, req_json, error=str(e))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def _validate_result(self, result: dict) -> dict:
|
def _validate_result(self, result: dict) -> dict:
|
||||||
@@ -219,16 +257,29 @@ class LLMClient:
|
|||||||
first content token arrives (useful for triggering the typing indicator
|
first content token arrives (useful for triggering the typing indicator
|
||||||
only after the model starts generating).
|
only after the model starts generating).
|
||||||
"""
|
"""
|
||||||
|
# Append /no_think to the last user message to disable thinking on Qwen3
|
||||||
|
patched = []
|
||||||
|
for m in messages:
|
||||||
|
patched.append(m)
|
||||||
|
if patched and patched[-1].get("role") == "user":
|
||||||
|
patched[-1] = {**patched[-1], "content": patched[-1]["content"] + "\n/no_think"}
|
||||||
|
|
||||||
|
req_json = json.dumps([
|
||||||
|
{"role": "system", "content": system_prompt[:500]},
|
||||||
|
*[{"role": m["role"], "content": str(m.get("content", ""))[:200]} for m in patched],
|
||||||
|
], default=str)
|
||||||
|
t0 = time.monotonic()
|
||||||
|
|
||||||
async with self._semaphore:
|
async with self._semaphore:
|
||||||
try:
|
try:
|
||||||
stream = await self._client.chat.completions.create(
|
stream = await self._client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": system_prompt},
|
{"role": "system", "content": system_prompt},
|
||||||
*messages,
|
*patched,
|
||||||
],
|
],
|
||||||
temperature=0.8,
|
temperature=0.8,
|
||||||
max_tokens=300,
|
max_tokens=2048,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -243,9 +294,13 @@ class LLMClient:
|
|||||||
chunks.append(delta.content)
|
chunks.append(delta.content)
|
||||||
|
|
||||||
content = "".join(chunks).strip()
|
content = "".join(chunks).strip()
|
||||||
|
elapsed = int((time.monotonic() - t0) * 1000)
|
||||||
|
self._log_llm("chat", elapsed, bool(content), req_json, content or None)
|
||||||
return content if content else None
|
return content if content else None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
elapsed = int((time.monotonic() - t0) * 1000)
|
||||||
logger.error("LLM chat error: %s", e)
|
logger.error("LLM chat error: %s", e)
|
||||||
|
self._log_llm("chat", elapsed, False, req_json, error=str(e))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def analyze_image(
|
async def analyze_image(
|
||||||
@@ -265,8 +320,13 @@ class LLMClient:
|
|||||||
user_content: list[dict] = [
|
user_content: list[dict] = [
|
||||||
{"type": "image_url", "image_url": {"url": data_url}},
|
{"type": "image_url", "image_url": {"url": data_url}},
|
||||||
]
|
]
|
||||||
if user_text:
|
user_content.append({"type": "text", "text": (user_text or "") + "\n/no_think"})
|
||||||
user_content.append({"type": "text", "text": user_text})
|
|
||||||
|
req_json = json.dumps([
|
||||||
|
{"role": "system", "content": system_prompt[:500]},
|
||||||
|
{"role": "user", "content": f"[image {len(image_bytes)} bytes] {user_text or ''}"},
|
||||||
|
], default=str)
|
||||||
|
t0 = time.monotonic()
|
||||||
|
|
||||||
async with self._semaphore:
|
async with self._semaphore:
|
||||||
try:
|
try:
|
||||||
@@ -277,7 +337,7 @@ class LLMClient:
|
|||||||
{"role": "user", "content": user_content},
|
{"role": "user", "content": user_content},
|
||||||
],
|
],
|
||||||
temperature=0.8,
|
temperature=0.8,
|
||||||
max_tokens=500,
|
max_tokens=2048,
|
||||||
stream=True,
|
stream=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -292,9 +352,13 @@ class LLMClient:
|
|||||||
chunks.append(delta.content)
|
chunks.append(delta.content)
|
||||||
|
|
||||||
content = "".join(chunks).strip()
|
content = "".join(chunks).strip()
|
||||||
|
elapsed = int((time.monotonic() - t0) * 1000)
|
||||||
|
self._log_llm("image", elapsed, bool(content), req_json, content or None)
|
||||||
return content if content else None
|
return content if content else None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
elapsed = int((time.monotonic() - t0) * 1000)
|
||||||
logger.error("LLM image analysis error: %s", e)
|
logger.error("LLM image analysis error: %s", e)
|
||||||
|
self._log_llm("image", elapsed, False, req_json, error=str(e))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def raw_analyze(
|
async def raw_analyze(
|
||||||
@@ -307,7 +371,13 @@ class LLMClient:
|
|||||||
user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
|
user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
|
||||||
if channel_context:
|
if channel_context:
|
||||||
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
|
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
|
||||||
user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}"
|
user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}\n/no_think"
|
||||||
|
|
||||||
|
req_json = json.dumps([
|
||||||
|
{"role": "system", "content": SYSTEM_PROMPT[:500]},
|
||||||
|
{"role": "user", "content": user_content},
|
||||||
|
], default=str)
|
||||||
|
t0 = time.monotonic()
|
||||||
|
|
||||||
async with self._semaphore:
|
async with self._semaphore:
|
||||||
try:
|
try:
|
||||||
@@ -320,10 +390,12 @@ class LLMClient:
|
|||||||
tools=[ANALYSIS_TOOL],
|
tools=[ANALYSIS_TOOL],
|
||||||
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
|
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
max_tokens=1024,
|
max_tokens=2048,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
elapsed = int((time.monotonic() - t0) * 1000)
|
||||||
choice = response.choices[0]
|
choice = response.choices[0]
|
||||||
|
usage = response.usage
|
||||||
parts = []
|
parts = []
|
||||||
parsed = None
|
parsed = None
|
||||||
|
|
||||||
@@ -342,7 +414,12 @@ class LLMClient:
|
|||||||
parsed = self._parse_content_fallback(choice.message.content)
|
parsed = self._parse_content_fallback(choice.message.content)
|
||||||
|
|
||||||
raw = "\n".join(parts) or "(empty response)"
|
raw = "\n".join(parts) or "(empty response)"
|
||||||
|
self._log_llm("raw_analyze", elapsed, parsed is not None, req_json, raw,
|
||||||
|
input_tokens=usage.prompt_tokens if usage else None,
|
||||||
|
output_tokens=usage.completion_tokens if usage else None)
|
||||||
return raw, parsed
|
return raw, parsed
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
elapsed = int((time.monotonic() - t0) * 1000)
|
||||||
|
self._log_llm("raw_analyze", elapsed, False, req_json, error=str(e))
|
||||||
return f"Error: {e}", None
|
return f"Error: {e}", None
|
||||||
|
|||||||
Reference in New Issue
Block a user