feat: channel-level conversation analysis with compact formatting

Switch from per-user message batching to per-channel conversation
analysis. The LLM now sees the full interleaved conversation with
relative timestamps, reply chains, and consecutive message collapsing
instead of isolated flat text per user.

Key changes:
- Fix gpt-5-nano temperature incompatibility (conditional temp param)
- Add mention-triggered scan: users @mention bot to analyze recent chat
- Refactor debounce buffer from (channel_id, user_id) to channel_id
- Replace per-message analyze_message() with analyze_conversation()
  returning per-user findings from a single LLM call
- Add CONVERSATION_TOOL schema with coherence, topic, and game fields
- Compact message format: relative timestamps, reply arrows (→),
  consecutive same-user message collapsing
- Separate mention scan tasks from debounce tasks
- Remove _store_context/_get_context (conversation block IS the context)
- Escalation timeout config: [30, 60, 120, 240] minutes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-24 23:13:07 -05:00
parent 943c67cc87
commit 90b70cad69
5 changed files with 793 additions and 227 deletions

View File

@@ -3,6 +3,7 @@ import base64
import json
import logging
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from openai import AsyncOpenAI
@@ -97,6 +98,114 @@ ANALYSIS_TOOL = {
}
# OpenAI function-calling tool schema for the single-call, channel-level
# conversation scan. The model is forced (via tool_choice) to return one
# `user_findings` entry per participant plus an overall one-sentence
# `conversation_summary`. Nullable fields use the ["<type>", "null"] form.
CONVERSATION_TOOL = {
    "type": "function",
    "function": {
        "name": "report_conversation_scan",
        "description": "Analyze a conversation block and report findings per user.",
        "parameters": {
            "type": "object",
            "properties": {
                "user_findings": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "username": {
                                "type": "string",
                                "description": "Discord display name of the user.",
                            },
                            # -- Toxicity: weighted score plus behavior categories --
                            "toxicity_score": {
                                "type": "number",
                                "description": "Weighted toxicity 0.0-1.0 across their messages in this conversation.",
                            },
                            "categories": {
                                "type": "array",
                                "items": {
                                    "type": "string",
                                    "enum": [
                                        "aggressive",
                                        "passive_aggressive",
                                        "instigating",
                                        "hostile",
                                        "manipulative",
                                        "sexual_vulgar",
                                        "none",
                                    ],
                                },
                                "description": "Detected toxicity behavior categories.",
                            },
                            "reasoning": {
                                "type": "string",
                                "description": "Brief explanation of this user's behavior in the conversation.",
                            },
                            "worst_message": {
                                "type": ["string", "null"],
                                "description": "Most problematic snippet from this user (quoted, max 100 chars), or null if nothing notable.",
                            },
                            # -- Topic: off-topic flag plus classification --
                            "off_topic": {
                                "type": "boolean",
                                "description": "True if this user's messages were primarily off-topic personal drama.",
                            },
                            "topic_category": {
                                "type": "string",
                                "enum": [
                                    "gaming",
                                    "personal_drama",
                                    "relationship_issues",
                                    "real_life_venting",
                                    "gossip",
                                    "general_chat",
                                    "meta",
                                ],
                                "description": "What topic category this user's messages fall into.",
                            },
                            "topic_reasoning": {
                                "type": "string",
                                "description": "Brief explanation of the topic classification for this user.",
                            },
                            # -- Coherence: score plus best-guess cause when low --
                            "coherence_score": {
                                "type": "number",
                                "description": "Coherence rating 0.0-1.0 across this user's messages. Normal texting shortcuts are fine.",
                            },
                            "coherence_flag": {
                                "type": "string",
                                "enum": [
                                    "normal",
                                    "intoxicated",
                                    "tired",
                                    "angry_typing",
                                    "mobile_keyboard",
                                    "language_barrier",
                                ],
                                "description": "Best guess at why coherence is low, if applicable.",
                            },
                            # -- Optional extras: nullable, deliberately absent from "required" --
                            "note_update": {
                                "type": ["string", "null"],
                                "description": "New observation about this user's pattern, or null.",
                            },
                            "detected_game": {
                                "type": ["string", "null"],
                                "description": "The game channel name this user's messages are about, or null.",
                            },
                        },
                        "required": ["username", "toxicity_score", "categories", "reasoning", "off_topic", "topic_category", "topic_reasoning", "coherence_score", "coherence_flag"],
                    },
                    "description": "Findings for each user who participated in the conversation.",
                },
                "conversation_summary": {
                    "type": "string",
                    "description": "One-sentence summary of the overall conversation tone and any escalation patterns.",
                },
            },
            "required": ["user_findings", "conversation_summary"],
        },
    },
}
_NO_TEMPERATURE_MODELS = {"gpt-5-nano", "o1", "o1-mini", "o1-preview", "o3", "o3-mini", "o4-mini"}
class LLMClient:
def __init__(self, base_url: str, model: str, api_key: str = "not-needed",
db=None, no_think: bool = False, concurrency: int = 4):
@@ -104,6 +213,7 @@ class LLMClient:
self.host = base_url.rstrip("/")
self._db = db
self._no_think = no_think
self._supports_temperature = model not in _NO_TEMPERATURE_MODELS
timeout = 600.0 if self.host else 120.0 # local models need longer for VRAM load
client_kwargs = {"api_key": api_key, "timeout": timeout}
if self.host:
@@ -137,13 +247,15 @@ class LLMClient:
async def analyze_message(
self, message: str, context: str = "", user_notes: str = "",
channel_context: str = "",
channel_context: str = "", mention_context: str = "",
) -> dict | None:
user_content = f"=== RECENT CHANNEL MESSAGES (for background context only) ===\n{context}\n\n"
if user_notes:
user_content += f"=== NOTES ABOUT THIS USER (from prior analysis) ===\n{user_notes}\n\n"
if channel_context:
user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
if mention_context:
user_content += f"=== USER REPORT (a user flagged this conversation — focus on this concern) ===\n{mention_context}\n\n"
user_content += f"=== TARGET MESSAGE (analyze THIS message only) ===\n{message}"
user_content = self._append_no_think(user_content)
@@ -155,6 +267,7 @@ class LLMClient:
async with self._semaphore:
try:
temp_kwargs = {"temperature": 0.1} if self._supports_temperature else {}
response = await self._client.chat.completions.create(
model=self.model,
messages=[
@@ -163,7 +276,7 @@ class LLMClient:
],
tools=[ANALYSIS_TOOL],
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
temperature=0.1,
**temp_kwargs,
max_completion_tokens=2048,
)
@@ -253,6 +366,152 @@ class LLMClient:
logger.warning("Could not parse LLM content fallback: %s", text[:200])
return None
# -- Conversation-level analysis (mention scan) --
@staticmethod
def _format_relative_time(delta: timedelta) -> str:
total_seconds = int(delta.total_seconds())
if total_seconds < 60:
return f"~{total_seconds}s ago"
minutes = total_seconds // 60
if minutes < 60:
return f"~{minutes}m ago"
hours = minutes // 60
return f"~{hours}h ago"
@staticmethod
def _format_conversation_block(
messages: list[tuple[str, str, datetime, str | None]],
now: datetime | None = None,
) -> str:
"""Format messages as a compact timestamped chat block.
Each tuple is (username, content, timestamp, reply_to_username).
Consecutive messages from the same user collapse to indented lines.
Replies shown as ``username → replied_to:``.
"""
if now is None:
now = datetime.now(timezone.utc)
lines = [f"[Current time: {now.strftime('%I:%M %p')}]", ""]
last_user = None
for username, content, ts, reply_to in messages:
delta = now - ts.replace(tzinfo=timezone.utc) if ts.tzinfo is None else now - ts
rel = LLMClient._format_relative_time(delta)
if username == last_user:
# Continuation from same user — indent
for line in content.split("\n"):
lines.append(f" {line}")
else:
# New user block
if reply_to:
prefix = f"[{rel}] {username}{reply_to}: "
else:
prefix = f"[{rel}] {username}: "
msg_lines = content.split("\n")
lines.append(prefix + msg_lines[0])
for line in msg_lines[1:]:
lines.append(f" {line}")
last_user = username
return "\n".join(lines)
async def analyze_conversation(
    self,
    messages: list[tuple[str, str, datetime, str | None]],
    mention_context: str = "",
    channel_context: str = "",
    user_notes_map: dict[str, str] | None = None,
) -> dict | None:
    """Analyze a conversation block in one call, returning per-user findings.

    Args:
        messages: (username, content, timestamp, reply_to_username) tuples,
            rendered into a compact transcript by ``_format_conversation_block``.
        mention_context: Text of a user report (someone @mentioned the bot to
            flag this chat); surfaced as its own prompt section when set.
        channel_context: Optional channel info section for the prompt.
        user_notes_map: Optional {username: prior-analysis note}; empty notes
            are skipped.

    Returns:
        The normalized tool-call arguments dict (see
        ``_validate_conversation_result``), or None on empty input, a
        response without a tool call, or any request/parse error.
    """
    if not messages:
        return None
    convo_block = self._format_conversation_block(messages)
    # Assemble the prompt as labelled sections; the transcript is the
    # primary payload, optional sections are appended only when non-empty.
    user_content = f"=== CONVERSATION BLOCK ===\n{convo_block}\n\n"
    if user_notes_map:
        notes_lines = [f" {u}: {n}" for u, n in user_notes_map.items() if n]
        if notes_lines:
            user_content += "=== USER NOTES (from prior analysis) ===\n" + "\n".join(notes_lines) + "\n\n"
    if channel_context:
        user_content += f"=== CHANNEL INFO ===\n{channel_context}\n\n"
    if mention_context:
        user_content += f"=== USER REPORT (a user flagged this conversation — focus on this concern) ===\n{mention_context}\n\n"
    user_content += "Analyze the conversation block above and report findings for each user."
    user_content = self._append_no_think(user_content)
    # Truncated request snapshot (500 chars per message) kept for the audit log.
    req_json = json.dumps([
        {"role": "system", "content": SYSTEM_PROMPT[:500]},
        {"role": "user", "content": user_content[:500]},
    ], default=str)
    t0 = time.monotonic()
    async with self._semaphore:  # bound concurrent in-flight LLM calls
        try:
            # Some models reject `temperature` (see _NO_TEMPERATURE_MODELS).
            temp_kwargs = {"temperature": 0.1} if self._supports_temperature else {}
            response = await self._client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": user_content},
                ],
                tools=[CONVERSATION_TOOL],
                # Force the tool call so output is always structured JSON.
                tool_choice={"type": "function", "function": {"name": "report_conversation_scan"}},
                **temp_kwargs,
                max_completion_tokens=4096,
            )
            elapsed = int((time.monotonic() - t0) * 1000)
            choice = response.choices[0]
            usage = response.usage
            if choice.message.tool_calls:
                tool_call = choice.message.tool_calls[0]
                resp_text = tool_call.function.arguments
                args = json.loads(resp_text)
                self._log_llm("conversation", elapsed, True, req_json, resp_text,
                              input_tokens=usage.prompt_tokens if usage else None,
                              output_tokens=usage.completion_tokens if usage else None)
                return self._validate_conversation_result(args)
            # Model answered with plain text despite the forced tool choice.
            logger.warning("No tool call in conversation analysis response.")
            self._log_llm("conversation", elapsed, False, req_json, error="Empty response")
            return None
        except Exception as e:
            # Broad catch is deliberate: analysis is best-effort and must
            # never propagate; the failure is logged for the audit trail.
            elapsed = int((time.monotonic() - t0) * 1000)
            logger.error("LLM conversation analysis error: %s", e)
            self._log_llm("conversation", elapsed, False, req_json, error=str(e))
            return None
@staticmethod
def _validate_conversation_result(result: dict) -> dict:
"""Validate and normalize conversation analysis result."""
findings = result.get("user_findings", [])
for finding in findings:
finding.setdefault("username", "unknown")
score = float(finding.get("toxicity_score", 0.0))
finding["toxicity_score"] = min(max(score, 0.0), 1.0)
if not isinstance(finding.get("categories"), list):
finding["categories"] = ["none"]
finding.setdefault("reasoning", "")
finding.setdefault("worst_message", None)
finding["off_topic"] = bool(finding.get("off_topic", False))
finding.setdefault("topic_category", "general_chat")
finding.setdefault("topic_reasoning", "")
coherence = float(finding.get("coherence_score", 0.85))
finding["coherence_score"] = min(max(coherence, 0.0), 1.0)
finding.setdefault("coherence_flag", "normal")
finding.setdefault("note_update", None)
finding.setdefault("detected_game", None)
result["user_findings"] = findings
result.setdefault("conversation_summary", "")
return result
async def chat(
self, messages: list[dict[str, str]], system_prompt: str,
on_first_token=None, recent_bot_replies: list[str] | None = None,
@@ -285,16 +544,15 @@ class LLMClient:
async with self._semaphore:
try:
temp_kwargs = {"temperature": 0.9, "frequency_penalty": 0.8, "presence_penalty": 0.6} if self._supports_temperature else {}
stream = await self._client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": effective_prompt},
*patched,
],
temperature=0.9,
**temp_kwargs,
max_completion_tokens=2048,
frequency_penalty=0.8,
presence_penalty=0.6,
stream=True,
)
@@ -350,13 +608,14 @@ class LLMClient:
async with self._semaphore:
try:
async def _stream_image():
temp_kwargs = {"temperature": 0.8} if self._supports_temperature else {}
stream = await self._client.chat.completions.create(
model=self.model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_content},
],
temperature=0.8,
**temp_kwargs,
max_completion_tokens=2048,
stream=True,
)
@@ -409,6 +668,7 @@ class LLMClient:
async with self._semaphore:
try:
temp_kwargs = {"temperature": 0.1} if self._supports_temperature else {}
response = await self._client.chat.completions.create(
model=self.model,
messages=[
@@ -417,7 +677,7 @@ class LLMClient:
],
tools=[ANALYSIS_TOOL],
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
temperature=0.1,
**temp_kwargs,
max_completion_tokens=2048,
)