Add max_tokens=1024 to LLM analysis calls
The analyze_message and raw_analyze methods set no max_tokens limit, so thinking models (Qwen3-VL-32B-Thinking) could generate an unbounded number of reasoning tokens before responding, taking 5+ minutes per message. Both calls now pass max_tokens=1024.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -123,6 +123,7 @@ class LLMClient:
|
|||||||
tools=[ANALYSIS_TOOL],
|
tools=[ANALYSIS_TOOL],
|
||||||
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
|
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
|
max_tokens=1024,
|
||||||
)
|
)
|
||||||
|
|
||||||
choice = response.choices[0]
|
choice = response.choices[0]
|
||||||
@@ -255,6 +256,7 @@ class LLMClient:
|
|||||||
tools=[ANALYSIS_TOOL],
|
tools=[ANALYSIS_TOOL],
|
||||||
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
|
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
|
max_tokens=1024,
|
||||||
)
|
)
|
||||||
|
|
||||||
choice = response.choices[0]
|
choice = response.choices[0]
|
||||||
|
|||||||
Reference in New Issue
Block a user