Add 120s timeout to image analysis streaming

The vision model request could hang indefinitely, freezing the bot.
The streaming loop had no timeout, so if the model never returned
chunks, the bot would wait forever. Now it times out after 2 minutes.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-23 10:37:37 -05:00
parent e1dea84d08
commit 86aacfb84f

View File

@@ -373,31 +373,39 @@ class LLMClient:
async with self._semaphore: async with self._semaphore:
try: try:
stream = await self._client.chat.completions.create( async def _stream_image():
model=self.model, stream = await self._client.chat.completions.create(
messages=[ model=self.model,
{"role": "system", "content": system_prompt}, messages=[
{"role": "user", "content": user_content}, {"role": "system", "content": system_prompt},
], {"role": "user", "content": user_content},
temperature=0.8, ],
max_tokens=2048, temperature=0.8,
stream=True, max_tokens=2048,
) stream=True,
)
chunks: list[str] = [] chunks: list[str] = []
notified = False notified = False
async for chunk in stream: async for chunk in stream:
delta = chunk.choices[0].delta if chunk.choices else None delta = chunk.choices[0].delta if chunk.choices else None
if delta and delta.content: if delta and delta.content:
if not notified and on_first_token: if not notified and on_first_token:
await on_first_token() await on_first_token()
notified = True notified = True
chunks.append(delta.content) chunks.append(delta.content)
content = "".join(chunks).strip() return "".join(chunks).strip()
content = await asyncio.wait_for(_stream_image(), timeout=120)
elapsed = int((time.monotonic() - t0) * 1000) elapsed = int((time.monotonic() - t0) * 1000)
self._log_llm("image", elapsed, bool(content), req_json, content or None) self._log_llm("image", elapsed, bool(content), req_json, content or None)
return content if content else None return content if content else None
except asyncio.TimeoutError:
elapsed = int((time.monotonic() - t0) * 1000)
logger.error("LLM image analysis timed out after %ds", elapsed // 1000)
self._log_llm("image", elapsed, False, req_json, error="Timeout")
return None
except Exception as e: except Exception as e:
elapsed = int((time.monotonic() - t0) * 1000) elapsed = int((time.monotonic() - t0) * 1000)
logger.error("LLM image analysis error: %s", e) logger.error("LLM image analysis error: %s", e)