From 86aacfb84f05b081bf5fa289b147a4941edb20aa Mon Sep 17 00:00:00 2001 From: AJ Isaacs Date: Mon, 23 Feb 2026 10:37:37 -0500 Subject: [PATCH] Add 120s timeout to image analysis streaming The vision model request was hanging indefinitely, freezing the bot. The streaming loop had no timeout so if the model never returned chunks, the bot would wait forever. Now times out after 2 minutes. Co-Authored-By: Claude Opus 4.6 --- utils/llm_client.py | 48 ++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/utils/llm_client.py b/utils/llm_client.py index 7130548..1d1628f 100644 --- a/utils/llm_client.py +++ b/utils/llm_client.py @@ -373,31 +373,39 @@ class LLMClient: async with self._semaphore: try: - stream = await self._client.chat.completions.create( - model=self.model, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_content}, - ], - temperature=0.8, - max_tokens=2048, - stream=True, - ) + async def _stream_image(): + stream = await self._client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_content}, + ], + temperature=0.8, + max_tokens=2048, + stream=True, + ) - chunks: list[str] = [] - notified = False - async for chunk in stream: - delta = chunk.choices[0].delta if chunk.choices else None - if delta and delta.content: - if not notified and on_first_token: - await on_first_token() - notified = True - chunks.append(delta.content) + chunks: list[str] = [] + notified = False + async for chunk in stream: + delta = chunk.choices[0].delta if chunk.choices else None + if delta and delta.content: + if not notified and on_first_token: + await on_first_token() + notified = True + chunks.append(delta.content) - content = "".join(chunks).strip() + return "".join(chunks).strip() + + content = await asyncio.wait_for(_stream_image(), timeout=120) elapsed = int((time.monotonic() - t0) * 1000) self._log_llm("image", elapsed, bool(content), req_json, content or None) return content if content else None + except asyncio.TimeoutError: + elapsed = int((time.monotonic() - t0) * 1000) + logger.error("LLM image analysis timed out after %ds", elapsed // 1000) + self._log_llm("image", elapsed, False, req_json, error="Timeout") + return None except Exception as e: elapsed = int((time.monotonic() - t0) * 1000) logger.error("LLM image analysis error: %s", e)