diff --git a/utils/llm_client.py b/utils/llm_client.py index 7130548..1d1628f 100644 --- a/utils/llm_client.py +++ b/utils/llm_client.py @@ -373,31 +373,39 @@ class LLMClient: async with self._semaphore: try: - stream = await self._client.chat.completions.create( - model=self.model, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_content}, - ], - temperature=0.8, - max_tokens=2048, - stream=True, - ) + async def _stream_image(): + stream = await self._client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_content}, + ], + temperature=0.8, + max_tokens=2048, + stream=True, + ) - chunks: list[str] = [] - notified = False - async for chunk in stream: - delta = chunk.choices[0].delta if chunk.choices else None - if delta and delta.content: - if not notified and on_first_token: - await on_first_token() - notified = True - chunks.append(delta.content) + chunks: list[str] = [] + notified = False + async for chunk in stream: + delta = chunk.choices[0].delta if chunk.choices else None + if delta and delta.content: + if not notified and on_first_token: + await on_first_token() + notified = True + chunks.append(delta.content) - content = "".join(chunks).strip() + return "".join(chunks).strip() + + content = await asyncio.wait_for(_stream_image(), timeout=120) elapsed = int((time.monotonic() - t0) * 1000) self._log_llm("image", elapsed, bool(content), req_json, content or None) return content if content else None + except asyncio.TimeoutError: + elapsed = int((time.monotonic() - t0) * 1000) + logger.error("LLM image analysis timed out after %ds", elapsed // 1000) + self._log_llm("image", elapsed, False, req_json, error="Timeout") + return None except Exception as e: elapsed = int((time.monotonic() - t0) * 1000) logger.error("LLM image analysis error: %s", e)