Switch to max_completion_tokens for newer OpenAI models

gpt-5-nano and other newer models require max_completion_tokens
instead of the deprecated max_tokens parameter. max_completion_tokens
is backward compatible with older chat models, so all call sites can
switch unconditionally.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-24 17:07:44 -05:00
parent dd0d18b0f5
commit a0edf90ebd
2 changed files with 5 additions and 5 deletions

2
bot.py
View File

@@ -146,7 +146,7 @@ class BCSBot(commands.Bot):
await self.llm._client.chat.completions.create(
model=self.llm.model,
messages=[{"role": "user", "content": "hi"}],
max_tokens=1,
max_completion_tokens=1,
)
logger.info("LLM connectivity check passed.")
except Exception as e:

View File

@@ -164,7 +164,7 @@ class LLMClient:
tools=[ANALYSIS_TOOL],
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
temperature=0.1,
max_tokens=2048,
max_completion_tokens=2048,
)
elapsed = int((time.monotonic() - t0) * 1000)
@@ -292,7 +292,7 @@ class LLMClient:
*patched,
],
temperature=0.9,
max_tokens=2048,
max_completion_tokens=2048,
frequency_penalty=0.8,
presence_penalty=0.6,
stream=True,
@@ -357,7 +357,7 @@ class LLMClient:
{"role": "user", "content": user_content},
],
temperature=0.8,
max_tokens=2048,
max_completion_tokens=2048,
stream=True,
)
@@ -418,7 +418,7 @@ class LLMClient:
tools=[ANALYSIS_TOOL],
tool_choice={"type": "function", "function": {"name": "report_analysis"}},
temperature=0.1,
max_tokens=2048,
max_completion_tokens=2048,
)
elapsed = int((time.monotonic() - t0) * 1000)