From a0edf90ebd2b46035ad9e271efb389b7aa3a568b Mon Sep 17 00:00:00 2001
From: AJ Isaacs <ajisaacs27@gmail.com>
Date: Tue, 24 Feb 2026 17:07:44 -0500
Subject: [PATCH] Switch to max_completion_tokens for newer OpenAI models

gpt-5-nano and other newer OpenAI models reject max_tokens and require
max_completion_tokens instead. Older models also accept
max_completion_tokens, so the call sites are switched unconditionally.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 bot.py              | 2 +-
 utils/llm_client.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bot.py b/bot.py
index 0dbeb9d..bcac6cf 100644
--- a/bot.py
+++ b/bot.py
@@ -146,7 +146,7 @@ class BCSBot(commands.Bot):
             await self.llm._client.chat.completions.create(
                 model=self.llm.model,
                 messages=[{"role": "user", "content": "hi"}],
-                max_tokens=1,
+                max_completion_tokens=1,
             )
             logger.info("LLM connectivity check passed.")
         except Exception as e:
diff --git a/utils/llm_client.py b/utils/llm_client.py
index d0e7fd6..e812b3f 100644
--- a/utils/llm_client.py
+++ b/utils/llm_client.py
@@ -164,7 +164,7 @@ class LLMClient:
                     tools=[ANALYSIS_TOOL],
                     tool_choice={"type": "function", "function": {"name": "report_analysis"}},
                     temperature=0.1,
-                    max_tokens=2048,
+                    max_completion_tokens=2048,
                 )
 
                 elapsed = int((time.monotonic() - t0) * 1000)
@@ -292,7 +292,7 @@ class LLMClient:
                         *patched,
                     ],
                     temperature=0.9,
-                    max_tokens=2048,
+                    max_completion_tokens=2048,
                     frequency_penalty=0.8,
                     presence_penalty=0.6,
                     stream=True,
@@ -357,7 +357,7 @@ class LLMClient:
                             {"role": "user", "content": user_content},
                         ],
                         temperature=0.8,
-                        max_tokens=2048,
+                        max_completion_tokens=2048,
                         stream=True,
                     )
 
@@ -418,7 +418,7 @@ class LLMClient:
                     tools=[ANALYSIS_TOOL],
                     tool_choice={"type": "function", "function": {"name": "report_analysis"}},
                     temperature=0.1,
-                    max_tokens=2048,
+                    max_completion_tokens=2048,
                 )
 
                 elapsed = int((time.monotonic() - t0) * 1000)