Skip to content

Commit f56ce20

Browse files
dafriztzolov
authored and committed
Add support for max_completion_tokens in OpenAI chat options request
An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. Replaces the max_tokens field, which is now deprecated.
1 parent 110a520 commit f56ce20

File tree

2 files changed

+34
-10
lines changed

2 files changed

+34
-10
lines changed

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,11 @@ public class OpenAiChatOptions implements FunctionCallingOptions, ChatOptions {
8282
* tokens and generated tokens is limited by the model's context length.
8383
*/
8484
private @JsonProperty("max_tokens") Integer maxTokens;
85+
/**
86+
* An upper bound for the number of tokens that can be generated for a completion,
87+
* including visible output tokens and reasoning tokens.
88+
*/
89+
private @JsonProperty("max_completion_tokens") Integer maxCompletionTokens;
8590
/**
8691
* How many chat completion choices to generate for each input message. Note that you will be charged based
8792
* on the number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
@@ -239,6 +244,11 @@ public Builder withMaxTokens(Integer maxTokens) {
239244
return this;
240245
}
241246

247+
public Builder withMaxCompletionTokens(Integer maxCompletionTokens) {
248+
this.options.maxCompletionTokens = maxCompletionTokens;
249+
return this;
250+
}
251+
242252
public Builder withN(Integer n) {
243253
this.options.n = n;
244254
return this;
@@ -391,6 +401,14 @@ public void setMaxTokens(Integer maxTokens) {
391401
this.maxTokens = maxTokens;
392402
}
393403

404+
public Integer getMaxCompletionTokens() {
405+
return maxCompletionTokens;
406+
}
407+
408+
public void setMaxCompletionTokens(Integer maxCompletionTokens) {
409+
this.maxCompletionTokens = maxCompletionTokens;
410+
}
411+
394412
public Integer getN() {
395413
return this.n;
396414
}
@@ -556,6 +574,7 @@ public static OpenAiChatOptions fromOptions(OpenAiChatOptions fromOptions) {
556574
.withLogprobs(fromOptions.getLogprobs())
557575
.withTopLogprobs(fromOptions.getTopLogprobs())
558576
.withMaxTokens(fromOptions.getMaxTokens())
577+
.withMaxCompletionTokens(fromOptions.getMaxCompletionTokens())
559578
.withN(fromOptions.getN())
560579
.withPresencePenalty(fromOptions.getPresencePenalty())
561580
.withResponseFormat(fromOptions.getResponseFormat())
@@ -578,9 +597,10 @@ public static OpenAiChatOptions fromOptions(OpenAiChatOptions fromOptions) {
578597
@Override
579598
public int hashCode() {
580599
return Objects.hash(this.model, this.frequencyPenalty, this.logitBias, this.logprobs, this.topLogprobs,
581-
this.maxTokens, this.n, this.presencePenalty, this.responseFormat, this.streamOptions, this.seed,
582-
this.stop, this.temperature, this.topP, this.tools, this.toolChoice, this.user, this.parallelToolCalls,
583-
this.functionCallbacks, this.functions, this.httpHeaders, this.proxyToolCalls);
600+
this.maxTokens, this.maxCompletionTokens, this.n, this.presencePenalty, this.responseFormat,
601+
this.streamOptions, this.seed, this.stop, this.temperature, this.topP, this.tools, this.toolChoice,
602+
this.user, this.parallelToolCalls, this.functionCallbacks, this.functions, this.httpHeaders,
603+
this.proxyToolCalls);
584604
}
585605

586606
@Override
@@ -593,8 +613,9 @@ public boolean equals(Object o) {
593613
return Objects.equals(this.model, other.model) && Objects.equals(this.frequencyPenalty, other.frequencyPenalty)
594614
&& Objects.equals(this.logitBias, other.logitBias) && Objects.equals(this.logprobs, other.logprobs)
595615
&& Objects.equals(this.topLogprobs, other.topLogprobs)
596-
&& Objects.equals(this.maxTokens, other.maxTokens) && Objects.equals(this.n, other.n)
597-
&& Objects.equals(this.presencePenalty, other.presencePenalty)
616+
&& Objects.equals(this.maxTokens, other.maxTokens)
617+
&& Objects.equals(this.maxCompletionTokens, other.maxCompletionTokens)
618+
&& Objects.equals(this.n, other.n) && Objects.equals(this.presencePenalty, other.presencePenalty)
598619
&& Objects.equals(this.responseFormat, other.responseFormat)
599620
&& Objects.equals(this.streamOptions, other.streamOptions) && Objects.equals(this.seed, other.seed)
600621
&& Objects.equals(this.stop, other.stop) && Objects.equals(this.temperature, other.temperature)

models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -390,6 +390,8 @@ public Function(String description, String name, String jsonSchema) {
390390
* @param maxTokens The maximum number of tokens to generate in the chat completion.
391391
* The total length of input tokens and generated tokens is limited by the model's
392392
* context length.
393+
* @param maxCompletionTokens An upper bound for the number of tokens that can be
394+
* generated for a completion, including visible output tokens and reasoning tokens.
393395
* @param n How many chat completion choices to generate for each input message. Note
394396
* that you will be charged based on the number of generated tokens across all the
395397
* choices. Keep n as 1 to minimize costs.
@@ -442,6 +444,7 @@ public record ChatCompletionRequest(// @formatter:off
442444
@JsonProperty("logprobs") Boolean logprobs,
443445
@JsonProperty("top_logprobs") Integer topLogprobs,
444446
@JsonProperty("max_tokens") Integer maxTokens,
447+
@JsonProperty("max_completion_tokens") Integer maxCompletionTokens,
445448
@JsonProperty("n") Integer n,
446449
@JsonProperty("presence_penalty") Double presencePenalty,
447450
@JsonProperty("response_format") ResponseFormat responseFormat,
@@ -464,7 +467,7 @@ public record ChatCompletionRequest(// @formatter:off
464467
* @param temperature What sampling temperature to use, between 0 and 1.
465468
*/
466469
public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model, Double temperature) {
467-
this(messages, model, null, null, null, null, null, null, null,
470+
this(messages, model, null, null, null, null, null, null, null, null,
468471
null, null, null, false, null, temperature, null,
469472
null, null, null, null);
470473
}
@@ -479,7 +482,7 @@ public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model,
479482
* as they become available, with the stream terminated by a data: [DONE] message.
480483
*/
481484
public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model, Double temperature, boolean stream) {
482-
this(messages, model, null, null, null, null, null, null, null,
485+
this(messages, model, null, null, null, null, null, null, null, null,
483486
null, null, null, stream, null, temperature, null,
484487
null, null, null, null);
485488
}
@@ -495,7 +498,7 @@ public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model,
495498
*/
496499
public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model,
497500
List<FunctionTool> tools, Object toolChoice) {
498-
this(messages, model, null, null, null, null, null, null, null,
501+
this(messages, model, null, null, null, null, null, null, null, null,
499502
null, null, null, false, null, 0.8, null,
500503
tools, toolChoice, null, null);
501504
}
@@ -509,7 +512,7 @@ public ChatCompletionRequest(List<ChatCompletionMessage> messages, String model,
509512
*/
510513
public ChatCompletionRequest(List<ChatCompletionMessage> messages, Boolean stream) {
511514
this(messages, null, null, null, null, null, null, null, null,
512-
null, null, null, stream, null, null, null,
515+
null, null, null, null, stream, null, null, null,
513516
null, null, null, null);
514517
}
515518

@@ -520,7 +523,7 @@ public ChatCompletionRequest(List<ChatCompletionMessage> messages, Boolean strea
520523
* @return A new {@link ChatCompletionRequest} with the specified stream options.
521524
*/
522525
public ChatCompletionRequest withStreamOptions(StreamOptions streamOptions) {
523-
return new ChatCompletionRequest(messages, model, frequencyPenalty, logitBias, logprobs, topLogprobs, maxTokens, n, presencePenalty,
526+
return new ChatCompletionRequest(messages, model, frequencyPenalty, logitBias, logprobs, topLogprobs, maxTokens, maxCompletionTokens, n, presencePenalty,
524527
responseFormat, seed, stop, stream, streamOptions, temperature, topP,
525528
tools, toolChoice, parallelToolCalls, user);
526529
}

0 commit comments

Comments
 (0)