@@ -248,6 +248,12 @@ func (r *ChatCompletionResponseFormatJSONSchema) UnmarshalJSON(data []byte) erro
248
248
return nil
249
249
}
250
250
251
+ // NonOpenAIExtensions groups request fields that are extensions to, rather than part of, the standard OpenAI API.
252
+ type NonOpenAIExtensions struct {
253
+ // GuidedChoice restricts output to a set of predefined choices.
254
+ GuidedChoice []string `json:"guided_choice,omitempty"`
255
+ }
256
+
251
257
// ChatCompletionRequest represents a request structure for chat completion API.
252
258
type ChatCompletionRequest struct {
253
259
Model string `json:"model"`
@@ -307,10 +313,8 @@ type ChatCompletionRequest struct {
307
313
// Such as think mode for qwen3. "chat_template_kwargs": {"enable_thinking": false}
308
314
// https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
309
315
ChatTemplateKwargs map [string ]any `json:"chat_template_kwargs,omitempty"`
310
- // Specifies the latency tier to use for processing the request.
311
- ServiceTier ServiceTier `json:"service_tier,omitempty"`
312
- // GuidedChoice restricts output to a set of predefined choices.
313
- GuidedChoice []string `json:"guided_choice,omitempty"`
316
+ // NonOpenAIExtensions embeds the non-OpenAI extension fields directly in the request.
317
+ NonOpenAIExtensions `json:",inline"`
314
318
}
315
319
316
320
type StreamOptions struct {
@@ -394,15 +398,6 @@ const (
394
398
FinishReasonNull FinishReason = "null"
395
399
)
396
400
397
- type ServiceTier string
398
-
399
- const (
400
- ServiceTierAuto ServiceTier = "auto"
401
- ServiceTierDefault ServiceTier = "default"
402
- ServiceTierFlex ServiceTier = "flex"
403
- ServiceTierPriority ServiceTier = "priority"
404
- )
405
-
406
401
func (r FinishReason ) MarshalJSON () ([]byte , error ) {
407
402
if r == FinishReasonNull || r == "" {
408
403
return []byte ("null" ), nil
@@ -435,7 +430,6 @@ type ChatCompletionResponse struct {
435
430
Usage Usage `json:"usage"`
436
431
SystemFingerprint string `json:"system_fingerprint"`
437
432
PromptFilterResults []PromptFilterResult `json:"prompt_filter_results,omitempty"`
438
- ServiceTier ServiceTier `json:"service_tier,omitempty"`
439
433
440
434
httpHeader
441
435
}
0 commit comments