Skip to content

Commit bd612ce

Browse files
Add support for Chat Completion Service Tier (#1023)
* Add support for Chat Completion Service Tier
* Add priority service tier
1 parent 8e9b2ac commit bd612ce

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

chat.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,8 @@ type ChatCompletionRequest struct {
307307
// Such as think mode for qwen3. "chat_template_kwargs": {"enable_thinking": false}
308308
// https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes
309309
ChatTemplateKwargs map[string]any `json:"chat_template_kwargs,omitempty"`
310+
// Specifies the latency tier to use for processing the request.
311+
ServiceTier ServiceTier `json:"service_tier,omitempty"`
310312
}
311313

312314
type StreamOptions struct {
@@ -390,6 +392,15 @@ const (
390392
FinishReasonNull FinishReason = "null"
391393
)
392394

395+
// ServiceTier is the string value carried in the "service_tier" JSON field of
// ChatCompletionRequest and ChatCompletionResponse. On a request it specifies
// the latency tier to use for processing.
type ServiceTier string
396+
397+
// Named ServiceTier values. Each constant's string is the literal that appears
// in the "service_tier" JSON field.
const (
398+
ServiceTierAuto ServiceTier = "auto"
399+
ServiceTierDefault ServiceTier = "default"
400+
ServiceTierFlex ServiceTier = "flex"
401+
ServiceTierPriority ServiceTier = "priority"
402+
)
403+
393404
func (r FinishReason) MarshalJSON() ([]byte, error) {
394405
if r == FinishReasonNull || r == "" {
395406
return []byte("null"), nil
@@ -422,6 +433,7 @@ type ChatCompletionResponse struct {
422433
Usage Usage `json:"usage"`
423434
SystemFingerprint string `json:"system_fingerprint"`
424435
PromptFilterResults []PromptFilterResult `json:"prompt_filter_results,omitempty"`
436+
ServiceTier ServiceTier `json:"service_tier,omitempty"`
425437

426438
httpHeader
427439
}

0 commit comments

Comments
 (0)