@@ -326,8 +326,9 @@ class ChatCompletionRequest(OpenAIBaseModel):
326
326
)
327
327
chat_template_kwargs : Optional [dict [str , Any ]] = Field (
328
328
default = None ,
329
- description = ("Additional kwargs to pass to the template renderer. "
330
- "Will be accessible by the chat template." ),
329
+ description = (
330
+ "Additional keyword args to pass to the template renderer. "
331
+ "Will be accessible by the chat template." ),
331
332
)
332
333
mm_processor_kwargs : Optional [dict [str , Any ]] = Field (
333
334
default = None ,
@@ -414,6 +415,12 @@ class ChatCompletionRequest(OpenAIBaseModel):
414
415
default = None ,
415
416
description = "KVTransfer parameters used for disaggregated serving." )
416
417
418
+ vllm_xargs : Optional [dict [str , Union [str , int , float ]]] = Field (
419
+ default = None ,
420
+ description = ("Additional request parameters with string or "
421
+ "numeric values, used by custom extensions." ),
422
+ )
423
+
417
424
# --8<-- [end:chat-completion-extra-params]
418
425
419
426
# Default sampling parameters for chat completion requests
@@ -523,6 +530,10 @@ def to_sampling_params(
523
530
structural_tag = self .structural_tag ,
524
531
)
525
532
533
# Build the extra_args mapping handed to SamplingParams. Start from a
# shallow copy of the user-supplied vllm_xargs so that adding
# kv_transfer_params below never writes into the request's own field.
extra_args: dict[str, Any] = dict(self.vllm_xargs or {})
if self.kv_transfer_params:
    # Pass in kv_transfer_params via extra_args
    extra_args["kv_transfer_params"] = self.kv_transfer_params
526
537
return SamplingParams .from_optional (
527
538
n = self .n ,
528
539
best_of = self .best_of ,
@@ -553,8 +564,8 @@ def to_sampling_params(
553
564
logit_bias = self .logit_bias ,
554
565
bad_words = self .bad_words ,
555
566
allowed_token_ids = self .allowed_token_ids ,
556
- extra_args = ({ "kv_transfer_params" : self . kv_transfer_params }
557
- if self . kv_transfer_params else None ) )
567
+ extra_args = extra_args or None ,
568
+ )
558
569
559
570
def _get_guided_json_from_tool (
560
571
self ) -> Optional [Union [str , dict , BaseModel ]]:
@@ -871,6 +882,12 @@ class CompletionRequest(OpenAIBaseModel):
871
882
default = None ,
872
883
description = "KVTransfer parameters used for disaggregated serving." )
873
884
885
+ vllm_xargs : Optional [dict [str , Union [str , int , float ]]] = Field (
886
+ default = None ,
887
+ description = ("Additional request parameters with string or "
888
+ "numeric values, used by custom extensions." ),
889
+ )
890
+
874
891
# --8<-- [end:completion-extra-params]
875
892
876
893
# Default sampling parameters for completion requests
@@ -968,6 +985,10 @@ def to_sampling_params(
968
985
whitespace_pattern = self .guided_whitespace_pattern ,
969
986
)
970
987
988
# Build the extra_args mapping handed to SamplingParams. Start from a
# shallow copy of the user-supplied vllm_xargs so that adding
# kv_transfer_params below never writes into the request's own field.
extra_args: dict[str, Any] = dict(self.vllm_xargs or {})
if self.kv_transfer_params:
    # Pass in kv_transfer_params via extra_args
    extra_args["kv_transfer_params"] = self.kv_transfer_params
971
992
return SamplingParams .from_optional (
972
993
n = self .n ,
973
994
best_of = self .best_of ,
@@ -997,8 +1018,8 @@ def to_sampling_params(
997
1018
guided_decoding = guided_decoding ,
998
1019
logit_bias = self .logit_bias ,
999
1020
allowed_token_ids = self .allowed_token_ids ,
1000
- extra_args = ({ "kv_transfer_params" : self . kv_transfer_params }
1001
- if self . kv_transfer_params else None ) )
1021
+ extra_args = extra_args or None ,
1022
+ )
1002
1023
1003
1024
@model_validator (mode = "before" )
1004
1025
@classmethod
@@ -1117,8 +1138,9 @@ class EmbeddingChatRequest(OpenAIBaseModel):
1117
1138
)
1118
1139
chat_template_kwargs : Optional [dict [str , Any ]] = Field (
1119
1140
default = None ,
1120
- description = ("Additional kwargs to pass to the template renderer. "
1121
- "Will be accessible by the chat template." ),
1141
+ description = (
1142
+ "Additional keyword args to pass to the template renderer. "
1143
+ "Will be accessible by the chat template." ),
1122
1144
)
1123
1145
mm_processor_kwargs : Optional [dict [str , Any ]] = Field (
1124
1146
default = None ,
@@ -1623,8 +1645,9 @@ class TokenizeChatRequest(OpenAIBaseModel):
1623
1645
)
1624
1646
chat_template_kwargs : Optional [dict [str , Any ]] = Field (
1625
1647
default = None ,
1626
- description = ("Additional kwargs to pass to the template renderer. "
1627
- "Will be accessible by the chat template." ),
1648
+ description = (
1649
+ "Additional keyword args to pass to the template renderer. "
1650
+ "Will be accessible by the chat template." ),
1628
1651
)
1629
1652
mm_processor_kwargs : Optional [dict [str , Any ]] = Field (
1630
1653
default = None ,
@@ -1736,6 +1759,12 @@ class TranscriptionRequest(OpenAIBaseModel):
1736
1759
# Flattened stream option to simplify form data.
1737
1760
stream_include_usage : Optional [bool ] = False
1738
1761
stream_continuous_usage_stats : Optional [bool ] = False
1762
+
1763
+ vllm_xargs : Optional [dict [str , Union [str , int , float ]]] = Field (
1764
+ default = None ,
1765
+ description = ("Additional request parameters with string or "
1766
+ "numeric values, used by custom extensions." ),
1767
+ )
1739
1768
# --8<-- [end:transcription-extra-params]
1740
1769
1741
1770
# --8<-- [start:transcription-sampling-params]
@@ -1823,7 +1852,8 @@ def to_sampling_params(
1823
1852
presence_penalty = self .presence_penalty ,
1824
1853
output_kind = RequestOutputKind .DELTA
1825
1854
if self .stream \
1826
- else RequestOutputKind .FINAL_ONLY )
1855
+ else RequestOutputKind .FINAL_ONLY ,
1856
+ extra_args = self .vllm_xargs )
1827
1857
1828
1858
@model_validator (mode = "before" )
1829
1859
@classmethod
0 commit comments