@@ -290,6 +290,15 @@ class ResponsesRequest(OpenAIBaseModel):
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."),
     )
+    cache_salt: Optional[str] = Field(
+        default=None,
+        description=(
+            "If specified, the prefix cache will be salted with the provided "
+            "string to prevent an attacker from guessing prompts in "
+            "multi-user environments. The salt should be random, protected "
+            "from access by 3rd parties, and long enough to be "
+            "unpredictable (e.g., 43 characters base64-encoded, "
+            "corresponding to 256 bit). Not supported by vLLM engine V0."))
     # --8<-- [end:responses-extra-params]
 
     _DEFAULT_SAMPLING_PARAMS = {
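
Not part of the diff: a minimal client-side sketch of how the new `cache_salt` field could be supplied to a vLLM OpenAI-compatible server. The endpoint path, port, model name, and payload fields are assumptions for illustration (they follow the OpenAI Responses API shape that `ResponsesRequest` mirrors).

```python
# Hypothetical usage sketch only: the URL, model name, and payload fields
# below are placeholders, not guaranteed to match a given deployment.
import secrets

import requests

# 32 random bytes, base64url-encoded to 43 characters (~256 bits of entropy),
# matching the recommendation in the field description above.
salt = secrets.token_urlsafe(32)

resp = requests.post(
    "http://localhost:8000/v1/responses",
    json={
        "model": "my-model",
        "input": "Summarize the incident report.",
        "cache_salt": salt,  # keeps this user's prefix-cache entries private
    },
    timeout=30,
)
print(resp.status_code, resp.json())
```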
@@ -351,6 +360,19 @@ def validate_prompt(cls, data):
             raise ValueError("prompt template is not supported")
         return data
 
+    @model_validator(mode="before")
+    def check_cache_salt_support(cls, data):
+        if data.get("cache_salt") is not None:
+            if not envs.VLLM_USE_V1:
+                raise ValueError(
+                    "Parameter 'cache_salt' is not supported with "
+                    "this instance of vLLM, which uses engine V0.")
+            if not isinstance(data["cache_salt"],
+                              str) or not data["cache_salt"]:
+                raise ValueError("Parameter 'cache_salt' must be a "
+                                 "non-empty string if provided.")
+        return data
+
 
 class ChatCompletionRequest(OpenAIBaseModel):
     # Ordered by official OpenAI API documentation
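
As an aside, the validator added above is a standard pydantic v2 `mode="before"` hook. A self-contained toy model (not the vLLM class; `envs.VLLM_USE_V1` replaced by a local flag) showing the same rejection behavior:

```python
# Toy illustration of the before-mode validator's behavior; USE_V1 stands in
# for envs.VLLM_USE_V1 and ToyRequest is not a real vLLM class.
from typing import Optional

from pydantic import BaseModel, Field, model_validator

USE_V1 = True


class ToyRequest(BaseModel):
    prompt: str
    cache_salt: Optional[str] = Field(default=None)

    @model_validator(mode="before")
    @classmethod
    def check_cache_salt_support(cls, data):
        # mode="before" sees the raw input dict, before field coercion.
        if data.get("cache_salt") is not None:
            if not USE_V1:
                raise ValueError("cache_salt requires the V1 engine.")
            if not isinstance(data["cache_salt"], str) or not data["cache_salt"]:
                raise ValueError("cache_salt must be a non-empty string.")
        return data


ToyRequest(prompt="hi", cache_salt="a" * 43)   # accepted
try:
    ToyRequest(prompt="hi", cache_salt="")     # rejected: empty string
except ValueError as err:                      # pydantic wraps it in ValidationError
    print(err)
```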
@@ -1004,6 +1026,16 @@ class CompletionRequest(OpenAIBaseModel):
             " as strings of the form 'token_id:{token_id}' so that tokens "
             "that are not JSON-encodable can be identified."))
 
+    cache_salt: Optional[str] = Field(
+        default=None,
+        description=(
+            "If specified, the prefix cache will be salted with the provided "
+            "string to prevent an attacker from guessing prompts in "
+            "multi-user environments. The salt should be random, protected "
+            "from access by 3rd parties, and long enough to be "
+            "unpredictable (e.g., 43 characters base64-encoded, "
+            "corresponding to 256 bit). Not supported by vLLM engine V0."))
+
     kv_transfer_params: Optional[dict[str, Any]] = Field(
         default=None,
         description="KVTransfer parameters used for disaggregated serving.")
@@ -1180,6 +1212,20 @@ def validate_prompt_and_prompt_embeds(cls, data):
                 "At least one of `prompt` or `prompt_embeds` must be set.")
         return data
 
+    @model_validator(mode="before")
+    @classmethod
+    def check_cache_salt_support(cls, data):
+        if data.get("cache_salt") is not None:
+            if not envs.VLLM_USE_V1:
+                raise ValueError(
+                    "Parameter 'cache_salt' is not supported with "
+                    "this instance of vLLM, which uses engine V0.")
+            if not isinstance(data["cache_salt"],
+                              str) or not data["cache_salt"]:
+                raise ValueError("Parameter 'cache_salt' must be a "
+                                 "non-empty string if provided.")
+        return data
+
 
 class EmbeddingCompletionRequest(OpenAIBaseModel):
     # Ordered by official OpenAI API documentation
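
For intuition (not vLLM's actual implementation): a salt like this only has to be folded into whatever key identifies cached prefix blocks, so identical prompts submitted with different salts can never share cache entries and cache-hit timing stops leaking other users' prompts. A sketch of that idea:

```python
# Illustrative only: a made-up salted cache-key derivation, not vLLM's real
# V1 block hashing. It just shows why per-user salts prevent prefix sharing.
import hashlib
from typing import Optional


def block_key(token_ids: list[int], salt: Optional[str]) -> str:
    h = hashlib.sha256()
    if salt is not None:
        h.update(salt.encode("utf-8"))  # different salts -> disjoint key spaces
    h.update(b",".join(str(t).encode() for t in token_ids))
    return h.hexdigest()


tokens = [101, 2023, 2003, 1037, 3231, 102]
# Same tokens, different salts -> different keys, so no shared cache entry.
print(block_key(tokens, "tenant-a") == block_key(tokens, "tenant-b"))  # False
```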
@@ -1971,7 +2017,7 @@ class TranscriptionRequest(OpenAIBaseModel):
     """
 
     stream: Optional[bool] = False
-    """When set, it will enable output to be streamed in a similar fashion
+    """When set, it will enable output to be streamed in a similar fashion
     as the Chat Completion endpoint.
     """
     # --8<-- [start:transcription-extra-params]
@@ -2233,9 +2279,9 @@ class TranslationRequest(OpenAIBaseModel):
     """
 
     stream: Optional[bool] = False
-    """Custom field not present in the original OpenAI definition. When set,
+    """Custom field not present in the original OpenAI definition. When set,
     it will enable output to be streamed in a similar fashion as the Chat
-    Completion endpoint.
+    Completion endpoint.
     """
     # Flattened stream option to simplify form data.
     stream_include_usage: Optional[bool] = False