@@ -47,6 +47,7 @@ async def acreate(
47
47
guided_json : Optional [Dict [str , Any ]] = None ,
48
48
guided_regex : Optional [str ] = None ,
49
49
guided_choice : Optional [List [str ]] = None ,
50
+ guided_grammar : Optional [str ] = None ,
50
51
timeout : int = COMPLETION_TIMEOUT ,
51
52
stream : bool = False ,
52
53
) -> Union [CompletionSyncResponse , AsyncIterable [CompletionStreamResponse ]]:
@@ -118,6 +119,9 @@ async def acreate(
118
119
guided_choice (Optional[List[str]]):
119
120
If specified, the output will be exactly one of the choices.
120
121
122
+ guided_grammar (Optional[str]):
123
+ If specified, the output will follow the context-free grammar provided.
124
+
121
125
timeout (int):
122
126
Timeout in seconds. This is the maximum amount of time you are willing to wait for a response.
123
127
@@ -218,6 +222,7 @@ async def _acreate_stream(
218
222
guided_json = guided_json ,
219
223
guided_regex = guided_regex ,
220
224
guided_choice = guided_choice ,
225
+ guided_grammar = guided_grammar ,
221
226
timeout = timeout ,
222
227
)
223
228
@@ -242,6 +247,11 @@ async def _acreate_sync(**kwargs) -> CompletionSyncResponse:
242
247
frequency_penalty = frequency_penalty ,
243
248
top_k = top_k ,
244
249
top_p = top_p ,
250
+ include_stop_str_in_output = include_stop_str_in_output ,
251
+ guided_json = guided_json ,
252
+ guided_regex = guided_regex ,
253
+ guided_choice = guided_choice ,
254
+ guided_grammar = guided_grammar ,
245
255
)
246
256
247
257
@classmethod
@@ -261,6 +271,7 @@ def create(
261
271
guided_json : Optional [Dict [str , Any ]] = None ,
262
272
guided_regex : Optional [str ] = None ,
263
273
guided_choice : Optional [List [str ]] = None ,
274
+ guided_grammar : Optional [str ] = None ,
264
275
timeout : int = COMPLETION_TIMEOUT ,
265
276
stream : bool = False ,
266
277
) -> Union [CompletionSyncResponse , Iterator [CompletionStreamResponse ]]:
@@ -333,6 +344,9 @@ def create(
333
344
guided_choice (Optional[List[str]]):
334
345
If specified, the output will be exactly one of the choices.
335
346
347
+ guided_grammar (Optional[str]):
348
+ If specified, the output will follow the context-free grammar provided.
349
+
336
350
timeout (int):
337
351
Timeout in seconds. This is the maximum amount of time you are willing to wait for a response.
338
352
@@ -419,6 +433,11 @@ def _create_stream(**kwargs):
419
433
frequency_penalty = frequency_penalty ,
420
434
top_k = top_k ,
421
435
top_p = top_p ,
436
+ include_stop_str_in_output = include_stop_str_in_output ,
437
+ guided_json = guided_json ,
438
+ guided_regex = guided_regex ,
439
+ guided_choice = guided_choice ,
440
+ guided_grammar = guided_grammar ,
422
441
)
423
442
424
443
else :
@@ -436,6 +455,7 @@ def _create_stream(**kwargs):
436
455
guided_json = guided_json ,
437
456
guided_regex = guided_regex ,
438
457
guided_choice = guided_choice ,
458
+ guided_grammar = guided_grammar ,
439
459
).dict ()
440
460
response = cls .post_sync (
441
461
resource_name = f"v1/llm/completions-sync?model_endpoint_name={ model } " ,
0 commit comments