@@ -47,6 +47,7 @@ async def acreate(
4747        guided_json : Optional [Dict [str , Any ]] =  None ,
4848        guided_regex : Optional [str ] =  None ,
4949        guided_choice : Optional [List [str ]] =  None ,
50+         guided_grammar : Optional [str ] =  None ,
5051        timeout : int  =  COMPLETION_TIMEOUT ,
5152        stream : bool  =  False ,
5253    ) ->  Union [CompletionSyncResponse , AsyncIterable [CompletionStreamResponse ]]:
@@ -118,6 +119,9 @@ async def acreate(
118119            guided_choice (Optional[List[str]]): 
119120                If specified, the output will be exactly one of the choices. 
120121
122+             guided_grammar (Optional[str]): 
123+                 If specified, the output will follow the context-free grammar provided. 
124+ 
121125            timeout (int): 
122126                Timeout in seconds. This is the maximum amount of time you are willing to wait for a response. 
123127
@@ -218,6 +222,7 @@ async def _acreate_stream(
218222                guided_json = guided_json ,
219223                guided_regex = guided_regex ,
220224                guided_choice = guided_choice ,
225+                 guided_grammar = guided_grammar ,
221226                timeout = timeout ,
222227            )
223228
@@ -242,6 +247,11 @@ async def _acreate_sync(**kwargs) -> CompletionSyncResponse:
242247                frequency_penalty = frequency_penalty ,
243248                top_k = top_k ,
244249                top_p = top_p ,
250+                 include_stop_str_in_output = include_stop_str_in_output ,
251+                 guided_json = guided_json ,
252+                 guided_regex = guided_regex ,
253+                 guided_choice = guided_choice ,
254+                 guided_grammar = guided_grammar ,
245255            )
246256
247257    @classmethod  
@@ -261,6 +271,7 @@ def create(
261271        guided_json : Optional [Dict [str , Any ]] =  None ,
262272        guided_regex : Optional [str ] =  None ,
263273        guided_choice : Optional [List [str ]] =  None ,
274+         guided_grammar : Optional [str ] =  None ,
264275        timeout : int  =  COMPLETION_TIMEOUT ,
265276        stream : bool  =  False ,
266277    ) ->  Union [CompletionSyncResponse , Iterator [CompletionStreamResponse ]]:
@@ -333,6 +344,9 @@ def create(
333344            guided_choice (Optional[List[str]]): 
334345                If specified, the output will be exactly one of the choices. 
335346
347+             guided_grammar (Optional[str]): 
348+                 If specified, the output will follow the context-free grammar provided. 
349+ 
336350            timeout (int): 
337351                Timeout in seconds. This is the maximum amount of time you are willing to wait for a response. 
338352
@@ -419,6 +433,11 @@ def _create_stream(**kwargs):
419433                frequency_penalty = frequency_penalty ,
420434                top_k = top_k ,
421435                top_p = top_p ,
436+                 include_stop_str_in_output = include_stop_str_in_output ,
437+                 guided_json = guided_json ,
438+                 guided_regex = guided_regex ,
439+                 guided_choice = guided_choice ,
440+                 guided_grammar = guided_grammar ,
422441            )
423442
424443        else :
@@ -436,6 +455,7 @@ def _create_stream(**kwargs):
436455                guided_json = guided_json ,
437456                guided_regex = guided_regex ,
438457                guided_choice = guided_choice ,
458+                 guided_grammar = guided_grammar ,
439459            ).dict ()
440460            response  =  cls .post_sync (
441461                resource_name = f"v1/llm/completions-sync?model_endpoint_name={ model }  ,
0 commit comments