[Client] Add guided_grammar and other missing fields (#532)

seanshi-scale · web-flow · commit 9a937f8f419a · 2024-06-04T10:47:56.000-07:00
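Add `guided_grammar` to the client, and forward fields that some code paths were previously omitting (`include_stop_str_in_output`, `guided_json`, `guided_regex`, `guided_choice`).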
diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.0.0b33"
+__version__ = "0.0.0b34"
 
 import os
 from typing import Sequence
diff --git a/clients/python/llmengine/completion.py b/clients/python/llmengine/completion.py
@@ -47,6 +47,7 @@ async def acreate(
         guided_json: Optional[Dict[str, Any]] = None,
         guided_regex: Optional[str] = None,
         guided_choice: Optional[List[str]] = None,
+        guided_grammar: Optional[str] = None,
         timeout: int = COMPLETION_TIMEOUT,
         stream: bool = False,
     ) -> Union[CompletionSyncResponse, AsyncIterable[CompletionStreamResponse]]:
@@ -118,6 +119,9 @@ async def acreate(
             guided_choice (Optional[List[str]]):
                 If specified, the output will be exactly one of the choices.
 
+            guided_grammar (Optional[str]):
+                If specified, the output will follow the context-free grammar provided.
+
             timeout (int):
                 Timeout in seconds. This is the maximum amount of time you are willing to wait for a response.
 
@@ -218,6 +222,7 @@ async def _acreate_stream(
                 guided_json=guided_json,
                 guided_regex=guided_regex,
                 guided_choice=guided_choice,
+                guided_grammar=guided_grammar,
                 timeout=timeout,
             )
 
@@ -242,6 +247,11 @@ async def _acreate_sync(**kwargs) -> CompletionSyncResponse:
                 frequency_penalty=frequency_penalty,
                 top_k=top_k,
                 top_p=top_p,
+                include_stop_str_in_output=include_stop_str_in_output,
+                guided_json=guided_json,
+                guided_regex=guided_regex,
+                guided_choice=guided_choice,
+                guided_grammar=guided_grammar,
             )
 
     @classmethod
@@ -261,6 +271,7 @@ def create(
         guided_json: Optional[Dict[str, Any]] = None,
         guided_regex: Optional[str] = None,
         guided_choice: Optional[List[str]] = None,
+        guided_grammar: Optional[str] = None,
         timeout: int = COMPLETION_TIMEOUT,
         stream: bool = False,
     ) -> Union[CompletionSyncResponse, Iterator[CompletionStreamResponse]]:
@@ -333,6 +344,9 @@ def create(
             guided_choice (Optional[List[str]]):
                 If specified, the output will be exactly one of the choices.
 
+            guided_grammar (Optional[str]):
+                If specified, the output will follow the context-free grammar provided.
+
             timeout (int):
                 Timeout in seconds. This is the maximum amount of time you are willing to wait for a response.
 
@@ -419,6 +433,11 @@ def _create_stream(**kwargs):
                 frequency_penalty=frequency_penalty,
                 top_k=top_k,
                 top_p=top_p,
+                include_stop_str_in_output=include_stop_str_in_output,
+                guided_json=guided_json,
+                guided_regex=guided_regex,
+                guided_choice=guided_choice,
+                guided_grammar=guided_grammar,
             )
 
         else:
@@ -436,6 +455,7 @@ def _create_stream(**kwargs):
                 guided_json=guided_json,
                 guided_regex=guided_regex,
                 guided_choice=guided_choice,
+                guided_grammar=guided_grammar,
             ).dict()
             response = cls.post_sync(
                 resource_name=f"v1/llm/completions-sync?model_endpoint_name={model}",
diff --git a/clients/python/llmengine/data_types.py b/clients/python/llmengine/data_types.py
@@ -331,6 +331,7 @@ class CompletionSyncV1Request(BaseModel):
     guided_json: Optional[Dict[str, Any]] = Field(default=None)
     guided_regex: Optional[str] = Field(default=None)
     guided_choice: Optional[List[str]] = Field(default=None)
+    guided_grammar: Optional[str] = Field(default=None)
 
 
 class TokenOutput(BaseModel):
@@ -405,6 +406,7 @@ class CompletionStreamV1Request(BaseModel):
     guided_json: Optional[Dict[str, Any]] = Field(default=None)
     guided_regex: Optional[str] = Field(default=None)
     guided_choice: Optional[List[str]] = Field(default=None)
+    guided_grammar: Optional[str] = Field(default=None)
 
 
 class CompletionStreamOutput(BaseModel):
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scale-llm-engine"
-version = "0.0.0.beta33"
+version = "0.0.0.beta34"
 description = "Scale LLM Engine Python client"
 license = "Apache-2.0"
 authors = ["Phil Chen <phil.chen@scale.com>"]
diff --git a/clients/python/setup.py b/clients/python/setup.py
@@ -3,7 +3,7 @@
 setup(
     name="scale-llm-engine",
     python_requires=">=3.7",
-    version="0.0.0.beta33",
+    version="0.0.0.beta34",
     packages=find_packages(),
     package_data={"llmengine": ["py.typed"]},
 )

Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,7 @@`
`3`	`3`	`setup(`
`4`	`4`	`name="scale-llm-engine",`
`5`	`5`	`python_requires=">=3.7",`
`6`		`- version="0.0.0.beta33",`
	`6`	`+ version="0.0.0.beta34",`
`7`	`7`	`packages=find_packages(),`
`8`	`8`	`package_data={"llmengine": ["py.typed"]},`
`9`	`9`	`)`