feat: support --env (#1167)

aarnphm · web-flow · commit 98d46e9669f1 · 2025-04-01T14:44:25.000-04:00
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
diff --git a/README.md b/README.md
@@ -122,7 +122,6 @@ OpenLLM supports a wide range of state-of-the-art open-source LLMs. You can also
   </tr>
 </table>
 
-
 For the full model list, see the [OpenLLM models repository](https://github.com/bentoml/openllm-models).
 
 ## Start an LLM server
@@ -252,7 +251,7 @@ OpenLLM supports LLM cloud deployment via BentoML, the unified model serving fra
 [Sign up for BentoCloud](https://www.bentoml.com/) for free and [log in](https://docs.bentoml.com/en/latest/bentocloud/how-tos/manage-access-token.html). Then, run `openllm deploy` to deploy a model to BentoCloud:
 
 ```bash
-openllm deploy llama3.2:1b
+openllm deploy llama3.2:1b --env HF_TOKEN
 ```
 
 > [!NOTE]
diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py
@@ -240,12 +240,13 @@ def deploy(
     instance_type: typing.Optional[str] = None,
     repo: typing.Optional[str] = None,
     verbose: bool = False,
+    env: typing.Optional[list[str]] = typer.Option(None, "--env", help="Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.")
 ) -> None:
     if verbose:
         VERBOSE_LEVEL.set(20)
     bento = ensure_bento(model, repo_name=repo)
     if instance_type is not None:
-        return cloud_deploy(bento, DeploymentTarget(accelerators=[], name=instance_type))
+        return cloud_deploy(bento, DeploymentTarget(accelerators=[], name=instance_type), cli_envs=env)
     targets = sorted(
         filter(lambda x: can_run(bento, x) > 0, get_cloud_machine_spec()),
         key=lambda x: can_run(bento, x),
@@ -256,7 +257,7 @@ def deploy(
         raise typer.Exit(1)
     target = targets[0]
     output(f'Recommended instance type: {target.name}', style='green')
-    cloud_deploy(bento, target)
+    cloud_deploy(bento, target, cli_envs=env)
 
 
 @app.callback(invoke_without_command=True)
diff --git a/src/openllm/cloud.py b/src/openllm/cloud.py
@@ -17,22 +17,43 @@ def resolve_cloud_config() -> pathlib.Path:
     return pathlib.Path.home() / 'bentoml' / '.yatai.yaml'
 
 
-def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None) -> tuple[list[str], EnvVars]:
+def _get_deploy_cmd(
+    bento: BentoInfo, target: typing.Optional[DeploymentTarget] = None, cli_envs: typing.Optional[list[str]] = None
+) -> tuple[list[str], EnvVars]:
     cmd = ['bentoml', 'deploy', bento.bentoml_tag]
     env = EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
 
+    # Process CLI env vars first to determine overrides
+    explicit_envs: dict[str, str] = {}
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                name, value = env_var.split('=', 1)
+                explicit_envs[name] = value
+            else:
+                name = env_var
+                value = typing.cast(str, os.environ.get(name))
+                if value is None:
+                    output(f'Environment variable \'{name}\' specified via --env but not found in the current environment.', style='red')
+                    raise typer.Exit(1)
+                explicit_envs[name] = value
+
+    # Process envs defined in bento.yaml, skipping those overridden by CLI
     required_envs = bento.bento_yaml.get('envs', [])
-    required_env_names = [env['name'] for env in required_envs if 'name' in env]
+    required_env_names = [env['name'] for env in required_envs if 'name' in env and env['name'] not in explicit_envs]
     if required_env_names:
         output(
-            f'This model requires the following environment variables to run: {required_env_names!r}', style='yellow'
+            f'This model requires the following environment variables to run (unless overridden via --env): {required_env_names!r}',
+            style='yellow',
         )
 
-    for env_info in bento.bento_yaml.get('envs', []):
-        if 'name' not in env_info:
+    for env_info in required_envs:
+        name = typing.cast(str, env_info.get('name'))
+        if not name or name in explicit_envs:
             continue
-        if os.environ.get(env_info['name']):
-            default = os.environ[env_info['name']]
+
+        if os.environ.get(name):
+            default = os.environ[name]
         elif 'value' in env_info:
             default = env_info['value']
         else:
@@ -41,17 +62,21 @@ def _get_deploy_cmd(bento: BentoInfo, target: typing.Optional[DeploymentTarget]
         if INTERACTIVE.get():
             import questionary
 
-            value = questionary.text(f'{env_info["name"]}:', default=default).ask()
+            value = questionary.text(f'{name}: (from bento.yaml)', default=default).ask()
         else:
             if default == '':
-                output(f'Environment variable {env_info["name"]} is required but not provided', style='red')
+                output(f'Environment variable {name} (from bento.yaml) is required but not provided', style='red')
                 raise typer.Exit(1)
             else:
                 value = default
 
         if value is None:
             raise typer.Exit(1)
-        cmd += ['--env', f'{env_info["name"]}={value}']
+        cmd += ['--env', f'{name}={value}']
+
+    # Add explicitly provided env vars from CLI
+    for name, value in explicit_envs.items():
+        cmd += ['--env', f'{name}={value}']
 
     if target:
         cmd += ['--instance-type', target.name]
@@ -134,7 +159,7 @@ def get_cloud_machine_spec() -> list[DeploymentTarget]:
         return []
 
 
-def deploy(bento: BentoInfo, target: DeploymentTarget) -> None:
+def deploy(bento: BentoInfo, target: DeploymentTarget, cli_envs: typing.Optional[list[str]] = None) -> None:
     ensure_cloud_context()
-    cmd, env = _get_deploy_cmd(bento, target)
+    cmd, env = _get_deploy_cmd(bento, target, cli_envs=cli_envs)
     run_command(cmd, env=env, cwd=None)