fix: make sure interactive mode is applied when running hello

aarnphm · aarnphm · commit 604736458340 · 2025-04-16T00:40:10.000-04:00
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
diff --git a/src/openllm/__main__.py b/src/openllm/__main__.py
@@ -7,7 +7,7 @@
 from openllm.accelerator_spec import can_run, get_local_machine_spec
 from openllm.analytic import DO_NOT_TRACK, OpenLLMTyper
 from openllm.clean import app as clean_app
-from openllm.cloud import deploy as cloud_deploy, ensure_cloud_context, get_cloud_machine_spec
+from openllm.cloud import deploy as cloud_deploy, get_cloud_machine_spec
 from openllm.common import CHECKED, INTERACTIVE, VERBOSE_LEVEL, BentoInfo, output
 from openllm.local import run as local_run, serve as local_serve
 from openllm.model import app as model_app, ensure_bento, list_bento
@@ -120,7 +120,14 @@ def _select_target(bento: BentoInfo, targets: list[DeploymentTarget]) -> Deploym
   return selected
 
 
-def _select_action(bento: BentoInfo, score: float, context: typing.Optional[str] = None) -> None:
+def _select_action(
+  bento: BentoInfo,
+  score: float,
+  context: typing.Optional[str] = None,
+  envs: typing.Optional[list[str]] = None,
+  arg: typing.Optional[list[str]] = None,
+  interactive: bool = False,
+) -> None:
   if score > 0:
     options: list[typing.Any] = [
       questionary.Separator('Available actions'),
@@ -168,22 +175,23 @@ def _select_action(bento: BentoInfo, score: float, context: typing.Optional[str]
   if action == 'run':
     try:
       port = random.randint(30000, 40000)
-      local_run(bento, port=port)
+      local_run(bento, port=port, cli_envs=envs, cli_args=arg)
     finally:
       output('\nUse this command to run the action again:', style='green')
       output(f'  $ openllm run {bento}', style='orange')
   elif action == 'serve':
     try:
-      local_serve(bento)
+      local_serve(bento, cli_envs=envs, cli_args=arg)
     finally:
       output('\nUse this command to run the action again:', style='green')
       output(f'  $ openllm serve {bento}', style='orange')
   elif action == 'deploy':
-    ensure_cloud_context()
     targets = get_cloud_machine_spec(context=context)
     target = _select_target(bento, targets)
     try:
-      cloud_deploy(bento, target, context=context)
+      cloud_deploy(
+        bento, target, cli_envs=envs, context=context, cli_args=arg, interactive=interactive
+      )
     finally:
       output('\nUse this command to run the action again:', style='green')
       output(f'  $ openllm deploy {bento} --instance-type {target.name}', style='orange')
@@ -192,11 +200,16 @@ def _select_action(bento: BentoInfo, score: float, context: typing.Optional[str]
 @app.command(help='get started interactively')
 def hello(
   repo: typing.Optional[str] = None,
-  env: typing.Optional[list[str]] = typer.Option(
+  envs: typing.Optional[list[str]] = typer.Option(
     None,
     '--env',
     help='Environment variables to pass to the deployment command. Format: NAME or NAME=value. Can be specified multiple times.',
   ),
+  arg: typing.Optional[list[str]] = typer.Option(
+    None,
+    '--arg',
+    help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
+  ),
   context: typing.Optional[str] = typer.Option(
     None, '--context', help='BentoCloud context name to pass to the deployment command.'
   ),
@@ -221,7 +234,7 @@ def hello(
 
   bento_name, repo = _select_bento_name(models, target)
   bento, score = _select_bento_version(models, target, bento_name, repo)
-  _select_action(bento, score, context=context)
+  _select_action(bento, score, context=context, envs=envs, arg=arg, interactive=INTERACTIVE.get())
 
 
 @app.command(help='start an OpenAI API compatible chat server and chat in browser')
@@ -291,26 +304,43 @@ def deploy(
   context: typing.Optional[str] = typer.Option(
     None, '--context', help='BentoCloud context name to pass to the deployment command.'
   ),
+  arg: typing.Optional[list[str]] = typer.Option(
+    None,
+    '--arg',
+    help='Bento arguments in the form of key=value pairs. Can be specified multiple times.',
+  ),
 ) -> None:
   cmd_update()
   if verbose:
     VERBOSE_LEVEL.set(20)
   bento = ensure_bento(model, repo_name=repo)
   if instance_type is not None:
     return cloud_deploy(
-      bento, DeploymentTarget(accelerators=[], name=instance_type), cli_envs=env, context=context
+      bento,
+      DeploymentTarget(accelerators=[], name=instance_type),
+      cli_envs=env,
+      context=context,
+      cli_args=arg,
+      interactive=INTERACTIVE.get(),
     )
-  targets = sorted(
-    filter(lambda x: can_run(bento, x) > 0, get_cloud_machine_spec(context=context)),
-    key=lambda x: can_run(bento, x),
-    reverse=True,
+  targets = get_cloud_machine_spec(context=context)
+  runnable_targets = sorted(
+    filter(lambda x: can_run(bento, x) > 0, targets), key=lambda x: can_run(bento, x), reverse=True
   )
-  if not targets:
+  if not runnable_targets:
     output('No available instance type, check your bentocloud account', style='red')
     raise typer.Exit(1)
-  target = targets[0]
-  output(f'Recommended instance type: {target.name}', style='green')
-  cloud_deploy(bento, target, cli_envs=env, context=context)
+
+  # Use questionary to select target when in interactive mode and no instance_type is provided
+  if INTERACTIVE.get() and instance_type is None:
+    target = _select_target(bento, targets)
+  else:
+    target = runnable_targets[0]
+    output(f'Recommended instance type: {target.name}', style='green')
+
+  cloud_deploy(
+    bento, target, cli_envs=env, context=context, cli_args=arg, interactive=INTERACTIVE.get()
+  )
 
 
 @app.callback(invoke_without_command=True)
diff --git a/src/openllm/cloud.py b/src/openllm/cloud.py
@@ -5,7 +5,7 @@
 
 from openllm.analytic import OpenLLMTyper
 from openllm.accelerator_spec import ACCELERATOR_SPECS
-from openllm.common import INTERACTIVE, BentoInfo, DeploymentTarget, EnvVars, output, run_command
+from openllm.common import BentoInfo, DeploymentTarget, EnvVars, output, run_command, INTERACTIVE
 
 app = OpenLLMTyper()
 
@@ -22,8 +22,13 @@ def _get_deploy_cmd(
   target: typing.Optional[DeploymentTarget] = None,
   cli_envs: typing.Optional[list[str]] = None,
   context: typing.Optional[str] = None,
+  cli_args: typing.Optional[list[str]] = None,
 ) -> tuple[list[str], EnvVars]:
   cmd = ['bentoml', 'deploy', bento.bentoml_tag]
+  if cli_args:
+    for arg in cli_args:
+      cmd += ['--arg', arg]
+
   env = EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})
 
   # Process CLI env vars first to determine overrides
@@ -64,7 +69,7 @@ def _get_deploy_cmd(
 
   for env_info in required_envs:
     name = typing.cast(str, env_info.get('name'))
-    if not name or name in explicit_envs or env_info.get('value', None) is not None:
+    if not name or name in explicit_envs or env_info.get('value', ''):
       continue
 
     if os.environ.get(name):
@@ -210,7 +215,10 @@ def deploy(
   target: DeploymentTarget,
   cli_envs: typing.Optional[list[str]] = None,
   context: typing.Optional[str] = None,
+  cli_args: typing.Optional[list[str]] = None,
+  interactive: bool = False,
 ) -> None:
+  INTERACTIVE.set(interactive)
   ensure_cloud_context()
-  cmd, env = _get_deploy_cmd(bento, target, cli_envs=cli_envs, context=context)
+  cmd, env = _get_deploy_cmd(bento, target, cli_envs=cli_envs, context=context, cli_args=cli_args)
   run_command(cmd, env=env, cwd=None)
diff --git a/uv.lock b/uv.lock