@@ -1,6 +1,6 @@
 from __future__ import annotations

-import asyncio, time, typing
+import asyncio, time, typing, os
 import httpx, openai

 from openai.types.chat import ChatCompletionAssistantMessageParam, ChatCompletionUserMessageParam
@@ -19,8 +19,6 @@


 def prep_env_vars(bento: BentoInfo) -> None:
-    import os
-
     env_vars = bento.envs
     for env_var in env_vars:
         if not env_var.get('value'):
@@ -30,23 +28,57 @@ def prep_env_vars(bento: BentoInfo) -> None:
         os.environ[key] = value


-def _get_serve_cmd(bento: BentoInfo, port: int = 3000) -> tuple[list[str], EnvVars]:
+def _get_serve_cmd(
+    bento: BentoInfo, port: int = 3000, cli_args: typing.Optional[list[str]] = None
+) -> tuple[list[str], EnvVars]:
     cmd = ['bentoml', 'serve', bento.bentoml_tag]
     if port != 3000:
         cmd += ['--port', str(port)]
+
+    # Add CLI arguments if provided
+    if cli_args:
+        for arg in cli_args:
+            cmd += ['--arg', arg]
+
     return cmd, EnvVars({'BENTOML_HOME': f'{bento.repo.path}/bentoml'})


-def serve(bento: BentoInfo, port: int = 3000) -> None:
+def serve(
+    bento: BentoInfo,
+    port: int = 3000,
+    cli_envs: typing.Optional[list[str]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
     prep_env_vars(bento)
-    cmd, env = _get_serve_cmd(bento, port=port)
+    cmd, env = _get_serve_cmd(bento, port=port, cli_args=cli_args)
+
+    # Add CLI environment variables if provided
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                key, value = env_var.split('=', 1)
+                env[key] = value
+            else:
+                env[env_var] = os.environ.get(env_var, '')
+
     venv = ensure_venv(bento, runtime_envs=env)
     output(f'Access the Chat UI at http://localhost:{port}/chat (or with you IP)')
     run_command(cmd, env=env, cwd=None, venv=venv)


-async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
-    cmd, env = _get_serve_cmd(bento, port)
+async def _run_model(
+    bento: BentoInfo,
+    port: int = 3000,
+    timeout: int = 600,
+    cli_env: typing.Optional[dict[str, typing.Any]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
+    cmd, env = _get_serve_cmd(bento, port, cli_args=cli_args)
+
+    # Merge cli environment variables if provided
+    if cli_env:
+        env.update(cli_env)
+
     venv = ensure_venv(bento, runtime_envs=env)
     async with async_run_command(cmd, env=env, cwd=None, venv=venv, silent=False) as server_proc:
         output(f'Model server started {server_proc.pid}')
@@ -109,9 +141,26 @@ async def _run_model(bento: BentoInfo, port: int = 3000, timeout: int = 600) ->
            except KeyboardInterrupt:
                break
        output('\nStopping model server...', style='green')
-    output('Stopped model server', style='green')
+    output('Stopped model server', style='green')


-def run(bento: BentoInfo, port: int = 3000, timeout: int = 600) -> None:
+def run(
+    bento: BentoInfo,
+    port: int = 3000,
+    timeout: int = 600,
+    cli_envs: typing.Optional[list[str]] = None,
+    cli_args: typing.Optional[list[str]] = None,
+) -> None:
     prep_env_vars(bento)
-    asyncio.run(_run_model(bento, port=port, timeout=timeout))
+
+    # Add CLI environment variables to the process
+    env = {}
+    if cli_envs:
+        for env_var in cli_envs:
+            if '=' in env_var:
+                key, value = env_var.split('=', 1)
+                env[key] = value
+            else:
+                env[env_var] = os.environ.get(env_var, '')
+
+    asyncio.run(_run_model(bento, port=port, timeout=timeout, cli_env=env, cli_args=cli_args))
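For reference, the KEY=VALUE handling that this diff adds to both serve() and run() can be read in isolation. The sketch below is illustrative only (the parse_cli_envs helper is hypothetical, not part of the change) and mirrors what the two loops do: entries containing '=' are split on the first '=', and bare names are looked up in the current process environment, defaulting to an empty string.

import os
import typing


def parse_cli_envs(cli_envs: typing.Optional[list[str]]) -> dict[str, str]:
    # Hypothetical helper mirroring the loops added in serve() and run().
    env: dict[str, str] = {}
    for env_var in cli_envs or []:
        if '=' in env_var:
            # 'KEY=VALUE' -> split on the first '=' only
            key, value = env_var.split('=', 1)
            env[key] = value
        else:
            # bare 'KEY' -> inherit the value from the current process environment
            env[env_var] = os.environ.get(env_var, '')
    return env


if __name__ == '__main__':
    os.environ['HF_TOKEN'] = 'hf_dummy'
    print(parse_cli_envs(['OPENAI_API_KEY=sk-test', 'HF_TOKEN']))
    # -> {'OPENAI_API_KEY': 'sk-test', 'HF_TOKEN': 'hf_dummy'}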