
Commit 3e63a0d

Support space separated args, fixed config (#78)
Signed-off-by: Ira <IRAR@il.ibm.com>
1 parent a2ce44d commit 3e63a0d

File tree: 5 files changed, +143 -67 lines changed

README.md

Lines changed: 3 additions & 3 deletions

@@ -88,8 +88,8 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
 - `config`: the path to a yaml configuration file
 - `port`: the port the simulator listens on, default is 8000
 - `model`: the currently 'loaded' model, mandatory
-- `served-model-name`: model names exposed by the API (comma-separated)
-- `lora-modules`: LoRA module configurations in JSON format: [{"name": "name", "path": "lora_path", "base_model_name": "id"}], optional, empty by default
+- `served-model-name`: model names exposed by the API (a list of space-separated strings)
+- `lora-modules`: a list of LoRA adapters (a list of space-separated JSON strings): '{"name": "name", "path": "lora_path", "base_model_name": "id"}', optional, empty by default
 - `max-loras`: maximum number of LoRAs in a single batch, optional, default is one
 - `max-cpu-loras`: maximum number of LoRAs to store in CPU memory, optional, must be >= max-loras, default is max-loras
 - `max-num-seqs`: maximum number of sequences per iteration (maximum number of inference requests that could be processed at the same time), default is 5

@@ -118,7 +118,7 @@ In addition, as we are using klog, the following parameters are available:

 ## Migrating from releases prior to v0.2.0
 - `max-running-requests` was replaced by `max-num-seqs`
-- `lora` was replaced by `lora-modules`, which is now an array in JSON format, e.g., [{"name": "name", "path": "lora_path", "base_model_name": "id"}]
+- `lora` was replaced by `lora-modules`, which is now a list of JSON strings, e.g., '{"name": "name", "path": "lora_path", "base_model_name": "id"}'

 ## Working with docker image
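The practical upshot of this README change: multiple values are now passed as separate space-separated arguments rather than as one comma-separated string or JSON array. A sketch of an invocation under the new format (the binary name is assumed here for illustration; the flag values are taken from this commit's tests):

./llm-d-inference-sim --model "Qwen/Qwen2-0.5B" \
  --served-model-name model1 model2 \
  --lora-modules '{"name":"lora1","path":"/path/to/lora1"}' '{"name":"lora2","path":"/path/to/lora2"}'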

manifests/config.yaml

Lines changed: 6 additions & 3 deletions

@@ -1,11 +1,14 @@
 port: 8001
 model: "Qwen/Qwen2-0.5B"
-served-model-name: ["model1", "model2"]
+served-model-name:
+  - "model1"
+  - "model2"
 max-loras: 2
 max-cpu-loras: 5
 max-num-seqs: 5
-lora-modules: [{"name":"lora1","path":"/path/to/lora1"},{"name":"lora2","path":"/path/to/lora2"}]
-
+lora-modules:
+  - '{"name":"lora1","path":"/path/to/lora1"}'
+  - '{"name":"lora2","path":"/path/to/lora2"}'
 mode: "random"
 time-to-first-token: 2
 inter-token-latency: 1
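Note the format change above: since the CLI now treats each LoRA adapter as a single JSON string, the YAML config mirrors that by holding a list of quoted JSON strings instead of inline YAML mappings, so both input paths can be decoded by the same unmarshalLoras helper introduced in config.go below.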

pkg/llm-d-inference-sim/config.go

Lines changed: 30 additions & 27 deletions

@@ -21,6 +21,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"strings"

 	"gopkg.in/yaml.v3"
 )

@@ -39,8 +40,10 @@ type configuration struct {
 	// MaxNumSeqs is maximum number of sequences per iteration (the maximum
 	// number of inference requests that could be processed at the same time)
 	MaxNumSeqs int `yaml:"max-num-seqs"`
+	// LoraModulesString is a list of LoRA adapters as strings
+	LoraModulesString []string `yaml:"lora-modules"`
 	// LoraModules is a list of LoRA adapters
-	LoraModules loraModulesValue `yaml:"lora-modules"`
+	LoraModules []loraModule

 	// TimeToFirstToken time before the first token will be returned, in milliseconds
 	TimeToFirstToken int `yaml:"time-to-first-token"`

@@ -52,42 +55,41 @@ type configuration struct {

 type loraModule struct {
 	// Name is the LoRA's name
-	Name string `yaml:"name"`
+	Name string `json:"name"`
 	// Path is the LoRA's path
-	Path string `yaml:"path"`
+	Path string `json:"path"`
 	// BaseModelName is the LoRA's base model
-	BaseModelName string `yaml:"base_model_name"`
+	BaseModelName string `json:"base_model_name"`
 }

-type loraModulesValue []loraModule
+// Needed to parse values that contain multiple strings
+type multiString struct {
+	values []string
+}

-func (l *loraModulesValue) String() string {
-	b, _ := json.Marshal(l)
-	return string(b)
+func (l *multiString) String() string {
+	return strings.Join(l.values, " ")
 }

-func (l *loraModulesValue) Set(val string) error {
-	return json.Unmarshal([]byte(val), l)
+func (l *multiString) Set(val string) error {
+	l.values = append(l.values, val)
+	return nil
 }

-func (l *loraModulesValue) Type() string {
-	return "loras"
+func (l *multiString) Type() string {
+	return "strings"
 }

-// Implement custom YAML unmarshaling for just this type
-func (l *loraModulesValue) UnmarshalYAML(unmarshal func(interface{}) error) error {
-	// Try parsing as an array of loraModule
-	var arr []loraModule
-	if err := unmarshal(&arr); err == nil {
-		*l = arr
-		return nil
-	}
-	// Try parsing as a JSON string
-	var str string
-	if err := unmarshal(&str); err == nil {
-		return json.Unmarshal([]byte(str), l)
+func (c *configuration) unmarshalLoras() error {
+	c.LoraModules = make([]loraModule, 0)
+	for _, jsonStr := range c.LoraModulesString {
+		var lora loraModule
+		if err := json.Unmarshal([]byte(jsonStr), &lora); err != nil {
+			return err
+		}
+		c.LoraModules = append(c.LoraModules, lora)
 	}
-	return errors.New("lora-modules: invalid format")
+	return nil
 }

 func newConfig() *configuration {

@@ -108,7 +110,8 @@ func (c *configuration) load(configFile string) error {
 	if err := yaml.Unmarshal(configBytes, &c); err != nil {
 		return fmt.Errorf("failed to unmarshal configuration: %s", err)
 	}
-	return nil
+
+	return c.unmarshalLoras()
 }

 func (c *configuration) validate() error {

@@ -118,7 +121,7 @@ func (c *configuration) validate() error {
 	// Upstream vLLM behaviour: when --served-model-name is not provided,
 	// it falls back to using the value of --model as the single public name
 	// returned by the API and exposed in Prometheus metrics.
-	if len(c.ServedModelNames) == 0 {
+	if len(c.ServedModelNames) == 0 || c.ServedModelNames[0] == "" {
 		c.ServedModelNames = []string{c.Model}
 	}
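Worth noting how multiString interacts with pflag: pflag calls Set once per occurrence of a flag, so a custom Value by itself cannot consume several space-separated values following a single flag. That gap is exactly why this commit pre-scans os.Args with getParamValueFromArgs (see simulator.go below). A minimal, self-contained sketch of the behavior (demo code, not part of the commit; flag and variable names are illustrative):

package main

import (
	"fmt"
	"strings"

	"github.com/spf13/pflag"
)

// multiString mirrors the pflag.Value added in config.go above:
// each call to Set appends one value.
type multiString struct {
	values []string
}

func (m *multiString) String() string { return strings.Join(m.values, " ") }

func (m *multiString) Set(v string) error {
	m.values = append(m.values, v) // called once per --flag occurrence
	return nil
}

func (m *multiString) Type() string { return "strings" }

func main() {
	f := pflag.NewFlagSet("demo", pflag.ContinueOnError)
	var names multiString
	f.Var(&names, "served-model-name", "model names exposed by the API")

	// Repeating the flag accumulates values via Set:
	_ = f.Parse([]string{"--served-model-name", "alias1", "--served-model-name", "alias2"})
	fmt.Println(names.values) // [alias1 alias2]

	// With "--served-model-name alias1 alias2", pflag would call
	// Set("alias1") only; "alias2" would land in f.Args(). Hence the
	// manual pre-scan of os.Args in simulator.go.
}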

pkg/llm-d-inference-sim/config_test.go

Lines changed: 64 additions & 4 deletions

@@ -78,6 +78,10 @@ var _ = Describe("Simulator configuration", func() {
 			args:           []string{"cmd", "--config", "../../manifests/config.yaml"},
 			expectedConfig: c,
 		}
+		c.LoraModulesString = []string{
+			"{\"name\":\"lora1\",\"path\":\"/path/to/lora1\"}",
+			"{\"name\":\"lora2\",\"path\":\"/path/to/lora2\"}",
+		}
 		tests = append(tests, test)

 		// Config from config.yaml file plus command line args

@@ -90,11 +94,65 @@ var _ = Describe("Simulator configuration", func() {
 		c.MaxNumSeqs = 5
 		c.TimeToFirstToken = 2
 		c.InterTokenLatency = 1
-		c.LoraModules = []loraModule{{Name: "lora1", Path: "/path/to/lora1"}, {Name: "lora2", Path: "/path/to/lora2"}}
+		c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}, {Name: "lora4", Path: "/path/to/lora4"}}
+		c.LoraModulesString = []string{
+			"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+			"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}",
+		}
+		test = testCase{
+			name: "config file with command line args",
+			args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
+				"--served-model-name", "alias1", "alias2",
+				"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}", "{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}",
+			},
+			expectedConfig: c,
+		}
+		tests = append(tests, test)
+
+		// Config from config.yaml file plus command line args with different format
+		c = newConfig()
+		c.Port = 8002
+		c.Model = model
+		c.ServedModelNames = []string{c.Model}
+		c.MaxLoras = 2
+		c.MaxCPULoras = 5
+		c.MaxNumSeqs = 5
+		c.TimeToFirstToken = 2
+		c.InterTokenLatency = 1
+		c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
+		c.LoraModulesString = []string{
+			"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+		}
+		test = testCase{
+			name: "config file with command line args",
+			args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
+				"--served-model-name",
+				"--lora-modules={\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+			},
+			expectedConfig: c,
+		}
+		tests = append(tests, test)
+
+		// Config from config.yaml file plus command line args with empty string
+		c = newConfig()
+		c.Port = 8002
+		c.Model = model
+		c.ServedModelNames = []string{c.Model}
+		c.MaxLoras = 2
+		c.MaxCPULoras = 5
+		c.MaxNumSeqs = 5
+		c.TimeToFirstToken = 2
+		c.InterTokenLatency = 1
+		c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
+		c.LoraModulesString = []string{
+			"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+		}
 		test = testCase{
 			name: "config file with command line args",
 			args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
-				"--served-model-name", "alias1,alias2"},
+				"--served-model-name", "",
+				"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+			},
 			expectedConfig: c,
 		}
 		tests = append(tests, test)

@@ -140,17 +198,19 @@ var _ = Describe("Simulator configuration", func() {
 		Entry(tests[0].name, tests[0].args, tests[0].expectedConfig),
 		Entry(tests[1].name, tests[1].args, tests[1].expectedConfig),
 		Entry(tests[2].name, tests[2].args, tests[2].expectedConfig),
+		Entry(tests[3].name, tests[3].args, tests[3].expectedConfig),
+		Entry(tests[4].name, tests[4].args, tests[4].expectedConfig),
 	)

 	DescribeTable("invalid configurations",
 		func(args []string) {
 			_, err := createSimConfig(args)
 			Expect(err).To(HaveOccurred())
 		},
-		Entry(tests[3].name, tests[3].args),
-		Entry(tests[4].name, tests[4].args),
 		Entry(tests[5].name, tests[5].args),
 		Entry(tests[6].name, tests[6].args),
 		Entry(tests[7].name, tests[7].args),
+		Entry(tests[8].name, tests[8].args),
+		Entry(tests[9].name, tests[9].args),
 	)
 })
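Taken together, the new positive test cases exercise the three accepted spellings: several space-separated values after one flag, the single --flag=value form, and an explicit empty string for --served-model-name, which validate() now maps back to the model name.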

pkg/llm-d-inference-sim/simulator.go

Lines changed: 40 additions & 30 deletions

@@ -132,33 +132,36 @@ func (s *VllmSimulator) Start(ctx context.Context) error {
 // parseCommandParamsAndLoadConfig parses and validates command line parameters
 func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
 	config := newConfig()
-	configFile := getConfigPathFromArgs()
-	if configFile != "" {
-		if err := config.load(configFile); err != nil {
+
+	configFileValues := getParamValueFromArgs("config")
+	if len(configFileValues) == 1 {
+		if err := config.load(configFileValues[0]); err != nil {
 			return err
 		}
 	}

+	servedModelNames := getParamValueFromArgs("served-model-name")
+	loraModuleNames := getParamValueFromArgs("lora-modules")
+
 	f := pflag.NewFlagSet("llm-d-inference-sim flags", pflag.ExitOnError)

 	f.IntVar(&config.Port, "port", config.Port, "Port")
 	f.StringVar(&config.Model, "model", config.Model, "Currently 'loaded' model")
-
-	var servedModelName []string
-	f.StringSliceVar(&servedModelName, "served-model-name", nil, "Model names exposed by the API (comma-separated)")
 	f.IntVar(&config.MaxNumSeqs, "max-num-seqs", config.MaxNumSeqs, "Maximum number of inference requests that could be processed at the same time (parameter to simulate requests waiting queue)")
+	f.IntVar(&config.MaxLoras, "max-loras", config.MaxLoras, "Maximum number of LoRAs in a single batch")
+	f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")

 	f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode, echo - returns the same text that was sent in the request, for chat completion returns the last message, random - returns random sentence from a bank of pre-defined sentences")
 	f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
 	f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")

-	var loras loraModulesValue
-	f.Var(&loras, "lora-modules", "List of LoRA adapters (an array in JSON format)")
-
-	f.IntVar(&config.MaxLoras, "max-loras", config.MaxLoras, "Maximum number of LoRAs in a single batch")
-	f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")
-
+	// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
+	var servedModelNameStrings multiString
+	f.Var(&servedModelNameStrings, "served-model-name", "Model names exposed by the API (a list of space-separated strings)")
+	var configFile string
 	f.StringVar(&configFile, "config", "", "The configuration file")
+	var loras multiString
+	f.Var(&loras, "lora-modules", "List of LoRA adapters (a list of space-separated JSON strings)")

 	flagSet := flag.NewFlagSet("simFlagSet", flag.ExitOnError)
 	klog.InitFlags(flagSet)

@@ -169,11 +172,14 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
 	}

 	// Need to read in a variable to avoid merging the values with the config file ones
-	if loras != nil {
-		config.LoraModules = loras
+	if loraModuleNames != nil {
+		config.LoraModulesString = loraModuleNames
+		if err := config.unmarshalLoras(); err != nil {
+			return err
+		}
 	}
-	if servedModelName != nil {
-		config.ServedModelNames = servedModelName
+	if servedModelNames != nil {
+		config.ServedModelNames = servedModelNames
 	}

 	if err := config.validate(); err != nil {

@@ -191,23 +197,27 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
 	return nil
 }

-func getConfigPathFromArgs() string {
-	for i, arg := range os.Args[1:] {
-		if arg == "--config" || arg == "-config" {
-			// Next argument should be the path
-			if i+2 <= len(os.Args)-1 {
-				return os.Args[i+2]
+func getParamValueFromArgs(param string) []string {
+	var values []string
+	var readValues bool
+	for _, arg := range os.Args[1:] {
+		if readValues {
+			if strings.HasPrefix(arg, "--") {
+				break
+			}
+			values = append(values, arg)
+		} else {
+			if arg == "--"+param {
+				readValues = true
+				values = make([]string, 0)
+			} else if strings.HasPrefix(arg, "--"+param+"=") {
+				// Handle --param=value
+				values = append(values, strings.TrimPrefix(arg, "--"+param+"="))
+				break
 			}
-		}
-		// Handle --config=path or -config=path
-		if strings.HasPrefix(arg, "--config=") {
-			return strings.TrimPrefix(arg, "--config=")
-		}
-		if strings.HasPrefix(arg, "-config=") {
-			return strings.TrimPrefix(arg, "-config=")
 		}
 	}
-	return ""
+	return values
 }

 func (s *VllmSimulator) newListener() (net.Listener, error) {
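To make the new parsing concrete, here is a small self-contained sketch of the pre-scan above, rewritten (for illustration only) to take the argument slice as a parameter instead of reading os.Args: it gathers every value following --param until the next flag, and also accepts the --param=value form.

package main

import (
	"fmt"
	"strings"
)

// getParamValues is the commit's getParamValueFromArgs with the
// argument slice made explicit (illustrative rename; the original
// iterates over os.Args[1:]).
func getParamValues(param string, args []string) []string {
	var values []string
	var readValues bool
	for _, arg := range args {
		if readValues {
			if strings.HasPrefix(arg, "--") {
				break // the next flag terminates the value list
			}
			values = append(values, arg)
		} else if arg == "--"+param {
			readValues = true
			values = make([]string, 0) // non-nil even if no values follow
		} else if strings.HasPrefix(arg, "--"+param+"=") {
			// Handle the --param=value form
			values = append(values, strings.TrimPrefix(arg, "--"+param+"="))
			break
		}
	}
	return values
}

func main() {
	args := []string{"--served-model-name", "alias1", "alias2", "--port", "8002"}
	fmt.Println(getParamValues("served-model-name", args)) // [alias1 alias2]
	fmt.Println(getParamValues("port", args))              // [8002]
	fmt.Println(getParamValues("config", args))            // [] (nil: flag absent)
}

The non-nil empty slice matters: a bare "--served-model-name" yields an empty (not nil) result, which overwrites the config-file value and lets validate() fall back to the model name, exactly the behavior the new tests pin down.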
