
Commit 3e63a0d

Support space separated args, fixed config (#78)
Signed-off-by: Ira <IRAR@il.ibm.com>
1 parent a2ce44d commit 3e63a0d

File tree: 5 files changed, +143 -67 lines changed

README.md

Lines changed: 3 additions & 3 deletions

@@ -88,8 +88,8 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
 - `config`: the path to a yaml configuration file
 - `port`: the port the simulator listens on, default is 8000
 - `model`: the currently 'loaded' model, mandatory
-- `served-model-name`: model names exposed by the API (comma-separated)
-- `lora-modules`: LoRA module configurations in JSON format: [{"name": "name", "path": "lora_path", "base_model_name": "id"}], optional, empty by default
+- `served-model-name`: model names exposed by the API (a list of space-separated strings)
+- `lora-modules`: a list of LoRA adapters (a list of space-separated JSON strings): '{"name": "name", "path": "lora_path", "base_model_name": "id"}', optional, empty by default
 - `max-loras`: maximum number of LoRAs in a single batch, optional, default is one
 - `max-cpu-loras`: maximum number of LoRAs to store in CPU memory, optional, must be >= max-loras, default is max-loras
 - `max-num-seqs`: maximum number of sequences per iteration (maximum number of inference requests that could be processed at the same time), default is 5

@@ -118,7 +118,7 @@ In addition, as we are using klog, the following parameters are available:

 ## Migrating from releases prior to v0.2.0
 - `max-running-requests` was replaced by `max-num-seqs`
-- `lora` was replaced by `lora-modules`, which is now an array in JSON format, e.g., [{"name": "name", "path": "lora_path", "base_model_name": "id"}]
+- `lora` was replaced by `lora-modules`, which is now a list of JSON strings, e.g., '{"name": "name", "path": "lora_path", "base_model_name": "id"}'

 ## Working with docker image
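The practical upshot of this README change: multiple values are now passed as separate space-separated arguments rather than as one comma-separated string or JSON array. A sketch of an invocation under the new format (the binary name is assumed here for illustration; the flag values are taken from this commit's tests):

./llm-d-inference-sim --model "Qwen/Qwen2-0.5B" \
  --served-model-name model1 model2 \
  --lora-modules '{"name":"lora1","path":"/path/to/lora1"}' '{"name":"lora2","path":"/path/to/lora2"}'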

manifests/config.yaml

Lines changed: 6 additions & 3 deletions

@@ -1,11 +1,14 @@
 port: 8001
 model: "Qwen/Qwen2-0.5B"
-served-model-name: ["model1", "model2"]
+served-model-name:
+  - "model1"
+  - "model2"
 max-loras: 2
 max-cpu-loras: 5
 max-num-seqs: 5
-lora-modules: [{"name":"lora1","path":"/path/to/lora1"},{"name":"lora2","path":"/path/to/lora2"}]
-
+lora-modules:
+  - '{"name":"lora1","path":"/path/to/lora1"}'
+  - '{"name":"lora2","path":"/path/to/lora2"}'
 mode: "random"
 time-to-first-token: 2
 inter-token-latency: 1
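Note the format change above: since the CLI now treats each LoRA adapter as a single JSON string, the YAML config mirrors that by holding a list of quoted JSON strings instead of inline YAML mappings, so both input paths can be decoded by the same unmarshalLoras helper introduced in config.go below.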

pkg/llm-d-inference-sim/config.go

Lines changed: 30 additions & 27 deletions

@@ -21,6 +21,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"strings"

 	"gopkg.in/yaml.v3"
 )

@@ -39,8 +40,10 @@ type configuration struct {
 	// MaxNumSeqs is maximum number of sequences per iteration (the maximum
 	// number of inference requests that could be processed at the same time)
 	MaxNumSeqs int `yaml:"max-num-seqs"`
+	// LoraModulesString is a list of LoRA adapters as strings
+	LoraModulesString []string `yaml:"lora-modules"`
 	// LoraModules is a list of LoRA adapters
-	LoraModules loraModulesValue `yaml:"lora-modules"`
+	LoraModules []loraModule

 	// TimeToFirstToken time before the first token will be returned, in milliseconds
 	TimeToFirstToken int `yaml:"time-to-first-token"`

@@ -52,42 +55,41 @@ type configuration struct {

 type loraModule struct {
 	// Name is the LoRA's name
-	Name string `yaml:"name"`
+	Name string `json:"name"`
 	// Path is the LoRA's path
-	Path string `yaml:"path"`
+	Path string `json:"path"`
 	// BaseModelName is the LoRA's base model
-	BaseModelName string `yaml:"base_model_name"`
+	BaseModelName string `json:"base_model_name"`
 }

-type loraModulesValue []loraModule
+// Needed to parse values that contain multiple strings
+type multiString struct {
+	values []string
+}

-func (l *loraModulesValue) String() string {
-	b, _ := json.Marshal(l)
-	return string(b)
+func (l *multiString) String() string {
+	return strings.Join(l.values, " ")
 }

-func (l *loraModulesValue) Set(val string) error {
-	return json.Unmarshal([]byte(val), l)
+func (l *multiString) Set(val string) error {
+	l.values = append(l.values, val)
+	return nil
 }

-func (l *loraModulesValue) Type() string {
-	return "loras"
+func (l *multiString) Type() string {
+	return "strings"
 }

-// Implement custom YAML unmarshaling for just this type
-func (l *loraModulesValue) UnmarshalYAML(unmarshal func(interface{}) error) error {
-	// Try parsing as an array of loraModule
-	var arr []loraModule
-	if err := unmarshal(&arr); err == nil {
-		*l = arr
-		return nil
-	}
-	// Try parsing as a JSON string
-	var str string
-	if err := unmarshal(&str); err == nil {
-		return json.Unmarshal([]byte(str), l)
+func (c *configuration) unmarshalLoras() error {
+	c.LoraModules = make([]loraModule, 0)
+	for _, jsonStr := range c.LoraModulesString {
+		var lora loraModule
+		if err := json.Unmarshal([]byte(jsonStr), &lora); err != nil {
+			return err
+		}
+		c.LoraModules = append(c.LoraModules, lora)
 	}
-	return errors.New("lora-modules: invalid format")
+	return nil
 }

 func newConfig() *configuration {

@@ -108,7 +110,8 @@ func (c *configuration) load(configFile string) error {
 	if err := yaml.Unmarshal(configBytes, &c); err != nil {
 		return fmt.Errorf("failed to unmarshal configuration: %s", err)
 	}
-	return nil
+
+	return c.unmarshalLoras()
 }

 func (c *configuration) validate() error {

@@ -118,7 +121,7 @@ func (c *configuration) validate() error {
 	// Upstream vLLM behaviour: when --served-model-name is not provided,
 	// it falls back to using the value of --model as the single public name
 	// returned by the API and exposed in Prometheus metrics.
-	if len(c.ServedModelNames) == 0 {
+	if len(c.ServedModelNames) == 0 || c.ServedModelNames[0] == "" {
 		c.ServedModelNames = []string{c.Model}
 	}
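Worth noting how multiString interacts with pflag: pflag calls Set once per occurrence of a flag, so a custom Value by itself cannot consume several space-separated values following a single flag. That gap is exactly why this commit pre-scans os.Args with getParamValueFromArgs (see simulator.go below). A minimal, self-contained sketch of the behavior (demo code, not part of the commit; flag and variable names are illustrative):

package main

import (
	"fmt"
	"strings"

	"github.com/spf13/pflag"
)

// multiString mirrors the pflag.Value added in config.go above:
// each call to Set appends one value.
type multiString struct {
	values []string
}

func (m *multiString) String() string { return strings.Join(m.values, " ") }

func (m *multiString) Set(v string) error {
	m.values = append(m.values, v) // called once per --flag occurrence
	return nil
}

func (m *multiString) Type() string { return "strings" }

func main() {
	f := pflag.NewFlagSet("demo", pflag.ContinueOnError)
	var names multiString
	f.Var(&names, "served-model-name", "model names exposed by the API")

	// Repeating the flag accumulates values via Set:
	_ = f.Parse([]string{"--served-model-name", "alias1", "--served-model-name", "alias2"})
	fmt.Println(names.values) // [alias1 alias2]

	// With "--served-model-name alias1 alias2", pflag would call
	// Set("alias1") only; "alias2" would land in f.Args(). Hence the
	// manual pre-scan of os.Args in simulator.go.
}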

pkg/llm-d-inference-sim/config_test.go

Lines changed: 64 additions & 4 deletions

@@ -78,6 +78,10 @@ var _ = Describe("Simulator configuration", func() {
 			args:           []string{"cmd", "--config", "../../manifests/config.yaml"},
 			expectedConfig: c,
 		}
+		c.LoraModulesString = []string{
+			"{\"name\":\"lora1\",\"path\":\"/path/to/lora1\"}",
+			"{\"name\":\"lora2\",\"path\":\"/path/to/lora2\"}",
+		}
 		tests = append(tests, test)

 		// Config from config.yaml file plus command line args

@@ -90,11 +94,65 @@ var _ = Describe("Simulator configuration", func() {
 		c.MaxNumSeqs = 5
 		c.TimeToFirstToken = 2
 		c.InterTokenLatency = 1
-		c.LoraModules = []loraModule{{Name: "lora1", Path: "/path/to/lora1"}, {Name: "lora2", Path: "/path/to/lora2"}}
+		c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}, {Name: "lora4", Path: "/path/to/lora4"}}
+		c.LoraModulesString = []string{
+			"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+			"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}",
+		}
+		test = testCase{
+			name: "config file with command line args",
+			args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
+				"--served-model-name", "alias1", "alias2",
+				"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}", "{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}",
+			},
+			expectedConfig: c,
+		}
+		tests = append(tests, test)
+
+		// Config from config.yaml file plus command line args with different format
+		c = newConfig()
+		c.Port = 8002
+		c.Model = model
+		c.ServedModelNames = []string{c.Model}
+		c.MaxLoras = 2
+		c.MaxCPULoras = 5
+		c.MaxNumSeqs = 5
+		c.TimeToFirstToken = 2
+		c.InterTokenLatency = 1
+		c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
+		c.LoraModulesString = []string{
+			"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+		}
+		test = testCase{
+			name: "config file with command line args",
+			args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
+				"--served-model-name",
+				"--lora-modules={\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+			},
+			expectedConfig: c,
+		}
+		tests = append(tests, test)
+
+		// Config from config.yaml file plus command line args with empty string
+		c = newConfig()
+		c.Port = 8002
+		c.Model = model
+		c.ServedModelNames = []string{c.Model}
+		c.MaxLoras = 2
+		c.MaxCPULoras = 5
+		c.MaxNumSeqs = 5
+		c.TimeToFirstToken = 2
+		c.InterTokenLatency = 1
+		c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
+		c.LoraModulesString = []string{
+			"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+		}
 		test = testCase{
 			name: "config file with command line args",
 			args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
-				"--served-model-name", "alias1,alias2"},
+				"--served-model-name", "",
+				"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+			},
 			expectedConfig: c,
 		}
 		tests = append(tests, test)

@@ -140,17 +198,19 @@ var _ = Describe("Simulator configuration", func() {
 		Entry(tests[0].name, tests[0].args, tests[0].expectedConfig),
 		Entry(tests[1].name, tests[1].args, tests[1].expectedConfig),
 		Entry(tests[2].name, tests[2].args, tests[2].expectedConfig),
+		Entry(tests[3].name, tests[3].args, tests[3].expectedConfig),
+		Entry(tests[4].name, tests[4].args, tests[4].expectedConfig),
 	)

 	DescribeTable("invalid configurations",
 		func(args []string) {
 			_, err := createSimConfig(args)
 			Expect(err).To(HaveOccurred())
 		},
-		Entry(tests[3].name, tests[3].args),
-		Entry(tests[4].name, tests[4].args),
 		Entry(tests[5].name, tests[5].args),
 		Entry(tests[6].name, tests[6].args),
 		Entry(tests[7].name, tests[7].args),
+		Entry(tests[8].name, tests[8].args),
+		Entry(tests[9].name, tests[9].args),
 	)
 })
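Taken together, the new positive test cases exercise the three accepted spellings: several space-separated values after one flag, the single --flag=value form, and an explicit empty string for --served-model-name, which validate() now maps back to the model name.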

pkg/llm-d-inference-sim/simulator.go

Lines changed: 40 additions & 30 deletions

@@ -132,33 +132,36 @@ func (s *VllmSimulator) Start(ctx context.Context) error {
 // parseCommandParamsAndLoadConfig parses and validates command line parameters
 func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
 	config := newConfig()
-	configFile := getConfigPathFromArgs()
-	if configFile != "" {
-		if err := config.load(configFile); err != nil {
+
+	configFileValues := getParamValueFromArgs("config")
+	if len(configFileValues) == 1 {
+		if err := config.load(configFileValues[0]); err != nil {
 			return err
 		}
 	}

+	servedModelNames := getParamValueFromArgs("served-model-name")
+	loraModuleNames := getParamValueFromArgs("lora-modules")
+
 	f := pflag.NewFlagSet("llm-d-inference-sim flags", pflag.ExitOnError)

 	f.IntVar(&config.Port, "port", config.Port, "Port")
 	f.StringVar(&config.Model, "model", config.Model, "Currently 'loaded' model")
-
-	var servedModelName []string
-	f.StringSliceVar(&servedModelName, "served-model-name", nil, "Model names exposed by the API (comma-separated)")
 	f.IntVar(&config.MaxNumSeqs, "max-num-seqs", config.MaxNumSeqs, "Maximum number of inference requests that could be processed at the same time (parameter to simulate requests waiting queue)")
+	f.IntVar(&config.MaxLoras, "max-loras", config.MaxLoras, "Maximum number of LoRAs in a single batch")
+	f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")

 	f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode, echo - returns the same text that was sent in the request, for chat completion returns the last message, random - returns random sentence from a bank of pre-defined sentences")
 	f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
 	f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")

-	var loras loraModulesValue
-	f.Var(&loras, "lora-modules", "List of LoRA adapters (an array in JSON format)")
-
-	f.IntVar(&config.MaxLoras, "max-loras", config.MaxLoras, "Maximum number of LoRAs in a single batch")
-	f.IntVar(&config.MaxCPULoras, "max-cpu-loras", config.MaxCPULoras, "Maximum number of LoRAs to store in CPU memory")
-
+	// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
+	var servedModelNameStrings multiString
+	f.Var(&servedModelNameStrings, "served-model-name", "Model names exposed by the API (a list of space-separated strings)")
+	var configFile string
 	f.StringVar(&configFile, "config", "", "The configuration file")
+	var loras multiString
+	f.Var(&loras, "lora-modules", "List of LoRA adapters (a list of space-separated JSON strings)")

 	flagSet := flag.NewFlagSet("simFlagSet", flag.ExitOnError)
 	klog.InitFlags(flagSet)

@@ -169,11 +172,14 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
 	}

 	// Need to read in a variable to avoid merging the values with the config file ones
-	if loras != nil {
-		config.LoraModules = loras
+	if loraModuleNames != nil {
+		config.LoraModulesString = loraModuleNames
+		if err := config.unmarshalLoras(); err != nil {
+			return err
+		}
 	}
-	if servedModelName != nil {
-		config.ServedModelNames = servedModelName
+	if servedModelNames != nil {
+		config.ServedModelNames = servedModelNames
 	}

 	if err := config.validate(); err != nil {

@@ -191,23 +197,27 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
 	return nil
 }

-func getConfigPathFromArgs() string {
-	for i, arg := range os.Args[1:] {
-		if arg == "--config" || arg == "-config" {
-			// Next argument should be the path
-			if i+2 <= len(os.Args)-1 {
-				return os.Args[i+2]
+func getParamValueFromArgs(param string) []string {
+	var values []string
+	var readValues bool
+	for _, arg := range os.Args[1:] {
+		if readValues {
+			if strings.HasPrefix(arg, "--") {
+				break
+			}
+			values = append(values, arg)
+		} else {
+			if arg == "--"+param {
+				readValues = true
+				values = make([]string, 0)
+			} else if strings.HasPrefix(arg, "--"+param+"=") {
+				// Handle --param=value
+				values = append(values, strings.TrimPrefix(arg, "--"+param+"="))
+				break
 			}
-		}
-		// Handle --config=path or -config=path
-		if strings.HasPrefix(arg, "--config=") {
-			return strings.TrimPrefix(arg, "--config=")
-		}
-		if strings.HasPrefix(arg, "-config=") {
-			return strings.TrimPrefix(arg, "-config=")
 		}
 	}
-	return ""
+	return values
 }

 func (s *VllmSimulator) newListener() (net.Listener, error) {
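To make the new parsing concrete, here is a small self-contained sketch of the pre-scan above, rewritten (for illustration only) to take the argument slice as a parameter instead of reading os.Args: it gathers every value following --param until the next flag, and also accepts the --param=value form.

package main

import (
	"fmt"
	"strings"
)

// getParamValues is the commit's getParamValueFromArgs with the
// argument slice made explicit (illustrative rename; the original
// iterates over os.Args[1:]).
func getParamValues(param string, args []string) []string {
	var values []string
	var readValues bool
	for _, arg := range args {
		if readValues {
			if strings.HasPrefix(arg, "--") {
				break // the next flag terminates the value list
			}
			values = append(values, arg)
		} else if arg == "--"+param {
			readValues = true
			values = make([]string, 0) // non-nil even if no values follow
		} else if strings.HasPrefix(arg, "--"+param+"=") {
			// Handle the --param=value form
			values = append(values, strings.TrimPrefix(arg, "--"+param+"="))
			break
		}
	}
	return values
}

func main() {
	args := []string{"--served-model-name", "alias1", "alias2", "--port", "8002"}
	fmt.Println(getParamValues("served-model-name", args)) // [alias1 alias2]
	fmt.Println(getParamValues("port", args))              // [8002]
	fmt.Println(getParamValues("config", args))            // [] (nil: flag absent)
}

The non-nil empty slice matters: a bare "--served-model-name" yields an empty (not nil) result, which overwrites the config-file value and lets validate() fall back to the model name, exactly the behavior the new tests pin down.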
