You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
pflag.StringVar(&s.mode, "mode", "random", "Simulator mode, echo - returns the same text that was sent in the request, for chat completion returns the last message, random - returns random sentence from a bank of pre-defined sentences")
73
-
pflag.IntVar(&s.port, "port", 0, "Port")
74
-
pflag.IntVar(&s.interTokenLatency, "inter-token-latency", 0, "Time to generate one token (in milliseconds)")
75
-
pflag.IntVar(&s.timeToFirstToken, "time-to-first-token", 0, "Time to first token (in milliseconds)")
f.StringVar(&s.mode, "mode", "random", "Simulator mode, echo - returns the same text that was sent in the request, for chat completion returns the last message, random - returns random sentence from a bank of pre-defined sentences")
pflag.StringVar(&lorasStr, "lora", "", "List of LoRA adapters, separated by comma")
79
-
pflag.IntVar(&s.maxLoras, "max-loras", 1, "Maximum number of LoRAs in a single batch")
80
-
pflag.IntVar(&s.maxCpuLoras, "max-cpu-loras", 0, "Maximum number of LoRAs to store in CPU memory")
81
-
pflag.Int64Var(&s.maxRunningReqs, "max-running-requests", 5, "Maximum number of inference requests that could be processed at the same time (parameter to simulate requests waiting queue)")
87
+
f.StringVar(&lorasStr, "lora", "", "List of LoRA adapters, separated by comma")
88
+
f.IntVar(&s.maxLoras, "max-loras", 1, "Maximum number of LoRAs in a single batch")
89
+
f.IntVar(&s.maxCpuLoras, "max-cpu-loras", 0, "Maximum number of LoRAs to store in CPU memory")
90
+
f.Int64Var(&s.maxRunningReqs, "max-running-requests", 5, "Maximum number of inference requests that could be processed at the same time (parameter to simulate requests waiting queue)")
82
91
83
-
pflag.Parse()
92
+
if err := f.Parse(os.Args[1:]); err != nil {
93
+
return err
94
+
}
84
95
85
96
loras := strings.Split(lorasStr, ",")
86
97
for _, lora := range loras {
@@ -120,8 +131,17 @@ func (s *VllmSimulator) parseCommandParams() error {
0 commit comments