You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard | Expand all lines: pkg/llm-d-inference-sim/simulator.go
+25 -29 | Lines changed: 25 additions & 29 deletions
Original file line number
Diff line number
Diff line change
@@ -20,6 +20,7 @@ package llmdinferencesim
20
20
import (
21
21
"context"
22
22
"encoding/json"
23
+
"errors"
23
24
"fmt"
24
25
"net"
25
26
"os"
@@ -86,7 +87,7 @@ func (s *VllmSimulator) parseCommandParams() error {
86
87
var lorasStr string
87
88
f.StringVar(&lorasStr, "lora", "", "List of LoRA adapters, separated by comma")
88
89
f.IntVar(&s.maxLoras, "max-loras", 1, "Maximum number of LoRAs in a single batch")
89
-
f.IntVar(&s.maxCpuLoras, "max-cpu-loras", 0, "Maximum number of LoRAs to store in CPU memory")
90
+
f.IntVar(&s.maxCPULoras, "max-cpu-loras", 0, "Maximum number of LoRAs to store in CPU memory")
90
91
f.Int64Var(&s.maxRunningReqs, "max-running-requests", 5, "Maximum number of inference requests that could be processed at the same time (parameter to simulate requests waiting queue)")
91
92
92
93
if err := f.Parse(os.Args[1:]); err != nil {
@@ -100,7 +101,7 @@ func (s *VllmSimulator) parseCommandParams() error {
100
101
101
102
// validate parsed values
102
103
if s.model == "" {
103
-
return fmt.Errorf("model parameter is empty")
104
+
return errors.New("model parameter is empty")
104
105
}
105
106
if s.mode != modeEcho && s.mode != modeRandom {
106
107
return fmt.Errorf("invalid mode '%s', valid values are 'random' and 'echo'", s.mode)
@@ -109,20 +110,20 @@ func (s *VllmSimulator) parseCommandParams() error {
109
110
return fmt.Errorf("invalid port '%d'", s.port)
110
111
}
111
112
if s.interTokenLatency < 0 {
112
-
return fmt.Errorf("inter token latency cannot be negative")
113
+
return errors.New("inter token latency cannot be negative")
113
114
}
114
115
if s.timeToFirstToken < 0 {
115
-
return fmt.Errorf("time to first token cannot be negative")
116
+
return errors.New("time to first token cannot be negative")
116
117
}
117
118
if s.maxLoras < 1 {
118
-
return fmt.Errorf("max loras cannot be less than 1")
119
+
return errors.New("max loras cannot be less than 1")
119
120
}
120
-
if s.maxCpuLoras == 0 {
121
+
if s.maxCPULoras == 0 {
121
122
// max cpu loras by default is same as max loras
122
-
s.maxCpuLoras = s.maxLoras
123
+
s.maxCPULoras = s.maxLoras
123
124
}
124
-
if s.maxCpuLoras < 1 {
125
-
return fmt.Errorf("max CPU loras cannot be less than 1")
125
+
if s.maxCPULoras < 1 {
126
+
return errors.New("max CPU loras cannot be less than 1")
126
127
}
127
128
128
129
// just to suppress not used lint error for now
@@ -309,13 +310,13 @@ func (s *VllmSimulator) reqProcessingWorker(ctx context.Context, id int) {
// createModelsResponse creates and returns a ModelResponse for the current state. The returned array of models contains the base model plus any LoRA adapters, if they exist.
0 commit comments