Skip to content

Commit 276f15a

Browse files
committed
Add test for kvcache transfer time command line parameter.
Update config_test to use a function that creates the same configuration as defined in the config YAML file.
Signed-off-by: Maya Barnea <mayab@il.ibm.com>
1 parent 10d5088 commit 276f15a

File tree

3 files changed

+57
-56
lines changed

3 files changed

+57
-56
lines changed

manifests/basic-config.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
port: 8001
2+
model: "Qwen/Qwen2-0.5B"
3+
max-num-seqs: 5
4+
mode: "random"
5+
time-to-first-token: 2000
6+
inter-token-latency: 1000
7+
kv_cache_transfer_latency: 100
8+
seed: 100100100

manifests/config.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ lora-modules:
1010
- '{"name":"lora1","path":"/path/to/lora1"}'
1111
- '{"name":"lora2","path":"/path/to/lora2"}'
1212
mode: "random"
13-
time-to-first-token: 2
14-
inter-token-latency: 1
13+
time-to-first-token: 2000
14+
inter-token-latency: 1000
15+
kv_cache_transfer_latency: 100
1516
seed: 100100100

pkg/llm-d-inference-sim/config_test.go

Lines changed: 46 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ import (
2525
)
2626

2727
const (
28-
qwenModelName = "Qwen/Qwen2-0.5B"
29-
seedInConfigFile = 100100100
28+
qwenModelName = "Qwen/Qwen2-0.5B"
3029
)
3130

3231
func createSimConfig(args []string) (*configuration, error) {
@@ -46,6 +45,33 @@ func createSimConfig(args []string) (*configuration, error) {
4645
return s.config, nil
4746
}
4847

48+
// createDefaultBasicConfig returns the expected configuration used as the
// baseline by the tests. It starts from newConfig(), sets Model to the given
// model and serves it under that same single name, then fills in the values
// that match manifests/basic-config.yaml: MaxNumSeqs 5, TimeToFirstToken 2000,
// InterTokenLatency 1000, KVCacheTransferLatency 100 and Seed 100100100.
// LoraModules is set to an empty (non-nil) slice. Port is not set here;
// callers assign it per test case.
// NOTE(review): MaxLoras and MaxCPULoras are set to 1 although the basic YAML
// does not mention them - presumably these are the simulator defaults; confirm.
func createDefaultBasicConfig(model string) *configuration {
49+
c := newConfig()
50+
51+
c.Model = model
52+
c.ServedModelNames = []string{c.Model}
53+
c.MaxNumSeqs = 5
54+
c.MaxLoras = 1
55+
c.MaxCPULoras = 1
56+
c.TimeToFirstToken = 2000
57+
c.InterTokenLatency = 1000
58+
c.KVCacheTransferLatency = 100
59+
c.Seed = 100100100
60+
c.LoraModules = []loraModule{}
61+
62+
return c
63+
}
64+
65+
// createDefaultConfig returns the configuration built by
// createDefaultBasicConfig for the given model, with MaxLoras overridden to 2
// and MaxCPULoras to 5. The tests use it as the expected result for cases that
// load manifests/config.yaml; callers still set Port, ServedModelNames and
// LoraModules themselves where those differ.
func createDefaultConfig(model string) *configuration {
66+
c := createDefaultBasicConfig(model)
67+
68+
// Parameters that are specific to config.yaml.
69+
c.MaxLoras = 2
70+
c.MaxCPULoras = 5
71+
72+
return c
73+
}
74+
4975
type testCase struct {
5076
name string
5177
args []string
@@ -69,17 +95,10 @@ var _ = Describe("Simulator configuration", func() {
6995
tests = append(tests, test)
7096

7197
// Config from config.yaml file
72-
c = newConfig()
98+
c = createDefaultConfig(qwenModelName)
7399
c.Port = 8001
74-
c.Model = qwenModelName
75100
c.ServedModelNames = []string{"model1", "model2"}
76-
c.MaxLoras = 2
77-
c.MaxCPULoras = 5
78-
c.MaxNumSeqs = 5
79-
c.TimeToFirstToken = 2
80-
c.InterTokenLatency = 1
81101
c.LoraModules = []loraModule{{Name: "lora1", Path: "/path/to/lora1"}, {Name: "lora2", Path: "/path/to/lora2"}}
82-
c.Seed = seedInConfigFile
83102
test = testCase{
84103
name: "config file",
85104
args: []string{"cmd", "--config", "../../manifests/config.yaml"},
@@ -92,15 +111,9 @@ var _ = Describe("Simulator configuration", func() {
92111
tests = append(tests, test)
93112

94113
// Config from config.yaml file plus command line args
95-
c = newConfig()
114+
c = createDefaultConfig(model)
96115
c.Port = 8002
97-
c.Model = model
98116
c.ServedModelNames = []string{"alias1", "alias2"}
99-
c.MaxLoras = 2
100-
c.MaxCPULoras = 5
101-
c.MaxNumSeqs = 5
102-
c.TimeToFirstToken = 2
103-
c.InterTokenLatency = 1
104117
c.Seed = 100
105118
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}, {Name: "lora4", Path: "/path/to/lora4"}}
106119
c.LoraModulesString = []string{
@@ -118,16 +131,8 @@ var _ = Describe("Simulator configuration", func() {
118131
tests = append(tests, test)
119132

120133
// Config from config.yaml file plus command line args with different format
121-
c = newConfig()
134+
c = createDefaultConfig(model)
122135
c.Port = 8002
123-
c.Model = model
124-
c.ServedModelNames = []string{c.Model}
125-
c.MaxLoras = 2
126-
c.MaxCPULoras = 5
127-
c.MaxNumSeqs = 5
128-
c.TimeToFirstToken = 2
129-
c.InterTokenLatency = 1
130-
c.Seed = seedInConfigFile
131136
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
132137
c.LoraModulesString = []string{
133138
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -143,16 +148,8 @@ var _ = Describe("Simulator configuration", func() {
143148
tests = append(tests, test)
144149

145150
// Config from config.yaml file plus command line args with empty string
146-
c = newConfig()
151+
c = createDefaultConfig(model)
147152
c.Port = 8002
148-
c.Model = model
149-
c.ServedModelNames = []string{c.Model}
150-
c.MaxLoras = 2
151-
c.MaxCPULoras = 5
152-
c.MaxNumSeqs = 5
153-
c.TimeToFirstToken = 2
154-
c.InterTokenLatency = 1
155-
c.Seed = seedInConfigFile
156153
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
157154
c.LoraModulesString = []string{
158155
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -168,18 +165,10 @@ var _ = Describe("Simulator configuration", func() {
168165
tests = append(tests, test)
169166

170167
// Config from config.yaml file plus command line args with empty string for loras
171-
c = newConfig()
168+
c = createDefaultConfig(qwenModelName)
172169
c.Port = 8001
173-
c.Model = qwenModelName
174170
c.ServedModelNames = []string{"model1", "model2"}
175-
c.MaxLoras = 2
176-
c.MaxCPULoras = 5
177-
c.MaxNumSeqs = 5
178-
c.TimeToFirstToken = 2
179-
c.InterTokenLatency = 1
180-
c.LoraModules = []loraModule{}
181171
c.LoraModulesString = []string{}
182-
c.Seed = seedInConfigFile
183172
test = testCase{
184173
name: "config file with command line args with empty string for loras",
185174
args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules", ""},
@@ -188,25 +177,28 @@ var _ = Describe("Simulator configuration", func() {
188177
tests = append(tests, test)
189178

190179
// Config from config.yaml file plus command line args with empty parameter for loras
191-
c = newConfig()
180+
c = createDefaultConfig(qwenModelName)
192181
c.Port = 8001
193-
c.Model = qwenModelName
194182
c.ServedModelNames = []string{"model1", "model2"}
195-
c.MaxLoras = 2
196-
c.MaxCPULoras = 5
197-
c.MaxNumSeqs = 5
198-
c.TimeToFirstToken = 2
199-
c.InterTokenLatency = 1
200-
c.LoraModules = []loraModule{}
201183
c.LoraModulesString = []string{}
202-
c.Seed = seedInConfigFile
203184
test = testCase{
204185
name: "config file with command line args with empty parameter for loras",
205186
args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules"},
206187
expectedConfig: c,
207188
}
208189
tests = append(tests, test)
209190

191+
// Config from basic-config.yaml file plus command line args with time to transfer kv-cache
192+
c = createDefaultBasicConfig(qwenModelName)
193+
c.Port = 8001
194+
c.KVCacheTransferLatency = 50
195+
test = testCase{
196+
name: "config file with command line args with time to transfer kv-cache",
197+
args: []string{"cmd", "--config", "../../manifests/basic-config.yaml", "--kv_cache_transfer_latency", "50"},
198+
expectedConfig: c,
199+
}
200+
tests = append(tests, test)
201+
210202
// Invalid configurations
211203
test = testCase{
212204
name: "invalid model",
@@ -258,14 +250,14 @@ var _ = Describe("Simulator configuration", func() {
258250
Entry(tests[4].name, tests[4].args, tests[4].expectedConfig),
259251
Entry(tests[5].name, tests[5].args, tests[5].expectedConfig),
260252
Entry(tests[6].name, tests[6].args, tests[6].expectedConfig),
253+
Entry(tests[7].name, tests[7].args, tests[7].expectedConfig),
261254
)
262255

263256
DescribeTable("invalid configurations",
264257
func(args []string) {
265258
_, err := createSimConfig(args)
266259
Expect(err).To(HaveOccurred())
267260
},
268-
Entry(tests[7].name, tests[7].args),
269261
Entry(tests[8].name, tests[8].args),
270262
Entry(tests[9].name, tests[9].args),
271263
Entry(tests[10].name, tests[10].args),

0 commit comments

Comments
 (0)