
Commit 7656a3c

Added tests for LoRA configuration, load and unload (#86)
* Added tests for LoRAs
  Signed-off-by: Ira <IRAR@il.ibm.com>

* Revert "Added tests for LoRAs"
  This reverts commit 547d909.

* Added tests for LoRAs
  Signed-off-by: Ira <IRAR@il.ibm.com>

* Added a call to models, added a constant
  Signed-off-by: Ira <IRAR@il.ibm.com>

---------

Signed-off-by: Ira <IRAR@il.ibm.com>
1 parent eed4e1d commit 7656a3c

File tree (4 files changed: +142 −7)

  pkg/llm-d-inference-sim/config_test.go
  pkg/llm-d-inference-sim/lora_test.go
  pkg/llm-d-inference-sim/metrics.go
  pkg/llm-d-inference-sim/simulator.go

pkg/llm-d-inference-sim/config_test.go

Lines changed: 9 additions & 4 deletions
@@ -24,7 +24,10 @@ import (
 	"k8s.io/klog/v2"
 )

-const qwenModelName = "Qwen/Qwen2-0.5B"
+const (
+	qwenModelName    = "Qwen/Qwen2-0.5B"
+	seedInConfigFile = 100100100
+)

 func createSimConfig(args []string) (*configuration, error) {
 	oldArgs := os.Args
@@ -76,7 +79,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.TimeToFirstToken = 2
 			c.InterTokenLatency = 1
 			c.LoraModules = []loraModule{{Name: "lora1", Path: "/path/to/lora1"}, {Name: "lora2", Path: "/path/to/lora2"}}
-			c.Seed = 100100100
+			c.Seed = seedInConfigFile
 			test = testCase{
 				name: "config file",
 				args: []string{"cmd", "--config", "../../manifests/config.yaml"},
@@ -124,7 +127,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.MaxNumSeqs = 5
 			c.TimeToFirstToken = 2
 			c.InterTokenLatency = 1
-			c.Seed = 100100100
+			c.Seed = seedInConfigFile
 			c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
 			c.LoraModulesString = []string{
 				"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -149,7 +152,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.MaxNumSeqs = 5
 			c.TimeToFirstToken = 2
 			c.InterTokenLatency = 1
-			c.Seed = 100100100
+			c.Seed = seedInConfigFile
 			c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
 			c.LoraModulesString = []string{
 				"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -176,6 +179,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.InterTokenLatency = 1
 			c.LoraModules = []loraModule{}
 			c.LoraModulesString = []string{}
+			c.Seed = seedInConfigFile
 			test = testCase{
 				name: "config file with command line args with empty string for loras",
 				args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules", ""},
@@ -195,6 +199,7 @@ var _ = Describe("Simulator configuration", func() {
 			c.InterTokenLatency = 1
 			c.LoraModules = []loraModule{}
 			c.LoraModulesString = []string{}
+			c.Seed = seedInConfigFile
 			test = testCase{
 				name: "config file with command line args with empty parameter for loras",
 				args: []string{"cmd", "--config", "../../manifests/config.yaml", "--lora-modules"},

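For context, each --lora-modules value in these test cases is a standalone JSON object. A minimal standalone sketch of how one such entry decodes into the name/path pair the tests check (the real loraModule type lives in this package and may differ; the JSON tags here are inferred from the strings in the diff):

package main

import (
	"encoding/json"
	"fmt"
)

// loraModule mirrors the shape seen in the test data above; the actual
// definition in the simulator package may differ.
type loraModule struct {
	Name string `json:"name"`
	Path string `json:"path"`
}

func main() {
	raw := `{"name":"lora3","path":"/path/to/lora3"}`
	var m loraModule
	if err := json.Unmarshal([]byte(raw), &m); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", m) // prints {Name:lora3 Path:/path/to/lora3}
}
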
pkg/llm-d-inference-sim/lora_test.go

Lines changed: 127 additions & 0 deletions
@@ -0,0 +1,127 @@
+/*
+Copyright 2025 The llm-d-inference-sim Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package llmdinferencesim
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+
+	vllmapi "github.com/llm-d/llm-d-inference-sim/pkg/vllm-api"
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"github.com/openai/openai-go"
+	"github.com/openai/openai-go/option"
+)
+
+var _ = Describe("LoRAs", func() {
+	Context("LoRAs config and load", func() {
+		It("Should config, load and unload LoRAs correctly", func() {
+			ctx := context.TODO()
+			client, err := startServerWithArgs(ctx, "",
+				[]string{"cmd", "--model", model, "--mode", modeEcho,
+					"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
+					"{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}"})
+			Expect(err).NotTo(HaveOccurred())
+
+			openaiclient := openai.NewClient(
+				option.WithBaseURL(baseURL),
+				option.WithHTTPClient(client))
+
+			// Request to lora3
+			params := openai.ChatCompletionNewParams{
+				Messages: []openai.ChatCompletionMessageParamUnion{
+					openai.UserMessage(userMessage),
+				},
+				Model: "lora3",
+			}
+			resp, err := openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).ToNot(HaveOccurred())
+
+			Expect(resp.Choices).ShouldNot(BeEmpty())
+			Expect(string(resp.Object)).To(Equal(chatCompletionObject))
+
+			msg := resp.Choices[0].Message.Content
+			Expect(msg).Should(Equal(userMessage))
+
+			// Unknown model, should return 404
+			params.Model = "lora1"
+			_, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).To(HaveOccurred())
+			var openaiError *openai.Error
+			ok := errors.As(err, &openaiError)
+			Expect(ok).To(BeTrue())
+			Expect(openaiError.StatusCode).To(Equal(404))
+
+			// Add lora1
+			payload := map[string]string{
+				"lora_name": "lora1",          // Name to register the adapter as
+				"lora_path": "/path/to/lora1", // Local or remote path
+			}
+
+			loraParams, err := json.Marshal(payload)
+			Expect(err).ToNot(HaveOccurred())
+
+			options := option.WithHeader("Content-Type", "application/json")
+			err = openaiclient.Post(ctx, "/load_lora_adapter", loraParams, nil, options)
+			Expect(err).ToNot(HaveOccurred())
+
+			// Should be four models: base model and three LoRAs
+			var modelsResp vllmapi.ModelsResponse
+			err = openaiclient.Get(ctx, "/models", nil, &modelsResp)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelsResp).NotTo(BeNil())
+			Expect(modelsResp.Data).To(HaveLen(4))
+
+			// Request to lora1, should work now
+			resp, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).ToNot(HaveOccurred())
+
+			Expect(resp.Choices).ShouldNot(BeEmpty())
+			Expect(string(resp.Object)).To(Equal(chatCompletionObject))
+
+			msg = resp.Choices[0].Message.Content
+			Expect(msg).Should(Equal(userMessage))
+
+			// Unload lora3
+			payload = map[string]string{
+				"lora_name": "lora3",          // Name the adapter was registered as
+				"lora_path": "/path/to/lora3", // Local or remote path
+			}
+
+			loraParams, err = json.Marshal(payload)
+			Expect(err).ToNot(HaveOccurred())
+			options = option.WithHeader("Content-Type", "application/json")
+			err = openaiclient.Post(ctx, "/unload_lora_adapter", loraParams, nil, options)
+			Expect(err).ToNot(HaveOccurred())
+
+			// Requests to lora3 should now fail
+			params.Model = "lora3"
+			_, err = openaiclient.Chat.Completions.New(ctx, params)
+			Expect(err).To(HaveOccurred())
+			ok = errors.As(err, &openaiError)
+			Expect(ok).To(BeTrue())
+			Expect(openaiError.StatusCode).To(Equal(404))
+
+			// Should be three models: base model and two LoRAs
+			err = openaiclient.Get(ctx, "/models", nil, &modelsResp)
+			Expect(err).ToNot(HaveOccurred())
+			Expect(modelsResp).NotTo(BeNil())
+			Expect(modelsResp.Data).To(HaveLen(3))
+		})
+	})
+})

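Outside the Ginkgo harness, the same load/unload lifecycle can be driven with plain HTTP. A rough sketch, assuming a simulator reachable at http://localhost:8000 and that the dynamic-adapter endpoints hang off the same base URL the OpenAI client uses (both the address and the /v1 prefix are assumptions here, not taken from the commit):

package main

import (
	"bytes"
	"fmt"
	"net/http"
)

// postJSON sends a JSON body and fails on any non-200 response.
func postJSON(url, body string) error {
	resp, err := http.Post(url, "application/json", bytes.NewBufferString(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("%s: unexpected status %s", url, resp.Status)
	}
	return nil
}

func main() {
	base := "http://localhost:8000/v1" // assumed address and prefix
	// Register lora1, then deregister lora3, mirroring the test flow.
	if err := postJSON(base+"/load_lora_adapter",
		`{"lora_name":"lora1","lora_path":"/path/to/lora1"}`); err != nil {
		panic(err)
	}
	if err := postJSON(base+"/unload_lora_adapter",
		`{"lora_name":"lora3","lora_path":"/path/to/lora3"}`); err != nil {
		panic(err)
	}
}
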
pkg/llm-d-inference-sim/metrics.go

Lines changed: 5 additions & 1 deletion
@@ -114,8 +114,12 @@ func (s *VllmSimulator) setInitialPrometheusMetrics() {

 // reportLoras sets information about loaded LoRA adapters
 func (s *VllmSimulator) reportLoras() {
-	var loras []string
+	if s.loraInfo == nil {
+		// Happens in the tests
+		return
+	}

+	var loras []string
 	s.runningLoras.Range(func(key interface{}, _ interface{}) bool {
 		if lora, ok := key.(string); ok {
 			loras = append(loras, lora)

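The guard added above is a common pattern: treat an uninitialized metrics handle as "metrics disabled" and make the reporter a no-op instead of panicking. A minimal sketch with prometheus/client_golang (the gauge's label layout is an assumption for illustration, not the simulator's actual metric):

package main

import (
	"strings"

	"github.com/prometheus/client_golang/prometheus"
)

type reporter struct {
	loraInfo *prometheus.GaugeVec // left nil when metrics are not set up
}

func (r *reporter) reportLoras(running []string) {
	if r.loraInfo == nil {
		// Metrics were never initialized (e.g. in unit tests); skip reporting.
		return
	}
	r.loraInfo.WithLabelValues(strings.Join(running, ",")).Set(1)
}

func main() {
	// With a nil gauge the call is a safe no-op, which is exactly what
	// the commit's guard buys the tests.
	(&reporter{}).reportLoras([]string{"lora1", "lora4"})
}
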
pkg/llm-d-inference-sim/simulator.go

Lines changed: 1 addition & 2 deletions
@@ -324,7 +324,7 @@ func (s *VllmSimulator) HandleLoadLora(ctx *fasthttp.RequestCtx) {
 }

 func (s *VllmSimulator) HandleUnloadLora(ctx *fasthttp.RequestCtx) {
-	s.logger.Info("load lora request received")
+	s.logger.Info("unload lora request received")
 	s.unloadLora(ctx)
 }

@@ -512,7 +512,6 @@ func (s *VllmSimulator) responseSentCallback(model string) {
 	}

 	s.reportLoras()
-
 }

 // sendCompletionError sends an error response for the current completion request
