Skip to content

Commit 2adff39

Browse files
authored
Initialize rand once, added seed to configuration (#79)
* Initialize rand once, added seed to configuration Signed-off-by: Ira <IRAR@il.ibm.com> * Added a test, updated README Signed-off-by: Ira <IRAR@il.ibm.com> --------- Signed-off-by: Ira <IRAR@il.ibm.com>
1 parent 3e63a0d commit 2adff39

File tree

8 files changed

+156
-11
lines changed

8 files changed

+156
-11
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ For more details see the <a href="https://docs.vllm.ai/en/stable/getting_started
9898
- `random`: returns a sentence chosen at random from a set of pre-defined sentences
9999
- `time-to-first-token`: the time to the first token (in milliseconds), optional, by default zero
100100
- `inter-token-latency`: the time to 'generate' each additional token (in milliseconds), optional, by default zero
101+
- `seed`: random seed for operations (if not set, current Unix time in nanoseconds is used)
101102

102103
In addition, as we are using klog, the following parameters are available:
103104
- `add_dir_header`: if true, adds the file directory to the header of the log messages

manifests/config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ lora-modules:
1212
mode: "random"
1313
time-to-first-token: 2
1414
inter-token-latency: 1
15+
seed: 100100100

pkg/llm-d-inference-sim/config.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"fmt"
2323
"os"
2424
"strings"
25+
"time"
2526

2627
"gopkg.in/yaml.v3"
2728
)
@@ -51,6 +52,8 @@ type configuration struct {
5152
InterTokenLatency int `yaml:"inter-token-latency"`
5253
// Mode defines the simulator response generation mode, valid values: echo, random
5354
Mode string `yaml:"mode"`
55+
// Seed defines random seed for operations
56+
Seed int64 `yaml:"seed"`
5457
}
5558

5659
type loraModule struct {
@@ -98,6 +101,7 @@ func newConfig() *configuration {
98101
MaxLoras: 1,
99102
MaxNumSeqs: 5,
100103
Mode: modeRandom,
104+
Seed: time.Now().UnixNano(),
101105
}
102106
}
103107

pkg/llm-d-inference-sim/config_test.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,15 @@ type testCase struct {
5050
var _ = Describe("Simulator configuration", func() {
5151
tests := make([]testCase, 0)
5252

53-
// Simple config with only model name set
53+
// Simple config with a few parameters
5454
c := newConfig()
5555
c.Model = model
5656
c.ServedModelNames = []string{c.Model}
5757
c.MaxCPULoras = 1
58+
c.Seed = 100
5859
test := testCase{
5960
name: "simple",
60-
args: []string{"cmd", "--model", model, "--mode", modeRandom},
61+
args: []string{"cmd", "--model", model, "--mode", modeRandom, "--seed", "100"},
6162
expectedConfig: c,
6263
}
6364
tests = append(tests, test)
@@ -73,6 +74,7 @@ var _ = Describe("Simulator configuration", func() {
7374
c.TimeToFirstToken = 2
7475
c.InterTokenLatency = 1
7576
c.LoraModules = []loraModule{{Name: "lora1", Path: "/path/to/lora1"}, {Name: "lora2", Path: "/path/to/lora2"}}
77+
c.Seed = 100100100
7678
test = testCase{
7779
name: "config file",
7880
args: []string{"cmd", "--config", "../../manifests/config.yaml"},
@@ -94,6 +96,7 @@ var _ = Describe("Simulator configuration", func() {
9496
c.MaxNumSeqs = 5
9597
c.TimeToFirstToken = 2
9698
c.InterTokenLatency = 1
99+
c.Seed = 100
97100
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}, {Name: "lora4", Path: "/path/to/lora4"}}
98101
c.LoraModulesString = []string{
99102
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -102,7 +105,7 @@ var _ = Describe("Simulator configuration", func() {
102105
test = testCase{
103106
name: "config file with command line args",
104107
args: []string{"cmd", "--model", model, "--config", "../../manifests/config.yaml", "--port", "8002",
105-
"--served-model-name", "alias1", "alias2",
108+
"--served-model-name", "alias1", "alias2", "--seed", "100",
106109
"--lora-modules", "{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}", "{\"name\":\"lora4\",\"path\":\"/path/to/lora4\"}",
107110
},
108111
expectedConfig: c,
@@ -119,6 +122,7 @@ var _ = Describe("Simulator configuration", func() {
119122
c.MaxNumSeqs = 5
120123
c.TimeToFirstToken = 2
121124
c.InterTokenLatency = 1
125+
c.Seed = 100100100
122126
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
123127
c.LoraModulesString = []string{
124128
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",
@@ -143,6 +147,7 @@ var _ = Describe("Simulator configuration", func() {
143147
c.MaxNumSeqs = 5
144148
c.TimeToFirstToken = 2
145149
c.InterTokenLatency = 1
150+
c.Seed = 100100100
146151
c.LoraModules = []loraModule{{Name: "lora3", Path: "/path/to/lora3"}}
147152
c.LoraModulesString = []string{
148153
"{\"name\":\"lora3\",\"path\":\"/path/to/lora3\"}",

pkg/llm-d-inference-sim/seed_test.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/*
2+
Copyright 2025 The llm-d-inference-sim Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package llmdinferencesim
18+
19+
import (
20+
"context"
21+
22+
. "github.com/onsi/ginkgo/v2"
23+
. "github.com/onsi/gomega"
24+
"github.com/openai/openai-go"
25+
"github.com/openai/openai-go/option"
26+
)
27+
28+
var _ = Describe("Simulator with seed", func() {
29+
firstText := ""
30+
DescribeTable("text completions with the same seed",
31+
// use a function so that httpClient is captured when running
32+
func() {
33+
ctx := context.TODO()
34+
client, err := startServerWithArgs(ctx, modeRandom,
35+
[]string{"cmd", "--model", model, "--mode", modeRandom, "--seed", "100"})
36+
Expect(err).NotTo(HaveOccurred())
37+
38+
openaiclient := openai.NewClient(
39+
option.WithBaseURL(baseURL),
40+
option.WithHTTPClient(client))
41+
params := openai.CompletionNewParams{
42+
Prompt: openai.CompletionNewParamsPromptUnion{
43+
OfString: openai.String(userMessage),
44+
},
45+
Model: openai.CompletionNewParamsModel(model),
46+
}
47+
48+
resp, err := openaiclient.Completions.New(ctx, params)
49+
Expect(err).NotTo(HaveOccurred())
50+
Expect(resp.Choices).ShouldNot(BeEmpty())
51+
Expect(string(resp.Object)).To(Equal(textCompletionObject))
52+
53+
text := resp.Choices[0].Text
54+
Expect(text).ShouldNot(BeEmpty())
55+
if firstText == "" {
56+
firstText = text
57+
} else {
58+
Expect(text).Should(Equal(firstText))
59+
}
60+
},
61+
Entry("first time text completion with seed"),
62+
Entry("second time text completion with seed"),
63+
Entry("third time text completion with seed"),
64+
Entry("fourth time text completion with seed"),
65+
Entry("fifth time text completion with seed"),
66+
Entry("sixth time text completion with seed"),
67+
Entry("seventh time text completion with seed"),
68+
Entry("eighth time text completion with seed"),
69+
)
70+
71+
texts := make([]string, 0)
72+
DescribeTable("text completions with different seeds",
73+
func(lastTest bool) {
74+
ctx := context.TODO()
75+
client, err := startServer(ctx, modeRandom)
76+
Expect(err).NotTo(HaveOccurred())
77+
78+
openaiclient := openai.NewClient(
79+
option.WithBaseURL(baseURL),
80+
option.WithHTTPClient(client))
81+
params := openai.CompletionNewParams{
82+
Prompt: openai.CompletionNewParamsPromptUnion{
83+
OfString: openai.String(userMessage),
84+
},
85+
Model: openai.CompletionNewParamsModel(model),
86+
}
87+
88+
resp, err := openaiclient.Completions.New(ctx, params)
89+
Expect(err).NotTo(HaveOccurred())
90+
Expect(resp.Choices).ShouldNot(BeEmpty())
91+
Expect(string(resp.Object)).To(Equal(textCompletionObject))
92+
93+
text := resp.Choices[0].Text
94+
Expect(text).ShouldNot(BeEmpty())
95+
texts = append(texts, text)
96+
if lastTest {
97+
Expect(hasAtLeastTwoDifferentTexts(texts)).To(BeTrue())
98+
}
99+
},
100+
Entry("first time text completion without seed", false),
101+
Entry("second time text completion without seed", false),
102+
Entry("third time text completion without seed", false),
103+
Entry("fourth time text completion without seed", false),
104+
Entry("fifth time text completion without seed", false),
105+
Entry("sixth time text completion without seed", false),
106+
Entry("seventh time text completion without seed", false),
107+
Entry("eighth time text completion without seed", true),
108+
)
109+
})
110+
111+
func hasAtLeastTwoDifferentTexts(texts []string) bool {
112+
unique := make(map[string]struct{})
113+
for _, s := range texts {
114+
unique[s] = struct{}{}
115+
if len(unique) > 1 {
116+
return true
117+
}
118+
}
119+
return false
120+
}

pkg/llm-d-inference-sim/simulator.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
154154
f.StringVar(&config.Mode, "mode", config.Mode, "Simulator mode, echo - returns the same text that was sent in the request, for chat completion returns the last message, random - returns random sentence from a bank of pre-defined sentences")
155155
f.IntVar(&config.InterTokenLatency, "inter-token-latency", config.InterTokenLatency, "Time to generate one token (in milliseconds)")
156156
f.IntVar(&config.TimeToFirstToken, "time-to-first-token", config.TimeToFirstToken, "Time to first token (in milliseconds)")
157+
f.Int64Var(&config.Seed, "seed", config.Seed, "Random seed for operations (if not set, current Unix time in nanoseconds is used)")
157158

158159
// These values were manually parsed above in getParamValueFromArgs, we leave this in order to get these flags in --help
159160
var servedModelNameStrings multiString
@@ -192,6 +193,8 @@ func (s *VllmSimulator) parseCommandParamsAndLoadConfig() error {
192193
s.loraAdaptors.Store(lora, "")
193194
}
194195

196+
initRandom(s.config.Seed)
197+
195198
// just to suppress not used lint error for now
196199
_ = &s.waitingLoras
197200
return nil

pkg/llm-d-inference-sim/simulator_test.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,20 @@ const baseURL = "http://localhost/v1"
4040
const userMessage = "This is a test."
4141

4242
func startServer(ctx context.Context, mode string) (*http.Client, error) {
43+
return startServerWithArgs(ctx, mode, nil)
44+
}
45+
46+
func startServerWithArgs(ctx context.Context, mode string, args []string) (*http.Client, error) {
4347
oldArgs := os.Args
4448
defer func() {
4549
os.Args = oldArgs
4650
}()
47-
os.Args = []string{"cmd", "--model", model, "--mode", mode}
51+
52+
if args != nil {
53+
os.Args = args
54+
} else {
55+
os.Args = []string{"cmd", "--model", model, "--mode", mode}
56+
}
4857
logger := klog.Background()
4958

5059
s, err := New(logger)

pkg/llm-d-inference-sim/utils.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"math/rand"
2222
"regexp"
2323
"strings"
24-
"time"
2524
)
2625

2726
// list of responses to use in random mode for completion requests
@@ -111,11 +110,16 @@ func randomNumericString(length int) string {
111110
return string(result)
112111
}
113112

113+
var randomGenerator *rand.Rand
114+
115+
func initRandom(seed int64) {
116+
src := rand.NewSource(seed)
117+
randomGenerator = rand.New(src)
118+
}
119+
114120
// Returns an integer between min and max (included)
115121
func randomInt(min int, max int) int {
116-
src := rand.NewSource(time.Now().UnixNano())
117-
r := rand.New(src)
118-
return r.Intn(max-min+1) + min
122+
return randomGenerator.Intn(max-min+1) + min
119123
}
120124

121125
// Returns true or false randomly
@@ -125,9 +129,7 @@ func flipCoin() bool {
125129

126130
// Returns a random float64 in the range [min, max)
127131
func randomFloat(min float64, max float64) float64 {
128-
src := rand.NewSource(time.Now().UnixNano())
129-
r := rand.New(src)
130-
return r.Float64()*(max-min) + min
132+
return randomGenerator.Float64()*(max-min) + min
131133
}
132134

133135
// Regular expression for the response tokenization

0 commit comments

Comments
 (0)