Commit 9bb7599

misc: add test script for easier testing
Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
1 parent 4bbfc36 commit 9bb7599

File tree

1 file changed (+45, -0 lines)

test_llama4_eplb.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from vllm import LLM, SamplingParams

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)


def main():
    # Create an LLM with EPLB parameters.
    llm = LLM(
        model="/fp8-llama/llama4scout-fp8/",
        tensor_parallel_size=8,
        enable_expert_parallel=True,
        enable_eplb=True,
        num_redundant_experts=16,
        eplb_window_size=1000,
        eplb_step_interval=3000,
        trust_remote_code=True,
        enforce_eager=True,
    )
    # Generate texts from the prompts.
    # The output is a list of RequestOutput objects
    # that contain the prompt, generated text, and other information.
    outputs = llm.generate(prompts, sampling_params)
    # Print the outputs.
    print("\nGenerated Outputs:\n" + "-" * 60)
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}")
        print(f"Output: {generated_text!r}")
        print("-" * 60)


if __name__ == "__main__":
    main()
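Usage note (not part of the commit): the script assumes a local FP8 Llama 4 Scout checkpoint at /fp8-llama/llama4scout-fp8/ and a node with 8 GPUs (tensor_parallel_size=8), and is run directly, e.g. python test_llama4_eplb.py. Below is a minimal sketch of a variation that takes the checkpoint path from the command line instead of hardcoding it; the argparse wrapper is hypothetical, while the LLM keyword arguments are copied unchanged from the script above.

import argparse

from vllm import LLM, SamplingParams


def main():
    # Hypothetical CLI wrapper: read the checkpoint path from argv
    # instead of hardcoding /fp8-llama/llama4scout-fp8/.
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="path or HF id of the model checkpoint")
    args = parser.parse_args()

    # Same EPLB configuration as the test script above.
    llm = LLM(
        model=args.model,
        tensor_parallel_size=8,
        enable_expert_parallel=True,
        enable_eplb=True,
        num_redundant_experts=16,
        eplb_window_size=1000,
        eplb_step_interval=3000,
        trust_remote_code=True,
        enforce_eager=True,
    )
    outputs = llm.generate(
        ["Hello, my name is"],
        SamplingParams(temperature=0.8, top_p=0.95),
    )
    for output in outputs:
        print(output.outputs[0].text)


if __name__ == "__main__":
    main()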
