misc: add test script for easier testing

b8zhong · b8zhong · commit 02427affaccb · 2025-07-13T23:22:21.000-04:00
Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
diff --git a/test_llama4_eplb.py b/test_llama4_eplb.py
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm import LLM, SamplingParams
+
+# Sampling settings applied to every prompt below.
+sampling_params = SamplingParams(top_p=0.95, temperature=0.8)
+# Short open-ended prompts used as generation inputs.
+prompts = [
+    "Hello, my name is",
+    "The president of the United States is",
+    "The capital of France is",
+    "The future of AI is",
+]
+
+
+def main():
+    """Generate from the sample prompts with EPLB enabled and print results."""
+    separator = "-" * 60
+    engine_kwargs = dict(
+        model="/fp8-llama/llama4scout-fp8/",
+        tensor_parallel_size=8,
+        enable_expert_parallel=True,
+        enable_eplb=True,
+        num_redundant_experts=16,
+        eplb_window_size=1000,
+        eplb_step_interval=3000,
+        trust_remote_code=True,
+        enforce_eager=True,
+    )
+    # Build the engine from the EPLB / expert-parallel settings above.
+    llm = LLM(**engine_kwargs)
+    # generate() returns a list of RequestOutput objects holding the prompt,
+    # the generated text, and other information; only the first text is shown.
+    results = llm.generate(prompts, sampling_params)
+    print("\nGenerated Outputs:\n" + separator)
+    for result in results:
+        completion = result.outputs[0].text
+        print(f"Prompt:    {result.prompt!r}")
+        print(f"Output:    {completion!r}")
+        print(separator)
+
+
+if __name__ == "__main__":  # run only when executed directly, not on import
+    main()