
Commit 0fd1f87

Merge branch 'main' into eagle-kernel-fusion
2 parents: ab20d2f + ca2f6b9


519 files changed (+15885, -8954 lines)


.buildkite/nightly-benchmarks/README.md

Lines changed: 1 addition & 1 deletion

@@ -113,7 +113,7 @@ WARNING: The benchmarking script will save json results by itself, so please do
 
 ### Visualizing the results
 
-The `convert-results-json-to-markdown.py` helps you put the benchmarking results inside a markdown table, by formatting [descriptions.md](tests/descriptions.md) with real benchmarking results.
+The `convert-results-json-to-markdown.py` helps you put the benchmarking results inside a markdown table, by formatting [descriptions.md](performance-benchmarks-descriptions.md) with real benchmarking results.
 You can find the result presented as a table inside the `buildkite/performance-benchmark` job page.
 If you do not see the table, please wait till the benchmark finish running.
 The json version of the table (together with the json version of the benchmark) will be also attached to the markdown file.
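As context for the doc fix above (the link now points at performance-benchmarks-descriptions.md): a minimal sketch of how the conversion step might be run by hand. The script name comes from the README itself; the working directory, path, and argument-free invocation are assumptions, since the real call site lives in the Buildkite pipeline and is not shown in this diff.

# Hypothetical manual run; in CI the nightly-benchmarks pipeline invokes this
# script itself, so the path and the argument-free call below are assumptions.
cd .buildkite/nightly-benchmarks                       # assumed working directory
python3 scripts/convert-results-json-to-markdown.py   # assumed invocation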

.buildkite/pyproject.toml

Lines changed: 0 additions & 5 deletions

@@ -6,11 +6,6 @@
 
 [tool.ruff]
 line-length = 88
-exclude = [
-    # External file, leaving license intact
-    "examples/other/fp8/quantizer/quantize.py",
-    "vllm/vllm_flash_attn/flash_attn_interface.pyi"
-]
 
 [tool.ruff.lint.per-file-ignores]
 "vllm/third_party/**" = ["ALL"]

.buildkite/scripts/hardware_ci/run-hpu-test.sh

Lines changed: 7 additions & 5 deletions

@@ -10,15 +10,17 @@ docker build -t hpu-test-env -f docker/Dockerfile.hpu .
 # Setup cleanup
 # certain versions of HPU software stack have a bug that can
 # override the exit code of the script, so we need to use
-# separate remove_docker_container and remove_docker_container_and_exit
+# separate remove_docker_containers and remove_docker_containers_and_exit
 # functions, while other platforms only need one remove_docker_container
 # function.
 EXITCODE=1
-remove_docker_container() { docker rm -f hpu-test || true; }
-remove_docker_container_and_exit() { remove_docker_container; exit $EXITCODE; }
-trap remove_docker_container_and_exit EXIT
-remove_docker_container
+remove_docker_containers() { docker rm -f hpu-test || true; docker rm -f hpu-test-tp2 || true; }
+remove_docker_containers_and_exit() { remove_docker_containers; exit $EXITCODE; }
+trap remove_docker_containers_and_exit EXIT
+remove_docker_containers
 
 # Run the image and launch offline inference
 docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
+docker run --runtime=habana --name=hpu-test-tp2 --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --tensor-parallel-size 2
+
 EXITCODE=$?
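The HPU change above widens an existing cleanup pattern to two containers. Per the comment in the script, some HPU software-stack versions can clobber the script's exit code during cleanup, so the exit code is captured in EXITCODE and re-raised from an EXIT trap. A generic, self-contained sketch of that pattern (the container name, image, and command below are placeholders, not taken from this diff):

#!/bin/bash
# Sketch of the trap-based cleanup that preserves the test's exit code.
# "my-test" and "my-image" are placeholders.
EXITCODE=1
remove_containers() { docker rm -f my-test || true; }
remove_containers_and_exit() { remove_containers; exit $EXITCODE; }
trap remove_containers_and_exit EXIT
remove_containers                       # clean up leftovers from a previous run

docker run --name=my-test my-image true
EXITCODE=$?                             # re-raised by the EXIT trap above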

.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 166 additions & 88 deletions

@@ -2,102 +2,180 @@
 
 set -xu
 
-# Build the docker image.
-docker build -f docker/Dockerfile.tpu -t vllm-tpu .
 
-# Set up cleanup.
-remove_docker_container() { docker rm -f tpu-test || true; }
+remove_docker_container() {
+  docker rm -f tpu-test || true;
+  docker rm -f vllm-tpu || true;
+}
+
 trap remove_docker_container EXIT
+
 # Remove the container that might not be cleaned up in the previous run.
 remove_docker_container
 
+# Build the docker image.
+docker build -f docker/Dockerfile.tpu -t vllm-tpu .
+
+# Set up cleanup.
+cleanup_docker() {
+  # Get Docker's root directory
+  docker_root=$(docker info -f '{{.DockerRootDir}}')
+  if [ -z "$docker_root" ]; then
+    echo "Failed to determine Docker root directory."
+    exit 1
+  fi
+  echo "Docker root directory: $docker_root"
+  # Check disk usage of the filesystem where Docker's root directory is located
+  disk_usage=$(df "$docker_root" | tail -1 | awk '{print $5}' | sed 's/%//')
+  # Define the threshold
+  threshold=70
+  if [ "$disk_usage" -gt "$threshold" ]; then
+    echo "Disk usage is above $threshold%. Cleaning up Docker images and volumes..."
+    # Remove dangling images (those that are not tagged and not used by any container)
+    docker image prune -f
+    # Remove unused volumes / force the system prune for old images as well.
+    docker volume prune -f && docker system prune --force --filter "until=72h" --all
+    echo "Docker images and volumes cleanup completed."
+  else
+    echo "Disk usage is below $threshold%. No cleanup needed."
+  fi
+}
+cleanup_docker
+
 # For HF_TOKEN.
 source /etc/environment
-# Run a simple end-to-end example.
+
 docker run --privileged --net host --shm-size=16G -it \
   -e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
-  vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
-    && python3 -m pip install pytest pytest-asyncio tpu-info \
-    && python3 -m pip install lm_eval[api]==0.4.4 \
-    && export VLLM_XLA_CACHE_PATH= \
-    && export VLLM_USE_V1=1 \
-    && export VLLM_XLA_CHECK_RECOMPILATION=1 \
-    && echo HARDWARE \
-    && tpu-info \
-    && { \
-      echo TEST_0: Running test_perf.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_perf.py; \
-      echo TEST_0_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_1: Running test_compilation.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py; \
-      echo TEST_1_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_2: Running test_basic.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py; \
-      echo TEST_2_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_3: Running test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
-      python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine; \
-      echo TEST_3_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_4: Running test_quantization_accuracy.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py; \
-      echo TEST_4_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_5: Running examples/offline_inference/tpu.py; \
-      python3 /workspace/vllm/examples/offline_inference/tpu.py; \
-      echo TEST_5_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_6: Running test_tpu_model_runner.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/tpu/worker/test_tpu_model_runner.py; \
-      echo TEST_6_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_7: Running test_sampler.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py; \
-      echo TEST_7_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_8: Running test_topk_topp_sampler.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py; \
-      echo TEST_8_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_9: Running test_multimodal.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py; \
-      echo TEST_9_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_10: Running test_pallas.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py; \
-      echo TEST_10_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_11: Running test_struct_output_generate.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py; \
-      echo TEST_11_EXIT_CODE: \$?; \
-    } & \
-    { \
-      echo TEST_12: Running test_moe_pallas.py; \
-      python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py; \
-      echo TEST_12_EXIT_CODE: \$?; \
-    } & \
-    # Disable the TPU LoRA tests until the feature is activated
-    # & { \
-    #   echo TEST_13: Running test_moe_pallas.py; \
-    #   python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/; \
-    #   echo TEST_13_EXIT_CODE: \$?; \
-    # } & \
-    wait \
-    && echo 'All tests have attempted to run. Check logs for individual test statuses and exit codes.' \
-  "
+  vllm-tpu /bin/bash -c '
+set -e  # Exit immediately if a command exits with a non-zero status.
+set -u  # Treat unset variables as an error.
+
+echo "--- Starting script inside Docker container ---"
+
+# Create results directory
+RESULTS_DIR=$(mktemp -d)
+# If mktemp fails, set -e will cause the script to exit.
+echo "Results will be stored in: $RESULTS_DIR"
+
+# Install dependencies
+echo "--- Installing Python dependencies ---"
+python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \
+  && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \
+  && python3 -m pip install --progress-bar off lm_eval[api]==0.4.4
+echo "--- Python dependencies installed ---"
+export VLLM_USE_V1=1
+export VLLM_XLA_CHECK_RECOMPILATION=1
+export VLLM_XLA_CACHE_PATH=
+echo "Using VLLM V1"
+
+echo "--- Hardware Information ---"
+tpu-info
+echo "--- Starting Tests ---"
+set +e
+overall_script_exit_code=0
+
+# --- Test Definitions ---
+# If a test fails, this function will print logs and will not cause the main script to exit.
+run_test() {
+  local test_num=$1
+  local test_name=$2
+  local test_command=$3
+  local log_file="$RESULTS_DIR/test_${test_num}.log"
+  local actual_exit_code
+
+  echo "--- TEST_$test_num: Running $test_name ---"
+
+  # Execute the test command.
+  eval "$test_command" > >(tee -a "$log_file") 2> >(tee -a "$log_file" >&2)
+  actual_exit_code=$?
+
+  echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code"  # This goes to main log
+  echo "TEST_${test_num}_COMMAND_EXIT_CODE: $actual_exit_code" >> "$log_file"  # Also to per-test log
+
+  if [ "$actual_exit_code" -ne 0 ]; then
+    echo "TEST_$test_num ($test_name) FAILED with exit code $actual_exit_code." >&2
+    echo "--- Log for failed TEST_$test_num ($test_name) ---" >&2
+    if [ -f "$log_file" ]; then
+      cat "$log_file" >&2
+    else
+      echo "Log file $log_file not found for TEST_$test_num ($test_name)." >&2
+    fi
+    echo "--- End of log for TEST_$test_num ($test_name) ---" >&2
+    return "$actual_exit_code"  # Return the failure code
+  else
+    echo "TEST_$test_num ($test_name) PASSED."
+    return 0  # Return success
+  fi
+}
+
+# Helper function to call run_test and update the overall script exit code
+run_and_track_test() {
+  local test_num_arg="$1"
+  local test_name_arg="$2"
+  local test_command_arg="$3"
+
+  # Run the test
+  run_test "$test_num_arg" "$test_name_arg" "$test_command_arg"
+  local test_specific_exit_code=$?
+
+  # If the test failed, set the overall script exit code to 1
+  if [ "$test_specific_exit_code" -ne 0 ]; then
+    # No need for extra echo here, run_test already logged the failure.
+    overall_script_exit_code=1
+  fi
+}
+
+# --- Actual Test Execution ---
+run_and_track_test 0 "test_perf.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_perf.py"
+run_and_track_test 1 "test_compilation.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_compilation.py"
+run_and_track_test 2 "test_basic.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_basic.py"
+run_and_track_test 3 "test_accuracy.py::test_lm_eval_accuracy_v1_engine" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine"
+run_and_track_test 4 "test_quantization_accuracy.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py"
+run_and_track_test 5 "examples/offline_inference/tpu.py" \
+  "python3 /workspace/vllm/examples/offline_inference/tpu.py"
+run_and_track_test 6 "test_tpu_model_runner.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/worker/test_tpu_model_runner.py"
+run_and_track_test 7 "test_sampler.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py"
+run_and_track_test 8 "test_topk_topp_sampler.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py"
+run_and_track_test 9 "test_multimodal.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py"
+run_and_track_test 10 "test_pallas.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py"
+run_and_track_test 11 "test_struct_output_generate.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/v1/entrypoints/llm/test_struct_output_generate.py"
+run_and_track_test 12 "test_moe_pallas.py" \
+  "python3 -m pytest -s -v /workspace/vllm/tests/tpu/test_moe_pallas.py"
+run_and_track_test 13 "test_lora.py" \
+  "VLLM_XLA_CHECK_RECOMPILATION=0 python3 -m pytest -s -v /workspace/vllm/tests/tpu/lora/test_lora.py"
+
+# After all tests have been attempted, exit with the overall status.
+if [ "$overall_script_exit_code" -ne 0 ]; then
+  echo "--- One or more tests FAILED. Overall script exiting with failure code 1. ---"
+else
+  echo "--- All tests have completed and PASSED. Overall script exiting with success code 0. ---"
+fi
+exit "$overall_script_exit_code"
+'  # IMPORTANT: This is the closing single quote for the bash -c "..." command. Ensure it is present and correct.
+
+# Capture the exit code of the docker run command
+DOCKER_RUN_EXIT_CODE=$?
 
+# The trap will run for cleanup.
+# Exit the main script with the Docker run command's exit code.
+if [ "$DOCKER_RUN_EXIT_CODE" -ne 0 ]; then
+  echo "Docker run command failed with exit code $DOCKER_RUN_EXIT_CODE."
+  exit "$DOCKER_RUN_EXIT_CODE"
+else
+  echo "Docker run command completed successfully."
+  exit 0
+fi
 # TODO: This test fails because it uses RANDOM_SEED sampling
-# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
+# pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
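The rewritten TPU script replaces the old backgrounded test blocks with sequential run_test / run_and_track_test helpers: each test is logged to its own file, a failure prints that log but does not stop the run, and the script exits non-zero if any test failed. A stripped-down sketch of the same pattern, independent of the vLLM test suite (the two test commands below are placeholders):

#!/bin/bash
# Minimal sketch of the sequential run-and-track pattern used in the new script.
set -u
RESULTS_DIR=$(mktemp -d)
overall_exit_code=0

run_and_track_test() {
  local num="$1" name="$2" cmd="$3"
  local log="$RESULTS_DIR/test_${num}.log"
  echo "--- TEST_$num: $name ---"
  eval "$cmd" >"$log" 2>&1             # run the test, capture all output
  local rc=$?
  echo "TEST_${num}_EXIT_CODE: $rc"
  if [ "$rc" -ne 0 ]; then
    cat "$log" >&2                     # surface the failing test's log
    overall_exit_code=1                # remember the failure, keep going
  fi
}

# Placeholder commands, not real vLLM tests.
run_and_track_test 0 "always passes" "true"
run_and_track_test 1 "always fails"  "false"

exit "$overall_exit_code"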
