Skip to content

Commit 1c450a5

Browse files
committed
Merge remote-tracking branch 'upstream/main' into upstream_merge_2025_05_21
2 parents 7c1213e + 5873877 commit 1c450a5

File tree

637 files changed

+16980
-13289
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

637 files changed

+16980
-13289
lines changed

.buildkite/pyproject.toml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,6 @@
66

77
[tool.ruff]
88
line-length = 88
9-
exclude = [
10-
# External file, leaving license intact
11-
"examples/other/fp8/quantizer/quantize.py",
12-
"vllm/vllm_flash_attn/flash_attn_interface.pyi"
13-
]
149

1510
[tool.ruff.lint.per-file-ignores]
1611
"vllm/third_party/**" = ["ALL"]

.buildkite/scripts/hardware_ci/run-hpu-test.sh

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,17 @@ docker build -t hpu-test-env -f docker/Dockerfile.hpu .
1010
# Setup cleanup
1111
# certain versions of HPU software stack have a bug that can
1212
# override the exit code of the script, so we need to use
13-
# separate remove_docker_container and remove_docker_container_and_exit
13+
# separate remove_docker_containers and remove_docker_containers_and_exit
1414
# functions, while other platforms only need one remove_docker_container
1515
# function.
1616
EXITCODE=1
17-
remove_docker_container() { docker rm -f hpu-test || true; }
18-
remove_docker_container_and_exit() { remove_docker_container; exit $EXITCODE; }
19-
trap remove_docker_container_and_exit EXIT
20-
remove_docker_container
17+
remove_docker_containers() { docker rm -f hpu-test || true; docker rm -f hpu-test-tp2 || true; }
18+
remove_docker_containers_and_exit() { remove_docker_containers; exit $EXITCODE; }
19+
trap remove_docker_containers_and_exit EXIT
20+
remove_docker_containers
2121

2222
# Run the image and launch offline inference
2323
docker run --runtime=habana --name=hpu-test --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
24+
docker run --runtime=habana --name=hpu-test-tp2 --network=host -e HABANA_VISIBLE_DEVICES=all -e VLLM_SKIP_WARMUP=true --entrypoint="" hpu-test-env python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --tensor-parallel-size 2
25+
2426
EXITCODE=$?

.buildkite/scripts/hardware_ci/run-neuron-test.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,11 @@ docker run --rm -it --device=/dev/neuron0 --network bridge \
5353
-e "NEURON_COMPILE_CACHE_URL=${NEURON_COMPILE_CACHE_MOUNT}" \
5454
--name "${container_name}" \
5555
${image_name} \
56-
/bin/bash -c "python3 /workspace/vllm/examples/offline_inference/neuron.py && python3 -m pytest /workspace/vllm/tests/neuron/1_core/ -v --capture=tee-sys && python3 -m pytest /workspace/vllm/tests/neuron/2_core/ -v --capture=tee-sys"
56+
/bin/bash -c "
57+
python3 /workspace/vllm/examples/offline_inference/neuron.py;
58+
python3 -m pytest /workspace/vllm/tests/neuron/1_core/ -v --capture=tee-sys;
59+
for f in /workspace/vllm/tests/neuron/2_core/*.py; do
60+
echo 'Running test file: '\$f;
61+
python3 -m pytest \$f -v --capture=tee-sys;
62+
done
63+
"

.buildkite/test-pipeline.yaml

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,13 @@ steps:
3333

3434
- label: Documentation Build # 2min
3535
mirror_hardwares: [amdexperimental]
36-
working_dir: "/vllm-workspace/test_docs/docs"
36+
working_dir: "/vllm-workspace/test_docs"
3737
fast_check: true
3838
no_gpu: True
3939
commands:
40-
- pip install -r ../../requirements/docs.txt
41-
- SPHINXOPTS=\"-W\" make html
42-
# Check API reference (if it fails, you may have missing mock imports)
43-
- grep \"sig sig-object py\" build/html/api/vllm/vllm.sampling_params.html
40+
- pip install -r ../requirements/docs.txt
41+
# TODO: add `--strict` once warnings in docstrings are fixed
42+
- mkdocs build
4443

4544
- label: Async Engine, Inputs, Utils, Worker Test # 24min
4645
mirror_hardwares: [amdexperimental]
@@ -59,6 +58,7 @@ steps:
5958
- pytest -v -s async_engine # AsyncLLMEngine
6059
- NUM_SCHEDULER_STEPS=4 pytest -v -s async_engine/test_async_llm_engine.py
6160
- pytest -v -s test_inputs.py
61+
- pytest -v -s test_outputs.py
6262
- pytest -v -s multimodal
6363
- pytest -v -s test_utils.py # Utils
6464
- pytest -v -s worker # Worker
@@ -128,7 +128,7 @@ steps:
128128
- pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
129129
- pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
130130
- VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
131-
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
131+
- pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/
132132
- pytest -v -s entrypoints/test_chat_utils.py
133133
- VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests
134134

@@ -226,6 +226,7 @@ steps:
226226
- pytest -v -s v1/test_serial_utils.py
227227
- pytest -v -s v1/test_utils.py
228228
- pytest -v -s v1/test_oracle.py
229+
- pytest -v -s v1/test_metrics_reader.py
229230
# TODO: accuracy does not match, whether setting
230231
# VLLM_USE_FLASHINFER_SAMPLER or not on H100.
231232
- pytest -v -s v1/e2e
@@ -250,7 +251,7 @@ steps:
250251
- python3 offline_inference/vision_language.py --seed 0
251252
- python3 offline_inference/vision_language_embedding.py --seed 0
252253
- python3 offline_inference/vision_language_multi_image.py --seed 0
253-
- VLLM_USE_V1=0 python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
254+
- VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
254255
- python3 offline_inference/encoder_decoder.py
255256
- python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
256257
- python3 offline_inference/basic/classify.py
@@ -322,6 +323,7 @@ steps:
322323
- pytest -v -s compile/test_fusion.py
323324
- pytest -v -s compile/test_silu_mul_quant_fusion.py
324325
- pytest -v -s compile/test_sequence_parallelism.py
326+
- pytest -v -s compile/test_async_tp.py
325327

326328
- label: PyTorch Fullgraph Smoke Test # 9min
327329
mirror_hardwares: [amdexperimental, amdproduction]
@@ -399,10 +401,12 @@ steps:
399401
source_file_dependencies:
400402
- vllm/model_executor/model_loader
401403
- tests/tensorizer_loader
404+
- tests/entrypoints/openai/test_tensorizer_entrypoint.py
402405
commands:
403406
- apt-get update && apt-get install -y curl libsodium23
404407
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
405408
- pytest -v -s tensorizer_loader
409+
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
406410

407411
- label: Benchmarks # 9min
408412
mirror_hardwares: [amdexperimental, amdproduction]
@@ -481,10 +485,7 @@ steps:
481485
- pytest -v -s models/test_registry.py
482486
- pytest -v -s models/test_utils.py
483487
- pytest -v -s models/test_vision.py
484-
# V1 Test: https://github.com/vllm-project/vllm/issues/14531
485-
- VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
486-
- VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
487-
- VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'
488+
- pytest -v -s models/test_initialization.py
488489

489490
- label: Language Models Test (Standard)
490491
mirror_hardwares: [amdexperimental]
@@ -498,16 +499,25 @@ steps:
498499
- pip freeze | grep -E 'torch'
499500
- pytest -v -s models/language -m core_model
500501

501-
- label: Language Models Test (Extended)
502+
- label: Language Models Test (Extended Generation) # 1hr20min
502503
mirror_hardwares: [amdexperimental]
503504
optional: true
504505
source_file_dependencies:
505506
- vllm/
506-
- tests/models/language
507+
- tests/models/language/generation
507508
commands:
508509
# Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
509510
- pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
510-
- pytest -v -s models/language -m 'not core_model'
511+
- pytest -v -s models/language/generation -m 'not core_model'
512+
513+
- label: Language Models Test (Extended Pooling) # 36min
514+
mirror_hardwares: [amdexperimental]
515+
optional: true
516+
source_file_dependencies:
517+
- vllm/
518+
- tests/models/language/pooling
519+
commands:
520+
- pytest -v -s models/language/pooling -m 'not core_model'
511521

512522
- label: Multi-Modal Models Test (Standard)
513523
mirror_hardwares: [amdexperimental]

.github/ISSUE_TEMPLATE/400-bug-report.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,14 @@ body:
8181
required: true
8282
- type: markdown
8383
attributes:
84-
value: >
85-
⚠️ Please separate bugs of `transformers` implementation or usage from bugs of `vllm`. If you think anything is wrong with the models' output:
84+
value: |
85+
⚠️ Please separate bugs of `transformers` implementation or usage from bugs of `vllm`. If you think anything is wrong with the model's output:
8686
8787
- Try the counterpart of `transformers` first. If the error appears, please go to [their issues](https://github.com/huggingface/transformers/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc).
8888
8989
- If the error only appears in vllm, please provide the detailed script of how you run `transformers` and `vllm`, also highlight the difference and what you expect.
9090
91-
Thanks for contributing 🎉!
91+
Thanks for reporting 🙏!
9292
- type: checkboxes
9393
id: askllm
9494
attributes:
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
name: 🧪 CI failure report
2+
description: Report a failing test.
3+
title: "[CI Failure]: "
4+
labels: ["ci-failure"]
5+
6+
body:
7+
- type: markdown
8+
attributes:
9+
value: >
10+
#### Include the name of the failing Buildkite step and test file in the title.
11+
- type: input
12+
attributes:
13+
label: Name of failing test
14+
description: |
15+
Paste in the fully-qualified name of the failing test from the logs.
16+
placeholder: |
17+
`path/to/test_file.py::test_name[params]`
18+
validations:
19+
required: true
20+
- type: checkboxes
21+
attributes:
22+
label: Basic information
23+
description: Select all items that apply to the failing test.
24+
options:
25+
- label: Flaky test
26+
- label: Can reproduce locally
27+
- label: Caused by external libraries (e.g. bug in `transformers`)
28+
- type: textarea
29+
attributes:
30+
label: 🧪 Describe the failing test
31+
description: |
32+
Please provide a clear and concise description of the failing test.
33+
placeholder: |
34+
A clear and concise description of the failing test.
35+
36+
```
37+
The error message you got, with the full traceback and the error logs with [dump_input.py:##] if present.
38+
```
39+
validations:
40+
required: true
41+
- type: textarea
42+
attributes:
43+
label: 📝 History of failing test
44+
description: |
45+
Since when did the test start to fail?
46+
You can look up its history via [Buildkite Test Suites](https://buildkite.com/organizations/vllm/analytics/suites/ci-1/tests?branch=main).
47+
48+
If you have time, identify the PR that caused the test to fail on main. You can do so via the following methods:
49+
50+
- Use Buildkite Test Suites to find the PR where the test failure first occurred, and reproduce the failure locally.
51+
52+
- Run [`git bisect`](https://git-scm.com/docs/git-bisect) locally.
53+
54+
- Manually unblock Buildkite steps for suspected PRs on main and check the results. (authorized users only)
55+
placeholder: |
56+
Approximate timeline and/or problematic PRs
57+
58+
A link to the Buildkite analytics of the failing test (if available)
59+
validations:
60+
required: true
61+
- type: textarea
62+
attributes:
63+
label: CC List.
64+
description: >
65+
The list of people you want to CC. Usually, this includes those who worked on the PR that failed the test.
66+
- type: markdown
67+
attributes:
68+
value: >
69+
Thanks for reporting 🙏!

.github/mergify.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ pull_request_rules:
5858
- files~=^benchmarks/structured_schemas/
5959
- files=benchmarks/benchmark_serving_structured_output.py
6060
- files=benchmarks/run_structured_output_benchmark.sh
61-
- files=docs/source/features/structured_outputs.md
61+
- files=docs/features/structured_outputs.md
6262
- files=examples/offline_inference/structured_outputs.py
6363
- files=examples/online_serving/openai_chat_completion_structured_outputs.py
6464
- files=examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
@@ -135,9 +135,7 @@ pull_request_rules:
135135
- files~=^tests/entrypoints/openai/tool_parsers/
136136
- files=tests/entrypoints/openai/test_chat_with_tool_reasoning.py
137137
- files~=^vllm/entrypoints/openai/tool_parsers/
138-
- files=docs/source/features/tool_calling.md
139-
- files=docs/source/getting_started/examples/openai_chat_completion_client_with_tools.md
140-
- files=docs/source/getting_started/examples/chat_with_tools.md
138+
- files=docs/features/tool_calling.md
141139
- files~=^examples/tool_chat_*
142140
- files=examples/offline_inference/chat_with_tools.py
143141
- files=examples/online_serving/openai_chat_completion_client_with_tools_required.py

.github/scripts/cleanup_pr_body.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ sed -i '/\*\*BEFORE SUBMITTING, PLEASE READ.*\*\*/,$d' "${NEW}"
2626

2727
# Remove HTML <details> section that includes <summary> text of "PR Checklist (Click to Expand)"
2828
python3 - <<EOF
29-
import re
29+
import regex as re
3030
3131
with open("${NEW}", "r") as file:
3232
content = file.read()

.github/workflows/cleanup_pr_body.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@ jobs:
2020
with:
2121
python-version: '3.12'
2222

23+
- name: Install Python dependencies
24+
run: |
25+
python3 -m pip install --upgrade pip
26+
python3 -m pip install regex
27+
2328
- name: Update PR description
2429
env:
2530
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
26-
run: .github/scripts/cleanup_pr_body.sh "${{ github.event.number }}"
31+
run: bash .github/scripts/cleanup_pr_body.sh "${{ github.event.number }}"

.gitignore

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,6 @@ instance/
7777
# Scrapy stuff:
7878
.scrapy
7979

80-
# Sphinx documentation
81-
docs/_build/
82-
docs/source/getting_started/examples/
83-
docs/source/api/vllm
84-
8580
# PyBuilder
8681
.pybuilder/
8782
target/
@@ -151,6 +146,7 @@ venv.bak/
151146

152147
# mkdocs documentation
153148
/site
149+
docs/examples
154150

155151
# mypy
156152
.mypy_cache/

0 commit comments

Comments
 (0)