Skip to content

Commit c3f61dd

Merge pull request #522 from ROCm/upstream_merge_2025_04_21
Upstream merge 2025 04 21
2 parents c383e6c + cfc530a commit c3f61dd

File tree: 549 files changed (+34935 / -12607 lines)

.buildkite/lm-eval-harness/configs/Qwen1.5-MoE-W4A16-compressed-tensors.yaml

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16 -b auto -l 1319 -f 5 -t 1
+model_name: "nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.30
+  - name: "exact_match,flexible-extract"
+    value: 0.465
+limit: 1319
+num_fewshot: 5
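
The commented command at the top of the new config records how the baseline values were produced. A minimal sketch of regenerating them from the repo root (flag meanings inferred from the config fields: -l appears to map to limit and -f to num_fewshot; assumes a working vLLM install and a single GPU, per -t 1):

# Regenerate the GSM8K baseline for the quantized Qwen1.5-MoE checkpoint.
bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh \
    -m nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16 \
    -b auto -l 1319 -f 5 -t 1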

.buildkite/lm-eval-harness/configs/models-small.txt

Lines changed: 1 addition & 1 deletion

@@ -4,7 +4,7 @@ Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
 Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
 Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
 Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
-Minitron-4B-Base-FP8.yaml
+Qwen1.5-MoE-W4A16-compressed-tensors.yaml
 Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml
 Qwen2-1.5B-Instruct-FP8W8.yaml
 Meta-Llama-3-8B-QQQ.yaml

.buildkite/release-pipeline.yaml

Lines changed: 15 additions & 0 deletions

@@ -86,3 +86,18 @@ steps:
     - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
   env:
     DOCKER_BUILDKIT: "1"
+
+- block: "Build Neuron release image"
+  key: block-neuron-release-image-build
+  depends_on: ~
+
+- label: "Build and publish Neuron release image"
+  depends_on: block-neuron-release-image-build
+  agents:
+    queue: neuron-postmerge
+  commands:
+    - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
+    - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:latest --progress plain -f docker/Dockerfile.neuron ."
+    - "docker push public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:$(buildkite-agent meta-data get release-version)"
+  env:
+    DOCKER_BUILDKIT: "1"

.buildkite/scripts/hardware_ci/run-amd-test.sh

Lines changed: 7 additions & 0 deletions

@@ -98,6 +98,13 @@ if [[ $commands == *" kernels "* ]]; then
       --ignore=kernels/test_machete_mm.py \
       --ignore=kernels/test_mha_attn.py \
       --ignore=kernels/test_block_fp8.py \
+      --ignore=kernels/test_cutlass_moe.py \
+      --ignore=kernels/test_mamba_ssm_ssd.py \
+      --ignore=kernels/test_attention.py \
+      --ignore=kernels/test_block_int8.py \
+      --ignore=kernels/test_fused_quant_layernorm.py \
+      --ignore=kernels/test_int8_kernel.py \
+      --ignore=kernels/test_triton_moe_ptpc_fp8.py \
       --ignore=kernels/test_permute_cols.py"
 fi

.buildkite/scripts/hardware_ci/run-cpu-test-ppc64le.sh

Lines changed: 26 additions & 2 deletions

@@ -5,10 +5,34 @@
 set -ex

 # Setup cleanup
-remove_docker_container() { docker rm -f cpu-test || true; docker system prune -f; }
+remove_docker_container() { podman rm -f cpu-test-ubi9-ppc || true; podman system prune -f; }
 trap remove_docker_container EXIT
 remove_docker_container

 # Try building the docker image
-docker build -t cpu-test -f docker/Dockerfile.ppc64le .
+podman build -t cpu-test-ubi9-ppc -f docker/Dockerfile.ppc64le .
+
+# Run the image
+podman run -itd --entrypoint /bin/bash -v /tmp/:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --name cpu-test-ubi9-ppc cpu-test-ubi9-ppc
+
+function cpu_tests() {
+
+  # offline inference
+  podman exec cpu-test-ubi9-ppc bash -c "
+    set -e
+    python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m"
+
+  # Run basic model test
+  podman exec cpu-test-ubi9-ppc bash -c "
+    set -e
+    pip install pytest pytest-asyncio einops peft Pillow soundfile transformers_stream_generator matplotlib
+    pip install sentence-transformers datamodel_code_generator
+    pytest -v -s tests/models/embedding/language/test_cls_models.py::test_classification_models[float-jason9693/Qwen2.5-1.5B-apeach]
+    pytest -v -s tests/models/embedding/language/test_embedding.py::test_models[half-BAAI/bge-base-en-v1.5]
+    pytest -v -s tests/models/encoder_decoder/language -m cpu_model"
+}
+
+# All of the CPU tests are expected to finish in less than 40 mins.
+export -f cpu_tests
+timeout 40m bash -c cpu_tests

.buildkite/scripts/hardware_ci/run-cpu-test-s390x.sh

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This script builds the CPU docker image and runs the offline inference inside the container.
+# It serves as a sanity check for compilation and basic model usage.
+set -ex
+
+# Setup cleanup
+remove_docker_container() { docker rm -f cpu-test || true; docker system prune -f; }
+trap remove_docker_container EXIT
+remove_docker_container
+
+# Try building the docker image
+docker build -t cpu-test -f docker/Dockerfile.s390x .

.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 5 additions & 1 deletion

@@ -17,10 +17,12 @@ source /etc/environment
 docker run --privileged --net host --shm-size=16G -it \
   -e "HF_TOKEN=$HF_TOKEN" --name tpu-test \
   vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
-  && python3 -m pip install pytest \
+  && python3 -m pip install pytest pytest-asyncio tpu-info \
   && python3 -m pip install lm_eval[api]==0.4.4 \
   && export VLLM_USE_V1=1 \
   && export VLLM_XLA_CHECK_RECOMPILATION=1 \
+  && echo HARDWARE \
+  && tpu-info \
   && echo TEST_0 \
   && pytest -v -s /workspace/vllm/tests/v1/tpu/test_perf.py \
   && echo TEST_1 \

@@ -40,6 +42,8 @@ docker run --privileged --net host --shm-size=16G -it \
   && echo TEST_8 \
   && pytest -s -v /workspace/vllm/tests/v1/tpu/test_topk_topp_sampler.py \
   && echo TEST_9 \
+  && pytest -s -v /workspace/vllm/tests/v1/tpu/test_multimodal.py \
+  && echo TEST_10 \
   && pytest -s -v /workspace/vllm/tests/v1/tpu/test_pallas.py" \
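
`tpu-info`, newly installed alongside pytest above, is a small diagnostic CLI; run on its own it prints the TPU chip inventory and per-chip status, which is what the `echo HARDWARE` step surfaces in the CI log. A sketch of standalone use (assumes a TPU VM with libtpu available):

pip install tpu-info    # same package the test image now installs
tpu-info                # print TPU chip type, count, and utilization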

.buildkite/test-pipeline.yaml

Lines changed: 28 additions & 13 deletions

@@ -121,7 +121,7 @@ steps:
   - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process
   - pytest -v -s entrypoints/llm/test_generate_multiple_loras.py # it needs a clean process
   - VLLM_USE_V1=0 pytest -v -s entrypoints/llm/test_guided_generate.py # it needs a clean process
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/
+  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_openai_schema.py
   - pytest -v -s entrypoints/test_chat_utils.py
   - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests

@@ -166,11 +166,6 @@ steps:
   - tests/tracing
   commands:
   - pytest -v -s metrics
-  - "pip install \
-    'opentelemetry-sdk>=1.26.0,<1.27.0' \
-    'opentelemetry-api>=1.26.0,<1.27.0' \
-    'opentelemetry-exporter-otlp>=1.26.0,<1.27.0' \
-    'opentelemetry-semantic-conventions-ai>=0.4.1,<0.5.0'"
   - pytest -v -s tracing

 ##### fast check tests #####

@@ -214,6 +209,7 @@ steps:
   - pytest -v -s v1/sample
   - pytest -v -s v1/worker
   - pytest -v -s v1/structured_output
+  - pytest -v -s v1/spec_decode
   - pytest -v -s v1/test_stats.py
   - pytest -v -s v1/test_utils.py
   - pytest -v -s v1/test_oracle.py

@@ -300,6 +296,14 @@ steps:
   command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
   parallelism: 4

+- label: PyTorch Compilation Unit Tests
+  source_file_dependencies:
+  - vllm/
+  - tests/compile
+  commands:
+  - pytest -v -s compile/test_pass_manager.py
+  - pytest -v -s compile/test_fusion.py
+
 - label: PyTorch Fullgraph Smoke Test # 9min
   source_file_dependencies:
   - vllm/

@@ -309,7 +313,6 @@ steps:
   # these tests need to be separated, cannot combine
   - pytest -v -s compile/piecewise/test_simple.py
   - pytest -v -s compile/piecewise/test_toy_llama.py
-  - pytest -v -s compile/test_pass_manager.py

 - label: PyTorch Fullgraph Test # 18min
   source_file_dependencies:

@@ -350,6 +353,13 @@ steps:
   commands:
   - bash scripts/run-benchmarks.sh

+- label: Benchmarks CLI Test # 10min
+  source_file_dependencies:
+  - vllm/
+  - tests/benchmarks/
+  commands:
+  - pytest -v -s benchmarks/
+
 - label: Quantization Test # 33min
   source_file_dependencies:
   - csrc/

@@ -388,8 +398,10 @@ steps:
   source_file_dependencies:
   - vllm/
   - tests/tool_use
+  - tests/mistral_tool_use
   commands:
   - pytest -v -s tool_use
+  - pytest -v -s mistral_tool_use

 ##### models test #####

@@ -401,8 +413,9 @@ steps:
   - pytest -v -s models/test_transformers.py
   - pytest -v -s models/test_registry.py
   # V1 Test: https://github.com/vllm-project/vllm/issues/14531
-  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4'
+  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'
   - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'llama4'
+  - VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'plamo2'

 - label: Language Models Test (Standard) # 32min
   #mirror_hardwares: [amd]

@@ -412,6 +425,8 @@ steps:
   - tests/models/embedding/language
   - tests/models/encoder_decoder/language
   commands:
+  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
+  - pip install causal-conv1d
   - pytest -v -s models/decoder_only/language -m 'core_model or quant_model'
   - pytest -v -s models/embedding/language -m core_model

@@ -423,6 +438,8 @@ steps:
   - tests/models/embedding/language
   - tests/models/encoder_decoder/language
   commands:
+  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
+  - pip install causal-conv1d
   - pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model'
   - pytest -v -s models/embedding/language -m 'not core_model'

@@ -439,7 +456,7 @@ steps:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
   - pytest -v -s models/multimodal
   - pytest -v -s models/decoder_only/audio_language -m 'core_model or quant_model'
-  - pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'core_model or quant_model'
+  - pytest -v -s models/decoder_only/vision_language -m 'core_model or quant_model'
   - pytest -v -s models/embedding/vision_language -m core_model
   - pytest -v -s models/encoder_decoder/audio_language -m core_model
   - pytest -v -s models/encoder_decoder/language -m core_model

@@ -458,10 +475,7 @@ steps:
   - pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
   - pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model'
   - pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=0) and not core_model and not quant_model'
-  # HACK - run phi3v tests separately to sidestep this transformers bug
-  # https://github.com/huggingface/transformers/issues/34307
-  - pytest -v -s models/decoder_only/vision_language/test_phi3v.py
-  - pytest -v -s --ignore models/decoder_only/vision_language/test_models.py --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'not core_model and not quant_model'
+  - pytest -v -s --ignore models/decoder_only/vision_language/test_models.py models/decoder_only/vision_language -m 'not core_model and not quant_model'
   - pytest -v -s models/embedding/vision_language -m 'not core_model'
   - pytest -v -s models/encoder_decoder/language -m 'not core_model'
   - pytest -v -s models/encoder_decoder/vision_language -m 'not core_model'

@@ -551,6 +565,7 @@ steps:
   # - pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py
+  - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown

 - label: Plugin Tests (2 GPUs) # 40min
   working_dir: "/vllm-workspace/tests"

.github/ISSUE_TEMPLATE/200-installation.yml

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@ body:
     description: |
       Please run the following and paste the output below.
       ```sh
-      wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py
+      wget https://raw.githubusercontent.com/vllm-project/vllm/main/vllm/collect_env.py
       # For security purposes, please feel free to check the contents of collect_env.py before running it.
       python collect_env.py
       ```

.github/ISSUE_TEMPLATE/300-usage.yml

Lines changed: 1 addition & 1 deletion

@@ -14,7 +14,7 @@ body:
     description: |
       Please run the following and paste the output below.
       ```sh
-      wget https://raw.githubusercontent.com/vllm-project/vllm/main/collect_env.py
+      wget https://raw.githubusercontent.com/vllm-project/vllm/main/vllm/collect_env.py
       # For security purposes, please feel free to check the contents of collect_env.py before running it.
       python collect_env.py
       ```
