Commit b678b55

Merge branch 'main' into eagle-fusion-sync-reduce

2 parents: 1de59d5 + 0bceac9

247 files changed: +10759 −5780 lines


.buildkite/nightly-benchmarks/nightly-annotation.md

Lines changed: 1 addition & 1 deletion

@@ -16,7 +16,7 @@ Please download the visualization scripts in the post
 - Download `nightly-benchmarks.zip`.
 - In the same folder, run the following code:

-```console
+```bash
 export HF_TOKEN=<your HF token>
 apt update
 apt install -y git

.buildkite/release-pipeline.yaml

Lines changed: 2 additions & 0 deletions

@@ -102,6 +102,7 @@ steps:
     commands:
       - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
       - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
+      - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest"
       - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
     env:
       DOCKER_BUILDKIT: "1"

@@ -117,6 +118,7 @@ steps:
     commands:
       - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
       - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:latest --progress plain -f docker/Dockerfile.neuron ."
+      - "docker push public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:latest"
       - "docker push public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:$(buildkite-agent meta-data get release-version)"
     env:
       DOCKER_BUILDKIT: "1"
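Both images were already built with a `latest` tag; this change also pushes it, so the moving tag now resolves after each release. For instance, consumers can pull the newest CPU image without knowing the release version (image name taken from the pipeline above):

```bash
# Pull the most recently released CPU image; previously only the
# versioned tag was pushed, even though :latest was built locally.
docker pull public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest
```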

.buildkite/scripts/tpu/config_v6e_1.env

Lines changed: 2 additions & 2 deletions

@@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu

 # vllm config
 MODEL=meta-llama/Llama-3.1-8B-Instruct
-MAX_NUM_SEQS=512
-MAX_NUM_BATCHED_TOKENS=512
+MAX_NUM_SEQS=256
+MAX_NUM_BATCHED_TOKENS=1024
 TENSOR_PARALLEL_SIZE=1
 MAX_MODEL_LEN=2048
 DOWNLOAD_DIR=/mnt/disks/persist
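The v6e-1 benchmark now allows fewer concurrent sequences (512 → 256) but a doubled per-step token budget (512 → 1024). As a sketch of what this configures, assuming the env vars are forwarded to vLLM's standard CLI flags of the same names (the benchmark wiring itself is not shown here):

```bash
# Hedged sketch: MAX_NUM_SEQS caps how many sequences are scheduled at once;
# MAX_NUM_BATCHED_TOKENS caps the total tokens processed per scheduler step.
vllm serve meta-llama/Llama-3.1-8B-Instruct \
    --max-num-seqs 256 \
    --max-num-batched-tokens 1024 \
    --tensor-parallel-size 1 \
    --max-model-len 2048 \
    --download-dir /mnt/disks/persist
```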

.buildkite/scripts/tpu/docker_run_bm.sh

Lines changed: 1 addition & 1 deletion

@@ -68,7 +68,7 @@ docker run \

 echo "run script..."
 echo
-docker exec "$CONTAINER_NAME" /bin/bash -c ".buildkite/scripts/hardware_ci/run_bm.sh"
+docker exec "$CONTAINER_NAME" /bin/bash -c ".buildkite/scripts/tpu/run_bm.sh"

 echo "copy result back..."
 VLLM_LOG="$LOG_ROOT/$TEST_NAME"_vllm_log.txt

.buildkite/test-pipeline.yaml

Lines changed: 5 additions & 0 deletions

@@ -615,13 +615,18 @@ steps:
   - vllm/executor/
   - vllm/model_executor/models/
   - tests/distributed/
+  - tests/examples/offline_inference/data_parallel.py
   commands:
   - # the following commands are for the first node, with ip 192.168.10.10 (ray environment already set up)
   - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'
+  - NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
+  - python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=0 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code
   - VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py
   - VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py
   - # the following commands are for the second node, with ip 192.168.10.11 (ray environment already set up)
   - VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'
+  - NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
+  - python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=1 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code

 - label: Distributed Tests (2 GPUs) # 40min
   mirror_hardwares: [amdexperimental]
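Both nodes run the same torchrun command and meet at the c10d rendezvous on 192.168.10.10; only `--node-rank` differs between the two data-parallel invocations, and the `grep` turns each script's success message into the step's exit status. For a quick local reproduction of the new data-parallel test, a single-node run might look like this, assuming the example script's node arguments default to one node with rank 0:

```bash
# Hypothetical single-node repro of the 2-way data-parallel example
# (assumes --node-size defaults to 1 and --node-rank to 0).
python3 examples/offline_inference/data_parallel.py \
    --dp-size=2 --tp-size=1 --enforce-eager
```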

.github/CODEOWNERS

Lines changed: 4 additions & 0 deletions

@@ -18,6 +18,10 @@
 /vllm/entrypoints @aarnphm
 CMakeLists.txt @tlrmchlsmth

+# Any change to VllmConfig can have a large user-facing impact,
+# so spam a lot of people
+/vllm/config.py @simon-mo @WoosukKwon @youkaichao @robertgshaw2-redhat @mgoin @tlrmchlsmth @houseroad @hmellor
+
 # vLLM V1
 /vllm/v1 @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat
 /vllm/v1/structured_output @mgoin @russellb @aarnphm

.github/mergify.yml

Lines changed: 14 additions & 1 deletion

@@ -45,6 +45,7 @@ pull_request_rules:
       - files~=^vllm/entrypoints/openai/tool_parsers/llama.*\.py
       - files~=^vllm/model_executor/models/.*llama.*\.py
       - files~=^vllm/transformers_utils/configs/.*llama.*\.py
+      - title~=(?i)llama
   actions:
     label:
       add:

@@ -65,6 +66,19 @@ pull_request_rules:
       add:
         - multi-modality

+- name: label-performance
+  description: Automatically apply performance label
+  conditions:
+    - or:
+      - files~=^benchmarks/
+      - files~=^vllm/benchmarks/
+      - files~=^tests/benchmarks/
+      - files~=^\.buildkite/nightly-benchmarks/
+  actions:
+    label:
+      add:
+        - performance
+
 - name: label-qwen
   description: Automatically apply qwen label
   conditions:

@@ -74,7 +88,6 @@ pull_request_rules:
     - files~=^vllm/model_executor/models/.*qwen.*\.py
     - files~=^vllm/reasoning/.*qwen.*\.py
     - title~=(?i)Qwen
-    - body~=(?i)Qwen
   actions:
     label:
       add:
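The new `title~=(?i)llama` condition labels PRs by title as well as by touched files; the `(?i)` prefix makes the regex case-insensitive. A rough local sanity check, using grep's `-i` to approximate mergify's `(?i)`:

```bash
# Titles the llama rule would now match, regardless of case.
echo "[Model] Fix LLaMA rotary embedding init" | grep -iqE 'llama' && echo "labeled: llama"
# Titles it would not match.
echo "[CI] Speed up docker build" | grep -iqE 'llama' || echo "not labeled"
```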

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions

@@ -115,6 +115,11 @@ repos:
     entry: python tools/check_spdx_header.py
     language: python
     types: [python]
+  - id: check-root-lazy-imports
+    name: Check root lazy imports
+    entry: python tools/check_init_lazy_imports.py
+    language: python
+    types: [python]
   - id: check-filenames
     name: Check for spaces in all filenames
     entry: bash
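The new hook wires `tools/check_init_lazy_imports.py` (presumably added elsewhere in this 247-file commit) into pre-commit alongside the existing SPDX-header check. Once pre-commit is installed, the hook can be exercised on its own:

```bash
# Run only the new lazy-import check across the whole tree,
# instead of waiting for it to trigger on the next commit.
pre-commit run check-root-lazy-imports --all-files
```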

README.md

Lines changed: 2 additions & 0 deletions

@@ -154,11 +154,13 @@ If you use vLLM for your research, please cite our [paper](https://arxiv.org/abs

 ## Contact Us

+<!-- --8<-- [start:contact-us] -->
 - For technical questions and feature requests, please use GitHub [Issues](https://github.com/vllm-project/vllm/issues) or [Discussions](https://github.com/vllm-project/vllm/discussions)
 - For discussing with fellow users, please use the [vLLM Forum](https://discuss.vllm.ai)
 - For coordinating contributions and development, please use [Slack](https://slack.vllm.ai)
 - For security disclosures, please use GitHub's [Security Advisories](https://github.com/vllm-project/vllm/security/advisories) feature
 - For collaborations and partnerships, please contact us at [vllm-questions@lists.berkeley.edu](mailto:vllm-questions@lists.berkeley.edu)
+<!-- --8<-- [end:contact-us] -->

 ## Media Kit
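The `--8<--` comments are snippet section markers from the pymdownx.snippets Markdown extension. Assuming the vLLM docs build enables it, a docs page can now embed exactly this span with a one-line include such as `--8<-- "README.md:contact-us"`, keeping the contact list in one place instead of duplicating it.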
