Skip to content

Commit 4c13d74

Browse files
committed
Resolve merge conflicts and remove deprecated pending handshake logic
Signed-off-by: Will Eaton <weaton@redhat.com>
2 parents fbf0630 + 2c121d5 commit 4c13d74

File tree

379 files changed

+14632
-6468
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

379 files changed

+14632
-6468
lines changed

.buildkite/nightly-benchmarks/nightly-annotation.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Please download the visualization scripts in the post
1616
- Download `nightly-benchmarks.zip`.
1717
- In the same folder, run the following code:
1818

19-
```console
19+
```bash
2020
export HF_TOKEN=<your HF token>
2121
apt update
2222
apt install -y git

.buildkite/release-pipeline.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ steps:
102102
commands:
103103
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
104104
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
105+
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest"
105106
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
106107
env:
107108
DOCKER_BUILDKIT: "1"
@@ -117,6 +118,7 @@ steps:
117118
commands:
118119
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
119120
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:latest --progress plain -f docker/Dockerfile.neuron ."
121+
- "docker push public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:latest"
120122
- "docker push public.ecr.aws/q9t5s3a7/vllm-neuron-release-repo:$(buildkite-agent meta-data get release-version)"
121123
env:
122124
DOCKER_BUILDKIT: "1"

.buildkite/scripts/hardware_ci/run-neuron-test.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,11 @@ docker run --rm -it --device=/dev/neuron0 --network bridge \
5454
--name "${container_name}" \
5555
${image_name} \
5656
/bin/bash -c "
57+
set -e; # Exit on first error
5758
python3 /workspace/vllm/examples/offline_inference/neuron.py;
5859
python3 -m pytest /workspace/vllm/tests/neuron/1_core/ -v --capture=tee-sys;
5960
for f in /workspace/vllm/tests/neuron/2_core/*.py; do
60-
echo 'Running test file: '$f;
61+
echo \"Running test file: \$f\";
6162
python3 -m pytest \$f -v --capture=tee-sys;
6263
done
6364
"

.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,8 @@ run_and_track_test 14 "test_tpu_qkv_linear.py" \
159159
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_tpu_qkv_linear.py"
160160
run_and_track_test 15 "test_spmd_model_weight_loading.py" \
161161
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_spmd_model_weight_loading.py"
162+
run_and_track_test 16 "test_kv_cache_update_kernel.py" \
163+
"python3 -m pytest -s -v /workspace/vllm/tests/v1/tpu/test_kv_cache_update_kernel.py"
162164
163165
# After all tests have been attempted, exit with the overall status.
164166
if [ "$overall_script_exit_code" -ne 0 ]; then

.buildkite/scripts/hardware_ci/run-xpu-test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,5 @@ docker run \
2828
sh -c '
2929
VLLM_USE_V1=0 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m
3030
VLLM_USE_V1=0 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m -tp 2
31+
VLLM_USE_V1=1 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager
3132
'

.buildkite/scripts/tpu/config_v6e_1.env

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ CONTAINER_NAME=vllm-tpu
44

55
# vllm config
66
MODEL=meta-llama/Llama-3.1-8B-Instruct
7-
MAX_NUM_SEQS=512
8-
MAX_NUM_BATCHED_TOKENS=512
7+
MAX_NUM_SEQS=256
8+
MAX_NUM_BATCHED_TOKENS=1024
99
TENSOR_PARALLEL_SIZE=1
1010
MAX_MODEL_LEN=2048
1111
DOWNLOAD_DIR=/mnt/disks/persist

.buildkite/scripts/tpu/docker_run_bm.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ docker run \
6868

6969
echo "run script..."
7070
echo
71-
docker exec "$CONTAINER_NAME" /bin/bash -c ".buildkite/scripts/hardware_ci/run_bm.sh"
71+
docker exec "$CONTAINER_NAME" /bin/bash -c ".buildkite/scripts/tpu/run_bm.sh"
7272

7373
echo "copy result back..."
7474
VLLM_LOG="$LOG_ROOT/$TEST_NAME"_vllm_log.txt

.buildkite/test-pipeline.yaml

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ steps:
8989
- VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
9090

9191
- label: Chunked Prefill Test
92-
mirror_hardwares: [amdexperimental]
92+
mirror_hardwares: [amdexperimental, amdproduction]
9393
source_file_dependencies:
9494
- vllm/
9595
- tests/basic_correctness/test_chunked_prefill
@@ -271,6 +271,15 @@ steps:
271271
commands:
272272
- pytest -v -s prefix_caching
273273

274+
275+
- label: Platform Tests (CUDA)
276+
mirror_hardwares: [amdexperimental]
277+
source_file_dependencies:
278+
- vllm/
279+
- tests/cuda
280+
commands:
281+
- pytest -v -s cuda/test_cuda_context.py
282+
274283
- label: Samplers Test # 36min
275284
mirror_hardwares: [amdexperimental]
276285
source_file_dependencies:
@@ -606,13 +615,18 @@ steps:
606615
- vllm/executor/
607616
- vllm/model_executor/models/
608617
- tests/distributed/
618+
- tests/examples/offline_inference/data_parallel.py
609619
commands:
610620
- # the following commands are for the first node, with ip 192.168.10.10 (ray environment already set up)
611621
- VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'
622+
- NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
623+
- python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=0 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code
612624
- VLLM_MULTI_NODE=1 pytest -v -s distributed/test_multi_node_assignment.py
613625
- VLLM_MULTI_NODE=1 pytest -v -s distributed/test_pipeline_parallel.py
614626
- # the following commands are for the second node, with ip 192.168.10.11 (ray environment already set up)
615627
- VLLM_TEST_SAME_HOST=0 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_same_node.py | grep 'Same node test passed'
628+
- NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed'
629+
- python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=1 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code
616630

617631
- label: Distributed Tests (2 GPUs) # 40min
618632
mirror_hardwares: [amdexperimental]

.github/CODEOWNERS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
/vllm/entrypoints @aarnphm
1919
CMakeLists.txt @tlrmchlsmth
2020

21+
# Any change to VllmConfig can have a large user-facing impact,
22+
# so spam a lot of people
23+
/vllm/config.py @simon-mo @WoosukKwon @youkaichao @robertgshaw2-redhat @mgoin @tlrmchlsmth @houseroad @hmellor
24+
2125
# vLLM V1
2226
/vllm/v1 @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat
2327
/vllm/v1/structured_output @mgoin @russellb @aarnphm

.github/mergify.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ pull_request_rules:
4545
- files~=^vllm/entrypoints/openai/tool_parsers/llama.*\.py
4646
- files~=^vllm/model_executor/models/.*llama.*\.py
4747
- files~=^vllm/transformers_utils/configs/.*llama.*\.py
48+
- title~=(?i)llama
4849
actions:
4950
label:
5051
add:
@@ -65,6 +66,19 @@ pull_request_rules:
6566
add:
6667
- multi-modality
6768

69+
- name: label-performance
70+
description: Automatically apply performance label
71+
conditions:
72+
- or:
73+
- files~=^benchmarks/
74+
- files~=^vllm/benchmarks/
75+
- files~=^tests/benchmarks/
76+
- files~=^\.buildkite/nightly-benchmarks/
77+
actions:
78+
label:
79+
add:
80+
- performance
81+
6882
- name: label-qwen
6983
description: Automatically apply qwen label
7084
conditions:
@@ -74,7 +88,6 @@ pull_request_rules:
7488
- files~=^vllm/model_executor/models/.*qwen.*\.py
7589
- files~=^vllm/reasoning/.*qwen.*\.py
7690
- title~=(?i)Qwen
77-
- body~=(?i)Qwen
7891
actions:
7992
label:
8093
add:

0 commit comments

Comments
 (0)