From ae220c515b93ff1c9269b849b51b300287c2f09e Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Sat, 17 May 2025 03:53:12 +0000 Subject: [PATCH 1/4] [CI/UT][PD Disaggreate] Initialize PD Disaggreate UT Signed-off-by: MengqingCao --- requirements-dev.txt | 1 + tests/e2e/pd_disaggreate/setup_pd.sh | 138 ++++++++++++++++++ tests/e2e/pd_disaggreate/test_pd_e2e.py | 60 ++++++++ tests/e2e/run_disagg_pd.sh | 48 ++++++ .../distributed/kv_transfer/simple_pipe.py | 2 +- 5 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 tests/e2e/pd_disaggreate/setup_pd.sh create mode 100644 tests/e2e/pd_disaggreate/test_pd_e2e.py create mode 100644 tests/e2e/run_disagg_pd.sh diff --git a/requirements-dev.txt b/requirements-dev.txt index 4fb45d11b..f1aed477e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,3 +9,4 @@ ray types-jsonschema xgrammar zmq +quart diff --git a/tests/e2e/pd_disaggreate/setup_pd.sh b/tests/e2e/pd_disaggreate/setup_pd.sh new file mode 100644 index 000000000..c73cee27f --- /dev/null +++ b/tests/e2e/pd_disaggreate/setup_pd.sh @@ -0,0 +1,138 @@ +#!/bin/bash + +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + + +function cleanup_instances() { + VLLM_PID=$(pgrep -f "vllm serve") + _info "===> Try kill -2 ${VLLM_PID} to exit." + kill -2 "$VLLM_PID" + wait_for_exit "$VLLM_PID" +} + + +function run_prefill_instance() { + local model_name=$1 + local tp_size=$2 + local prefill_port=$3 + local register_port=$4 + + echo "================================" + echo "Testing model: $model_name" + echo "================================" + # Start prefill instance + + KV_CONFIG=$(jq -n \ + --arg kv_connector "AscendSimpleConnector" \ + --arg kv_buffer_device "npu" \ + --arg kv_role "kv_producer" \ + --argjson kv_parallel_size 8 \ + --arg kv_port "11001" \ + --argjson llmdatadist_comm_port "26000" \ + --arg proxy_ip "0.0.0.0" \ + --argjson proxy_port "$register_port" \ + --argjson http_port "$prefill_port" \ + '{ + "kv_connector": $kv_connector, + "kv_buffer_device": $kv_buffer_device, + "kv_role": $kv_role, + "kv_parallel_size": $kv_parallel_size, + "kv_port": $kv_port, + "kv_connector_extra_config": { + "prompt_device_ips": ["29.7.130.29"], + "decode_device_ips": ["29.7.186.66"], + "llmdatadist_comm_port": $llmdatadist_comm_port, + "proxy_ip": $proxy_ip, + "proxy_port": $proxy_port, + "http_port": $http_port + } + }') + + # start prefill instance + ASCEND_RT_VISIBLE_DEVICES=0 vllm serve $model_name \ + --host 0.0.0.0 \ + --port $prefill_port \ + --tensor-parallel-size $tp_size \ + --served-model-name Deepseek \ + --max-model-len 2000 \ + --trust-remote-code \ + --kv-transfer-config "$KV_CONFIG" & +} + + + +function run_decode_instance() { + # Start decode instance + local model_name=$1 + local tp_size=$2 + local decode_port=$3 + local register_port=$4 + + KV_CONFIG=$(jq -n \ + --arg kv_connector "AscendSimpleConnector" \ + --arg kv_buffer_device "npu" \ + --arg kv_role "kv_consumer" \ + --argjson kv_parallel_size 8 \ + --arg kv_port "21001" \ + --argjson llmdatadist_comm_port "26000" \ + --arg proxy_ip "0.0.0.0" \ + --argjson proxy_port "$register_port" \ + --argjson http_port "$decode_port" \ + '{ + "kv_connector": $kv_connector, + "kv_buffer_device": $kv_buffer_device, + "kv_role": $kv_role, + "kv_parallel_size": $kv_parallel_size, + "kv_port": $kv_port, + "kv_connector_extra_config": { + "prompt_device_ips": ["29.7.130.29"], + "decode_device_ips": ["29.7.186.66"], + "llmdatadist_comm_port": $llmdatadist_comm_port, + "proxy_ip": $proxy_ip, + "proxy_port": $proxy_port, + "http_port": $http_port + } + }') + + # start decode instance + ASCEND_RT_VISIBLE_DEVICES=1 vllm serve $model_name \ + --host 0.0.0.0 \ + --port $decode_port \ + --tensor-parallel-size $tp_size \ + --seed 1024 \ + --served-model-name Deepseek \ + --max-model-len 2000 \ + --max-num-batched-tokens 2000 \ + --trust-remote-code \ + --gpu-memory-utilization 0.9 \ + --kv-transfer-config "$KV_CONFIG" & +} + +function run_proxy_server() { + # Build the command for the proxy server with all the hosts and ports + register_port=$1 + proxy_port=$2 + PROXY_CMD="python examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py --http-port $proxy_port --register-port $register_port" + + # Start the proxy server + echo "Starting proxy server with command: $PROXY_CMD" + $PROXY_CMD & + + # Wait for the proxy to start + sleep 3 +} diff --git a/tests/e2e/pd_disaggreate/test_pd_e2e.py b/tests/e2e/pd_disaggreate/test_pd_e2e.py new file mode 100644 index 000000000..4c8852f0c --- /dev/null +++ b/tests/e2e/pd_disaggreate/test_pd_e2e.py @@ -0,0 +1,60 @@ +import os +import signal +import subprocess +import time + +import requests + +PROXY_PORT = 8192 +REGISTER_PORT = 8193 + +SCRIPT_PATH = os.path.abspath("./tests/e2e/run_disagg_pd.sh") + + +def wait_for_port(port, timeout=30): + import socket + start = time.time() + while time.time() - start < timeout: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + if sock.connect_ex(("127.0.0.1", port)) == 0: + return True + time.sleep(1) + raise TimeoutError(f"Port {port} not ready after {timeout}s") + + +def start_and_test_pipeline(): + print("Launching bash script to run vLLM PD setup...") + proc = subprocess.Popen(["bash", SCRIPT_PATH]) + try: + print("Waiting for proxy port to be available...") + wait_for_port(PROXY_PORT, 1200) + + # request + prompt = "The future of AI is" + payload = { + "model": "Deepseek/DeepSeek-V2-Lite-Chat", + "prompt": prompt, + "max_tokens": 64, + "temperature": 0, + } + response = requests.post(f"http://localhost:{PROXY_PORT}/generate", + json=payload, + timeout=10) + assert response.status_code == 200, f"HTTP failed: {response.status_code}" + result = response.json() + print("Response:", result) + assert "text" in result["choices"][0] + assert len(result["choices"][0]["text"].strip()) > 0 + + finally: + # clean up subprocesses + print("Cleaning up subprocess...") + proc.send_signal(signal.SIGINT) + try: + proc.wait(timeout=10) + except subprocess.TimeoutExpired: + proc.kill() + + +def test_disaggregated_pd_pipeline(): + start_and_test_pipeline() diff --git a/tests/e2e/run_disagg_pd.sh b/tests/e2e/run_disagg_pd.sh new file mode 100644 index 000000000..afb863fe2 --- /dev/null +++ b/tests/e2e/run_disagg_pd.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + +set -eo errexit + +. $(dirname "$0")/common.sh +. $(dirname "$0")/pd_disaggreate/setup_pd.sh + +MODEL_NAME="/home/cmq/vllm-workspace/DeepSeek-V2-Lite" +TP_SIZE=1 + +_info "====> Start pd disaggregated test" +REGISTER_PORT=10101 +PREOXY_PORT=10102 +run_proxy_server $REGISTER_PORT $PREOXY_PORT +_info "Started pd disaggregated proxy server" + +PREFILL_PROC_NAME="Prefill-instance" +PREFILL_PORT=8001 +run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT +_info "Startting prefill instance" + +wait_url_ready $PREFILL_PROC_NAME "http://localhost:${PREFILL_PORT}/v1/completions" + +DECODE_PROC_NAME="Decode-instance" +DECODE_PORT=8002 +run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT +_info "Startting decode instance" + +wait_url_ready $DECODE_PROC_NAME "http://localhost:${DECODE_PORT}/v1/completions" + +_info "pd disaggregated system is ready for handling request" diff --git a/vllm_ascend/distributed/kv_transfer/simple_pipe.py b/vllm_ascend/distributed/kv_transfer/simple_pipe.py index ec84cb2de..c07bb537a 100644 --- a/vllm_ascend/distributed/kv_transfer/simple_pipe.py +++ b/vllm_ascend/distributed/kv_transfer/simple_pipe.py @@ -98,7 +98,7 @@ def __init__( if proxy_ip == "" or proxy_port == "": self.proxy_address = "" else: - self.proxy_address = proxy_ip + ":" + proxy_port + self.proxy_address = proxy_ip + ":" + str(proxy_port) self._register_thread = None if port_offset == 0 and self.proxy_address != "": From 1708df57db40e2a1891994136c61a169496a6af2 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Tue, 20 May 2025 03:07:05 +0000 Subject: [PATCH 2/4] some fixes Signed-off-by: MengqingCao --- .github/workflows/vllm_ascend_test_pd.yaml | 18 +++++ .../disaggregated_prefill_offline.py | 2 +- .../p2p_disaggrefated_prefill_proxy.py | 11 ++- format.sh | 30 ++++----- requirements-dev.txt | 1 + tests/e2e/pd_disaggreate/setup_pd.sh | 16 +++-- tests/e2e/pd_disaggreate/test_pd_e2e.py | 67 ++++++++++++++++--- tests/e2e/run_disagg_pd.sh | 20 ++++-- .../distributed/kv_transfer/simple_pipe.py | 12 ++-- 9 files changed, 136 insertions(+), 41 deletions(-) diff --git a/.github/workflows/vllm_ascend_test_pd.yaml b/.github/workflows/vllm_ascend_test_pd.yaml index 9a2c8bbe8..cf13c1375 100644 --- a/.github/workflows/vllm_ascend_test_pd.yaml +++ b/.github/workflows/vllm_ascend_test_pd.yaml @@ -100,3 +100,21 @@ jobs: run: | pip install -r requirements-dev.txt pip install -v -e . + # only run test on spec decode when the related code changed + - name: Check for changes in Speculative Decode + if: github.event_name != 'schedule' + id: filter_pd + uses: dorny/paths-filter@v3 + with: + filters: | + pd_tests_changed: + - ".github/workflows/vllm_ascend_test_pd.yaml" + - "tests/e2e/pd_disaggreate/**" + - "tests/e2e/run_disagg_pd.sh" + - "vllm_ascend/distributed/**" + - "vllm_ascend/models/deepseek_v2.py" + + - name: Run vllm-project/vllm-ascend PD Disaggregation test + if: steps.filter_pd.outputs.pd_tests_changed == 'true' || github.event_name == 'schedule' + run: | + pytest -sv tests/e2e/pd_disaggreate/test_pd_e2e.py diff --git a/examples/disaggregated_prefill/disaggregated_prefill_offline.py b/examples/disaggregated_prefill/disaggregated_prefill_offline.py index af7b66398..d7dd4b88b 100644 --- a/examples/disaggregated_prefill/disaggregated_prefill_offline.py +++ b/examples/disaggregated_prefill/disaggregated_prefill_offline.py @@ -13,7 +13,7 @@ from multiprocessing import Event, Process kv_connector_extra_config = { - "prompt_device_ips": ["1.2.3.1", "1.2.3.2"], + "prefill_device_ips": ["1.2.3.1", "1.2.3.2"], "decode_device_ips": ["1.2.3.9", "1.2.3.10"], "llmdatadist_comm_port": 26000, } diff --git a/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py b/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py index 6f8b57b7d..03192561e 100644 --- a/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +++ b/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py @@ -181,6 +181,13 @@ async def handle_request(): if __name__ == "__main__": - t = start_service_discovery("0.0.0.0", 30001) - app.run(host="0.0.0.0", port=10001) + import argparse + parser = argparse.ArgumentParser( + description="args of disaggregated-prefill proxy") + parser.add_argument("--http-port", type=int, default=10001) + parser.add_argument("--register-port", type=int, default=10002) + args = parser.parse_args() + + t = start_service_discovery("0.0.0.0", args.register_port) + app.run(host="0.0.0.0", port=args.http_port) t.join() diff --git a/format.sh b/format.sh index 595bf2f58..27c1ae19b 100755 --- a/format.sh +++ b/format.sh @@ -173,21 +173,21 @@ spell_check_changed() { fi } -echo 'vllm-ascend codespell:' -# Run Codespell -## This flag runs spell check of individual files. --files *must* be the first command line -## arg to use this option. -if [[ "$1" == '--files' ]]; then - spell_check "${@:2}" - # If `--all` is passed, then any further arguments are ignored and the - # entire python directory is linted. -elif [[ "$1" == '--all' ]]; then - spell_check_all -else - # Check spelling only of the files that changed in last commit. - spell_check_changed -fi -echo 'vllm-ascend codespell: Done' +# echo 'vllm-ascend codespell:' +# # Run Codespell +# ## This flag runs spell check of individual files. --files *must* be the first command line +# ## arg to use this option. +# if [[ "$1" == '--files' ]]; then +# spell_check "${@:2}" +# # If `--all` is passed, then any further arguments are ignored and the +# # entire python directory is linted. +# elif [[ "$1" == '--all' ]]; then +# spell_check_all +# else +# # Check spelling only of the files that changed in last commit. +# spell_check_changed +# fi +# echo 'vllm-ascend codespell: Done' # Lint specified files diff --git a/requirements-dev.txt b/requirements-dev.txt index f1aed477e..c8264a926 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -10,3 +10,4 @@ types-jsonschema xgrammar zmq quart +types-psutil diff --git a/tests/e2e/pd_disaggreate/setup_pd.sh b/tests/e2e/pd_disaggreate/setup_pd.sh index c73cee27f..b665bca01 100644 --- a/tests/e2e/pd_disaggreate/setup_pd.sh +++ b/tests/e2e/pd_disaggreate/setup_pd.sh @@ -31,6 +31,8 @@ function run_prefill_instance() { local tp_size=$2 local prefill_port=$3 local register_port=$4 + local prefill_device_ips=$5 + local decode_device_ips=$6 echo "================================" echo "Testing model: $model_name" @@ -43,6 +45,8 @@ function run_prefill_instance() { --arg kv_role "kv_producer" \ --argjson kv_parallel_size 8 \ --arg kv_port "11001" \ + --argjson prefill_device_ips "$prefill_device_ips" \ + --argjson decode_device_ips "$decode_device_ips" \ --argjson llmdatadist_comm_port "26000" \ --arg proxy_ip "0.0.0.0" \ --argjson proxy_port "$register_port" \ @@ -54,8 +58,8 @@ function run_prefill_instance() { "kv_parallel_size": $kv_parallel_size, "kv_port": $kv_port, "kv_connector_extra_config": { - "prompt_device_ips": ["29.7.130.29"], - "decode_device_ips": ["29.7.186.66"], + "prefill_device_ips": $prefill_device_ips, + "decode_device_ips": $decode_device_ips, "llmdatadist_comm_port": $llmdatadist_comm_port, "proxy_ip": $proxy_ip, "proxy_port": $proxy_port, @@ -82,6 +86,8 @@ function run_decode_instance() { local tp_size=$2 local decode_port=$3 local register_port=$4 + local prefill_device_ips=$5 + local decode_device_ips=$6 KV_CONFIG=$(jq -n \ --arg kv_connector "AscendSimpleConnector" \ @@ -89,6 +95,8 @@ function run_decode_instance() { --arg kv_role "kv_consumer" \ --argjson kv_parallel_size 8 \ --arg kv_port "21001" \ + --argjson prefill_device_ips "$prefill_device_ips" \ + --argjson decode_device_ips "$decode_device_ips" \ --argjson llmdatadist_comm_port "26000" \ --arg proxy_ip "0.0.0.0" \ --argjson proxy_port "$register_port" \ @@ -100,8 +108,8 @@ function run_decode_instance() { "kv_parallel_size": $kv_parallel_size, "kv_port": $kv_port, "kv_connector_extra_config": { - "prompt_device_ips": ["29.7.130.29"], - "decode_device_ips": ["29.7.186.66"], + "prefill_device_ips": $prefill_device_ips, + "decode_device_ips": $decode_device_ips, "llmdatadist_comm_port": $llmdatadist_comm_port, "proxy_ip": $proxy_ip, "proxy_port": $proxy_port, diff --git a/tests/e2e/pd_disaggreate/test_pd_e2e.py b/tests/e2e/pd_disaggreate/test_pd_e2e.py index 4c8852f0c..cef1915e2 100644 --- a/tests/e2e/pd_disaggreate/test_pd_e2e.py +++ b/tests/e2e/pd_disaggreate/test_pd_e2e.py @@ -1,12 +1,60 @@ +#!/bin/bash + +# +# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# This file is a part of the vllm-ascend project. +# + import os import signal import subprocess import time +import psutil import requests -PROXY_PORT = 8192 -REGISTER_PORT = 8193 + +def kill_process_and_children(pid): + try: + parent = psutil.Process(pid) + children = parent.children(recursive=True) + for child in children: + print(f"Killing child process {child.pid}") + child.kill() + print(f"Killing parent process {pid}") + parent.kill() + except psutil.NoSuchProcess: + pass + + +def kill_all_vllm_related(): + current_pid = os.getpid() + + for proc in psutil.process_iter(['pid', 'cmdline']): + try: + if proc.pid == current_pid: + continue + cmd = ' '.join(proc.info['cmdline']) + if "vllm" in cmd or "proxy" in cmd or "engine_worker" in cmd: + kill_process_and_children(proc.pid) + except Exception: + continue + + +PROXY_PORT = 10102 +DECODE_PORT = 8002 SCRIPT_PATH = os.path.abspath("./tests/e2e/run_disagg_pd.sh") @@ -28,18 +76,20 @@ def start_and_test_pipeline(): try: print("Waiting for proxy port to be available...") wait_for_port(PROXY_PORT, 1200) + wait_for_port(DECODE_PORT, 1200) # request - prompt = "The future of AI is" payload = { - "model": "Deepseek/DeepSeek-V2-Lite-Chat", - "prompt": prompt, + "model": "Deepseek", + "prompt": "The future of AI is", "max_tokens": 64, "temperature": 0, } - response = requests.post(f"http://localhost:{PROXY_PORT}/generate", - json=payload, - timeout=10) + response = requests.post( + f"http://localhost:{PROXY_PORT}/v1/completions", + headers={"Content-Type": "application/json"}, + json=payload, + timeout=10) assert response.status_code == 200, f"HTTP failed: {response.status_code}" result = response.json() print("Response:", result) @@ -54,6 +104,7 @@ def start_and_test_pipeline(): proc.wait(timeout=10) except subprocess.TimeoutExpired: proc.kill() + kill_all_vllm_related() def test_disaggregated_pd_pipeline(): diff --git a/tests/e2e/run_disagg_pd.sh b/tests/e2e/run_disagg_pd.sh index afb863fe2..6f1400152 100644 --- a/tests/e2e/run_disagg_pd.sh +++ b/tests/e2e/run_disagg_pd.sh @@ -22,9 +22,19 @@ set -eo errexit . $(dirname "$0")/common.sh . $(dirname "$0")/pd_disaggreate/setup_pd.sh -MODEL_NAME="/home/cmq/vllm-workspace/DeepSeek-V2-Lite" +export VLLM_USE_MODELSCOPE="True" + +MODEL_NAME="deepseek-ai/DeepSeek-V2-Lite" +# TODO: add tp case TP_SIZE=1 +# TODO: support multi-card +prefill_ip=$(hccn_tool -i 0 -ip -g | grep "ipaddr" | awk -F: '{print $2}' | xargs) +PREFILL_DEVICE_IPS="[\"$prefill_ip\"]" + +decode_ip=$(hccn_tool -i 1 -ip -g | grep "ipaddr" | awk -F: '{print $2}' | xargs) +DECODE_DEVICE_IPS="[\"$decode_ip\"]" + _info "====> Start pd disaggregated test" REGISTER_PORT=10101 PREOXY_PORT=10102 @@ -33,15 +43,15 @@ _info "Started pd disaggregated proxy server" PREFILL_PROC_NAME="Prefill-instance" PREFILL_PORT=8001 -run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT -_info "Startting prefill instance" +run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS +_info "Starting prefill instance" wait_url_ready $PREFILL_PROC_NAME "http://localhost:${PREFILL_PORT}/v1/completions" DECODE_PROC_NAME="Decode-instance" DECODE_PORT=8002 -run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT -_info "Startting decode instance" +run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS +_info "Starting decode instance" wait_url_ready $DECODE_PROC_NAME "http://localhost:${DECODE_PORT}/v1/completions" diff --git a/vllm_ascend/distributed/kv_transfer/simple_pipe.py b/vllm_ascend/distributed/kv_transfer/simple_pipe.py index c07bb537a..79c950793 100644 --- a/vllm_ascend/distributed/kv_transfer/simple_pipe.py +++ b/vllm_ascend/distributed/kv_transfer/simple_pipe.py @@ -61,22 +61,22 @@ def __init__( raise NotImplementedError( "kv_role should be inside [kv_producer, kv_consumer]") - prompt_device_ips = kv_connector_extra_config.get( - "prompt_device_ips", None) + prefill_device_ips = kv_connector_extra_config.get( + "prefill_device_ips", None) decode_device_ips = kv_connector_extra_config.get( "decode_device_ips", None) - if prompt_device_ips is None or decode_device_ips is None: + if prefill_device_ips is None or decode_device_ips is None: raise ValueError( - "Please specify prompt_device_ips and decode_device_ips" + "Please specify prefill_device_ips and decode_device_ips" "in kv_transfer_config.kv_connector_extra_config") - p_device_num = len(prompt_device_ips) + p_device_num = len(prefill_device_ips) d_device_num = len(decode_device_ips) # When number of devices in P and D is not equal, # we assume that device in D can be mapped to any device in P. self.p_device_rank = self.rank % p_device_num self.d_device_rank = self.rank % d_device_num - self.prompt_ip_list = prompt_device_ips + self.prompt_ip_list = prefill_device_ips self.decode_ip_list = decode_device_ips self.llmdatadist_comm_port = kv_connector_extra_config.get( "llmdatadist_comm_port", 26000) From 374f322af3e36dc0acf0725cfe1f8e5e0b70db78 Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Tue, 20 May 2025 03:08:34 +0000 Subject: [PATCH 3/4] tiny fix Signed-off-by: MengqingCao --- .github/workflows/vllm_ascend_test_pd.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/vllm_ascend_test_pd.yaml b/.github/workflows/vllm_ascend_test_pd.yaml index cf13c1375..4b7c1c2ed 100644 --- a/.github/workflows/vllm_ascend_test_pd.yaml +++ b/.github/workflows/vllm_ascend_test_pd.yaml @@ -100,8 +100,8 @@ jobs: run: | pip install -r requirements-dev.txt pip install -v -e . - # only run test on spec decode when the related code changed - - name: Check for changes in Speculative Decode + # only run test on PD Disaggregation when the related code changed + - name: Check for changes in PD Disaggregation if: github.event_name != 'schedule' id: filter_pd uses: dorny/paths-filter@v3 From 4b3492ed3a76652d953cffe3283f0724ade2462b Mon Sep 17 00:00:00 2001 From: MengqingCao Date: Tue, 20 May 2025 03:10:22 +0000 Subject: [PATCH 4/4] fix format Signed-off-by: MengqingCao --- format.sh | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/format.sh b/format.sh index 27c1ae19b..595bf2f58 100755 --- a/format.sh +++ b/format.sh @@ -173,21 +173,21 @@ spell_check_changed() { fi } -# echo 'vllm-ascend codespell:' -# # Run Codespell -# ## This flag runs spell check of individual files. --files *must* be the first command line -# ## arg to use this option. -# if [[ "$1" == '--files' ]]; then -# spell_check "${@:2}" -# # If `--all` is passed, then any further arguments are ignored and the -# # entire python directory is linted. -# elif [[ "$1" == '--all' ]]; then -# spell_check_all -# else -# # Check spelling only of the files that changed in last commit. -# spell_check_changed -# fi -# echo 'vllm-ascend codespell: Done' +echo 'vllm-ascend codespell:' +# Run Codespell +## This flag runs spell check of individual files. --files *must* be the first command line +## arg to use this option. +if [[ "$1" == '--files' ]]; then + spell_check "${@:2}" + # If `--all` is passed, then any further arguments are ignored and the + # entire python directory is linted. +elif [[ "$1" == '--all' ]]; then + spell_check_all +else + # Check spelling only of the files that changed in last commit. + spell_check_changed +fi +echo 'vllm-ascend codespell: Done' # Lint specified files