Skip to content

Commit cb341c7

Browse files
authored
[CI] Fix PD job (#1129)
Fix e2e test for PD job. Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent e63fc6f commit cb341c7

File tree

3 files changed

+8
-8
lines changed

3 files changed

+8
-8
lines changed

tests/e2e/common.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ _err() { _red "Error: $*" && exit 1; }
1414

1515
CURL_TIMEOUT=1
1616
CURL_COOLDOWN=5
17-
CURL_MAX_TRIES=120
17+
CURL_MAX_TRIES=180
1818

1919
function wait_url_ready() {
2020
local serve_name="$1"
@@ -31,7 +31,7 @@ function wait_url_ready() {
3131
break
3232
fi
3333
if [ "$i" -gt "$CURL_MAX_TRIES" ]; then
34-
_info "===> \$CURL_MAX_TRIES exceeded waiting for ${serve_name} to be ready"
34+
_info "===> ${CURL_MAX_TRIES}s exceeded waiting for ${serve_name} to be ready"
3535
return 1
3636
fi
3737
sleep "$CURL_COOLDOWN"

tests/e2e/pd_disaggreate/setup_pd.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ function run_prefill_instance() {
6666
--served-model-name Deepseek \
6767
--max-model-len 2000 \
6868
--trust-remote-code \
69-
--kv-transfer-config "$KV_CONFIG" &
69+
--kv-transfer-config "$KV_CONFIG"
7070
}
7171

7272

@@ -119,7 +119,7 @@ function run_decode_instance() {
119119
--max-num-batched-tokens 2000 \
120120
--trust-remote-code \
121121
--gpu-memory-utilization 0.9 \
122-
--kv-transfer-config "$KV_CONFIG" &
122+
--kv-transfer-config "$KV_CONFIG"
123123
}
124124

125125
function run_proxy_server() {

tests/e2e/run_disagg_pd.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,16 @@ _info "Started pd disaggregated proxy server"
4343

4444
PREFILL_PROC_NAME="Prefill-instance"
4545
PREFILL_PORT=8001
46-
run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS
4746
_info "Starting prefill instance"
48-
47+
run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS &
48+
_info "Waiting for prefill instance ready"
4949
wait_url_ready $PREFILL_PROC_NAME "http://localhost:${PREFILL_PORT}/v1/completions"
5050

5151
DECODE_PROC_NAME="Decode-instance"
5252
DECODE_PORT=8002
53-
run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS
5453
_info "Starting decode instance"
55-
54+
run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS &
55+
_info "Waiting for decode instance ready"
5656
wait_url_ready $DECODE_PROC_NAME "http://localhost:${DECODE_PORT}/v1/completions"
5757

5858
_info "pd disaggregated system is ready for handling request"

0 commit comments

Comments
 (0)