File tree Expand file tree Collapse file tree 3 files changed +8
-8
lines changed Expand file tree Collapse file tree 3 files changed +8
-8
lines changed Original file line number Diff line number Diff line change @@ -14,7 +14,7 @@ _err() { _red "Error: $*" && exit 1; }
14
14
15
15
CURL_TIMEOUT=1
16
16
CURL_COOLDOWN=5
17
- CURL_MAX_TRIES=120
17
+ CURL_MAX_TRIES=180
18
18
19
19
function wait_url_ready() {
20
20
local serve_name=" $1 "
@@ -31,7 +31,7 @@ function wait_url_ready() {
31
31
break
32
32
fi
33
33
if [ " $i " -gt " $CURL_MAX_TRIES " ]; then
34
- _info " ===> \$ CURL_MAX_TRIES exceeded waiting for ${serve_name} to be ready"
34
+ _info " ===> ${ CURL_MAX_TRIES} s exceeded waiting for ${serve_name} to be ready"
35
35
return 1
36
36
fi
37
37
sleep " $CURL_COOLDOWN "
Original file line number Diff line number Diff line change @@ -66,7 +66,7 @@ function run_prefill_instance() {
66
66
--served-model-name Deepseek \
67
67
--max-model-len 2000 \
68
68
--trust-remote-code \
69
- --kv-transfer-config " $KV_CONFIG " &
69
+ --kv-transfer-config " $KV_CONFIG "
70
70
}
71
71
72
72
@@ -119,7 +119,7 @@ function run_decode_instance() {
119
119
--max-num-batched-tokens 2000 \
120
120
--trust-remote-code \
121
121
--gpu-memory-utilization 0.9 \
122
- --kv-transfer-config " $KV_CONFIG " &
122
+ --kv-transfer-config " $KV_CONFIG "
123
123
}
124
124
125
125
function run_proxy_server() {
Original file line number Diff line number Diff line change @@ -43,16 +43,16 @@ _info "Started pd disaggregated proxy server"
43
43
44
44
PREFILL_PROC_NAME=" Prefill-instance"
45
45
PREFILL_PORT=8001
46
- run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS
47
46
_info " Starting prefill instance"
48
-
47
+ run_prefill_instance $MODEL_NAME $TP_SIZE $PREFILL_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS &
48
+ _info " Waiting for prefill instance ready"
49
49
wait_url_ready $PREFILL_PROC_NAME " http://localhost:${PREFILL_PORT} /v1/completions"
50
50
51
51
DECODE_PROC_NAME=" Decode-instance"
52
52
DECODE_PORT=8002
53
- run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS
54
53
_info " Starting decode instance"
55
-
54
+ run_decode_instance $MODEL_NAME $TP_SIZE $DECODE_PORT $REGISTER_PORT $PREFILL_DEVICE_IPS $DECODE_DEVICE_IPS &
55
+ _info " Waiting for decode instance ready"
56
56
wait_url_ready $DECODE_PROC_NAME " http://localhost:${DECODE_PORT} /v1/completions"
57
57
58
58
_info " pd disaggregated system is ready for handling request"
You can’t perform that action at this time.
0 commit comments