#!/bin/bash
# Launches disaggregated prefill/decode vLLM instances for each model in
# MODELS, fronts them with a toy proxy, and runs the KV-connector accuracy
# test suite against each one.
set -xe

# Models to run. Commented-out entries are kept for easy re-enabling.
MODELS=(
  # "Qwen/Qwen3-0.6B"
  "deepseek-ai/deepseek-vl2-tiny"
)

# Number of prefill and decode instances to create
NUM_PREFILL_INSTANCES=${NUM_PREFILL_INSTANCES:-1} # Default to 1
@@ -24,86 +26,147 @@ wait_for_server() {
24
26
done" && return 0 || return 1
25
27
}
26
28
# Kill any vLLM server processes left over from a previous (possibly failed)
# run, then pause briefly so their ports are released before we relaunch.
# pkill exits non-zero when nothing matches; `|| true` keeps `set -e` happy.
cleanup_instances() {
  echo "Cleaning up any running vLLM instances..."
  pkill -f "vllm serve" || true
  sleep 2
}
# Helper to get model-specific extra CLI arguments.
# Arguments: $1 - model name (HF repo id)
# Outputs:   extra `vllm serve` flags on stdout (empty for most models)
# deepseek-vl2-tiny needs an architectures override plus --trust-remote-code.
get_model_args() {
  local model_name=$1
  local extra_args=""

  if [[ "$model_name" == "deepseek-ai/deepseek-vl2-tiny" ]]; then
    extra_args="--hf_overrides '{\"architectures\": [\"DeepseekVLV2ForCausalLM\"]}' --trust-remote-code"
  fi

  echo "$extra_args"
}
# Run the full disaggregated-serving accuracy test for one model:
# start NUM_PREFILL_INSTANCES prefill and NUM_DECODE_INSTANCES decode
# servers, wait for them, start the toy proxy, run pytest, then clean up.
# Globals:   NUM_PREFILL_INSTANCES, NUM_DECODE_INSTANCES, GIT_ROOT (read);
#            PREFILL_HOSTS/PORTS, DECODE_HOSTS/PORTS (written)
# Arguments: $1 - model name
run_tests_for_model() {
  local model_name=$1
  echo "================================"
  echo "Testing model: $model_name"
  echo "================================"

  # Get model-specific arguments (declaration split from assignment so a
  # helper failure is not masked by `local`'s own exit status).
  local model_args
  model_args=$(get_model_args "$model_name")

  # Arrays to store all hosts and ports
  PREFILL_HOSTS=()
  PREFILL_PORTS=()
  DECODE_HOSTS=()
  DECODE_PORTS=()

  # Start prefill instances
  for i in $(seq 0 $((NUM_PREFILL_INSTANCES - 1))); do
    # Calculate GPU ID - we'll distribute across available GPUs
    GPU_ID=$((i % $(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)))
    # Calculate port number (base port + instance number)
    PORT=$((8100 + i))
    # Calculate side channel port
    SIDE_CHANNEL_PORT=$((5559 + i))

    echo "Starting prefill instance $i on GPU $GPU_ID, port $PORT"

    # Build the command with or without model-specific args.
    # NOTE(review): built as a string and run through eval because model_args
    # contains nested quoting (--hf_overrides JSON) that must be re-parsed.
    BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
    --port $PORT \
    --enforce-eager \
    --disable-log-requests \
    --gpu-memory-utilization 0.2 \
    --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"

    if [ -n "$model_args" ]; then
      FULL_CMD="$BASE_CMD $model_args"
    else
      FULL_CMD="$BASE_CMD"
    fi

    eval "$FULL_CMD &"

    # Store host and port for proxy configuration
    PREFILL_HOSTS+=("localhost")
    PREFILL_PORTS+=("$PORT")
  done

  # Start decode instances
  for i in $(seq 0 $((NUM_DECODE_INSTANCES - 1))); do
    # Distribute across available GPUs, starting after the prefill GPUs.
    GPU_ID=$(((i + NUM_PREFILL_INSTANCES) % $(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)))
    # Calculate port number (base port + instance number)
    PORT=$((8200 + i))
    # Calculate side channel port
    SIDE_CHANNEL_PORT=$((5659 + i))

    echo "Starting decode instance $i on GPU $GPU_ID, port $PORT"

    # Build the command with or without model-specific args
    BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \
    --port $PORT \
    --enforce-eager \
    --disable-log-requests \
    --gpu-memory-utilization 0.2 \
    --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"

    if [ -n "$model_args" ]; then
      FULL_CMD="$BASE_CMD $model_args"
    else
      FULL_CMD="$BASE_CMD"
    fi

    eval "$FULL_CMD &"

    # Store host and port for proxy configuration
    DECODE_HOSTS+=("localhost")
    DECODE_PORTS+=("$PORT")
  done

  # Wait for all instances to start
  for PORT in "${PREFILL_PORTS[@]}"; do
    echo "Waiting for prefill instance on port $PORT to start..."
    wait_for_server "$PORT"
  done

  for PORT in "${DECODE_PORTS[@]}"; do
    echo "Waiting for decode instance on port $PORT to start..."
    wait_for_server "$PORT"
  done

  # Build the command for the proxy server with all the hosts and ports
  PROXY_CMD="python ${GIT_ROOT}/tests/v1/kv_connector/toy_proxy_server.py --port 8192"

  # Add all prefill hosts and ports (space-joined into the command string)
  PROXY_CMD+=" --prefiller-hosts ${PREFILL_HOSTS[*]}"
  PROXY_CMD+=" --prefiller-ports ${PREFILL_PORTS[*]}"

  # Add all decode hosts and ports
  PROXY_CMD+=" --decoder-hosts ${DECODE_HOSTS[*]}"
  PROXY_CMD+=" --decoder-ports ${DECODE_PORTS[*]}"

  # Start the proxy server
  echo "Starting proxy server with command: $PROXY_CMD"
  $PROXY_CMD &

  # Wait for the proxy to start
  sleep 5

  # Run lm eval for this model
  echo "Running tests for $model_name"
  TEST_MODEL=$model_name python -m pytest -s -x "${GIT_ROOT}/tests/v1/kv_connector/test_accuracy.py"

  # Clean up before running next model
  cleanup_instances
  sleep 3
}
# Run tests for each model in turn; each run cleans up its own servers.
for model in "${MODELS[@]}"; do
  run_tests_for_model "$model"
done

echo "All tests completed!"