Skip to content

Commit 99cc821

Browse files
authored
Merge branch 'main' into cuda128_cudnn971_torch270_250623
2 parents a2570d5 + a50aeb3 commit 99cc821

File tree

45 files changed

+1379
-3059
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+1379
-3059
lines changed

examples/cloud/conf/serve.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
defaults:
2+
- _self_
3+
- serve: cloud_model
4+
5+
experiment:
6+
exp_name: cloud
7+
exp_dir: outputs/${experiment.exp_name}
8+
task:
9+
type: serve
10+
runner:
11+
type: cloud
12+
hostfile: ${oc.env:HOSTFILE, /etc/hostfile}
13+
master_addr: ${oc.env:MASTER_ADDR, 127.0.0.1}
14+
master_port: 7396
15+
device_type: ${oc.env:DEVICE_TYPE, gpu}
16+
nproc_per_node: ${oc.env:AIRS_ACCELERATOR_NUM, 1}
17+
deploy:
18+
use_fs_serve: false
19+
envs:
20+
CUDA_DEVICE_MAX_CONNECTIONS: 1
21+
22+
cmds:
23+
before_start: source /root/miniconda3/bin/activate flagscale-inference
24+
25+
action: run
26+
27+
hydra:
28+
run:
29+
dir: ${experiment.exp_dir}/hydra

examples/cloud/conf/serve/cloud_model.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
- serve_id: vllm_model
2+
engine: vllm
3+
engine_args:
4+
model: null
5+
port: 8000
6+
host: 0.0.0.0
7+
served_model_name: cloud
8+
tensor_parallel_size: 1
9+
pipeline_parallel_size: 1
10+
max_model_len: 32768

examples/deepseek_v3/conf/serve_auto_tuner.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ experiment:
77
exp_dir: ./outputs/${experiment.exp_name}
88
task:
99
type: serve
10-
deploy:
11-
use_fs_serve: false
1210
runner:
1311
nnodes: 2
1412
nproc_per_node: 8
1513
hostfile: examples/deepseek/conf/hostfile.txt
1614
docker: ds
15+
deploy:
16+
use_fs_serve: false
1717
auto_tuner:
1818
space:
1919
tensor_model_parallel_size: [4, 8]

examples/qwen2_5/conf/serve.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ experiment:
77
exp_dir: outputs/${experiment.exp_name}
88
task:
99
type: serve
10-
deploy:
11-
use_fs_serve: false
1210
runner:
1311
hostfile: null
12+
deploy:
13+
use_fs_serve: false
1414
envs:
1515
CUDA_VISIBLE_DEVICES: 0
1616
CUDA_DEVICE_MAX_CONNECTIONS: 1

examples/qwen2_5/conf/serve_auto_tuner.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ experiment:
77
exp_dir: ./outputs
88
task:
99
type: serve
10-
deploy:
11-
port: 6701
12-
use_fs_serve: false
1310
runner:
1411
nnodes: 1
1512
nproc_per_node: 4
13+
deploy:
14+
port: 6701
15+
use_fs_serve: false
1616
auto_tuner:
1717
space:
1818
tensor_model_parallel_size: "auto"

examples/qwen2_5/conf/serve_disagg_xpyd.yaml

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,18 @@ experiment:
77
exp_dir: outputs/${experiment.exp_name}
88
task:
99
type: serve
10-
deploy:
11-
port: 10001
12-
use_fs_serve: false
13-
prefill_decode_disaggregation: true
14-
prefill_num: 2
15-
prefill_address: x.x.x.x # optional, default "auto"
16-
decode_num: 2
17-
decode_address: x.x.x.x # optional, default "auto"
18-
prefill_decode_strategy: slo # optional, one of [slo|random|robin], default slo
1910
runner:
2011
hostfile: examples/qwen/conf/hostfile.txt
2112
docker: fr-v2
13+
deploy:
14+
port: 10001
15+
use_fs_serve: false
16+
prefill_decode_disaggregation: true
17+
prefill_num: 2
18+
prefill_address: x.x.x.x # optional, default "auto"
19+
decode_num: 2
20+
decode_address: x.x.x.x # optional, default "auto"
21+
prefill_decode_strategy: slo # optional, one of [slo|random|robin], default slo
2222
envs:
2323
CUDA_DEVICE_MAX_CONNECTIONS: 1
2424
VLLM_USE_V1: 0

examples/qwen2_5/conf/serve_multiple_instance.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@ experiment:
88
task:
99
type: serve
1010
entrypoint: null
11-
deploy:
12-
port: 6701
13-
use_fs_serve: true
1411
runner:
1512
hostfile: null # /path/to/hostfile.txt
1613
docker: ds
1714
ssh_port: 22
1815
nnodes: 2
1916
nproc_per_node: 8
17+
deploy:
18+
port: 6701
19+
use_fs_serve: true
2020
auto_tuner:
2121
space:
2222
tensor_model_parallel_size: [2,4]

examples/qwen3/conf/serve.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ experiment:
77
exp_dir: outputs/${experiment.exp_name}
88
task:
99
type: serve
10-
deploy:
11-
use_fs_serve: false
1210
runner:
1311
hostfile: null
12+
deploy:
13+
use_fs_serve: false
1414
envs:
1515
CUDA_VISIBLE_DEVICES: 0
1616
CUDA_DEVICE_MAX_CONNECTIONS: 1

examples/qwen3/conf/serve_atmb.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ experiment:
88
exp_dir: outputs/${experiment.exp_name}
99
task:
1010
type: serve
11-
deploy:
12-
port: 6701
13-
use_fs_serve: false
1411
runner:
1512
nnodes: 1
1613
nproc_per_node: 4
14+
deploy:
15+
port: 6701
16+
use_fs_serve: false
1717
envs:
1818
CUDA_VISIBLE_DEVICES: 0,1,2,3
1919
CUDA_DEVICE_MAX_CONNECTIONS: 1

examples/robobrain/conf/serve.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ experiment:
88
task:
99
type: serve
1010
entrypoint: null
11-
deploy:
12-
port: 6701
13-
use_fs_serve: true
1411
runner:
1512
hostfile: null #examples/robobrain/conf/hostfile.txt
1613
docker: robobrain
1714
ssh_port: 22
15+
deploy:
16+
port: 6701
17+
use_fs_serve: true
1818

1919
cmds:
2020
before_start: export RAY_DEDUP_LOGS=0

0 commit comments

Comments
 (0)