
Commit d5362a4

fix: fix qwen3 deployment (#125)
* merge
* merge
* add Mistral-Small-3.1-24B-Instruct-2503
* modify qwq-32b deploy
* add txgemma model
* modify model list command
* fix typo
* add some ecs parameters
* add glm4-z1 models
* modify vllm backend
* add qwen3
* fix cli bugs
* fix
* add deepseek r1/Qwen3-235B-A22B
* fix local deploy account bug
* add qwen 3 awq models
* fix serialize_utils bugs
* modify qwen3 deployment
1 parent 6e8ece0 · commit d5362a4

2 files changed (+44, -3 lines changed)

src/emd/models/engines.py
Lines changed: 2 additions & 2 deletions

@@ -145,9 +145,9 @@ class KtransformersEngine(OpenAICompitableEngine):
 
 vllm_qwen3_engin084 = VllmEngine(**{
     **vllm_engine064.model_dump(),
-    "engine_dockerfile_config": {"VERSION":"v0.8.5"},
+    "engine_dockerfile_config": {"VERSION":"v0.8.5.dev649_g0189a65a2"},
     "environment_variables": "export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
-    "default_cli_args": " --max_model_len 16000 --max_num_seq 30 --disable-log-stats --enable-reasoning --reasoning-parser deepseek_r1 --enable-auto-tool-choice --tool-call-parser hermes --enable-prefix-caching"
+    "default_cli_args": " --max_model_len 16000 --max_num_seq 30 --disable-log-stats --enable-reasoning --reasoning-parser qwen3 --enable-auto-tool-choice --tool-call-parser hermes --enable-prefix-caching"
 })
 
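The substantive change here is swapping the reasoning parser from deepseek_r1 to qwen3, alongside pinning a vLLM dev image that ships that parser. With --enable-reasoning and a matching --reasoning-parser, vLLM's OpenAI-compatible server strips the model's <think> block out of the answer and returns it in a separate reasoning_content field. A minimal client-side smoke test, assuming a deployed endpoint at a placeholder URL and model name (neither is taken from this repo), could look like:

# Hypothetical smoke test against an OpenAI-compatible vLLM endpoint started
# with "--enable-reasoning --reasoning-parser qwen3"; the URL and model name
# below are placeholders, not values from this repository.
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={
        "model": "Qwen3-32B",
        "messages": [{"role": "user", "content": "What is 17 * 24?"}],
    },
    timeout=120,
)
msg = resp.json()["choices"][0]["message"]
print("reasoning:", msg.get("reasoning_content"))  # filled in by the qwen3 reasoning parser
print("answer:", msg.get("content"))               # final answer with the <think> block stripped out

If reasoning_content comes back None while content still contains a <think> block, the parser flag or the pinned image version is the likely culprit.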

src/emd/models/vlms/qwen.py
Lines changed: 42 additions & 1 deletion

@@ -114,6 +114,44 @@
     )
 )
 
+
+
+Model.register(
+    dict(
+        model_id = "Qwen2.5-VL-7B-Instruct",
+        supported_engines=[vllm_qwen25vl72b_engine073],
+        supported_instances=[
+            g5d2xlarge_instance,
+            g5d4xlarge_instance,
+            g5d8xlarge_instance,
+            g5d12xlarge_instance,
+            g5d16xlarge_instance,
+            g5d24xlarge_instance,
+            g5d48xlarge_instance,
+            g6e2xlarge_instance,
+            local_instance
+        ],
+        supported_services=[
+            sagemaker_service,
+            sagemaker_async_service,
+            ecs_service,
+            local_service
+        ],
+        supported_frameworks=[
+            fastapi_framework
+        ],
+        allow_china_region=True,
+        huggingface_model_id="Qwen/Qwen2.5-VL-7B-Instruct",
+        modelscope_model_id="Qwen/Qwen2.5-VL-7B-Instruct",
+        require_huggingface_token=False,
+        application_scenario="vision llms for image understanding",
+        description="The latest series of Qwen2.5 VL",
+        model_type=ModelType.VLM,
+        model_series=QWEN2VL_SERIES
+    )
+)
+
+
 Model.register(
     dict(
         model_id = "QVQ-72B-Preview-AWQ",

@@ -156,7 +194,10 @@
             local_instance
         ],
         supported_services=[
-            sagemaker_service, sagemaker_async_service,local_service
+            sagemaker_service,
+            sagemaker_async_service,
+            ecs_service,
+            local_service
         ],
         supported_frameworks=[
             fastapi_framework
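Both hunks follow the same dict-based registration pattern used throughout this file: register() presumably validates the dict against the Model schema and stores it in a registry keyed by model_id, which the deploy tooling later consults for the allowed engines, instances, and services. A stripped-down, hypothetical illustration of that registry shape (not emd's actual implementation):

# Stripped-down, hypothetical illustration of a dict-based model registry;
# this is NOT the real implementation under src/emd/models, only the shape of it.
class Model:
    _registry: dict = {}

    @classmethod
    def register(cls, spec: dict) -> None:
        # Key each entry by model_id so deploy tooling can look it up later.
        cls._registry[spec["model_id"]] = spec

    @classmethod
    def get(cls, model_id: str) -> dict:
        return cls._registry[model_id]


Model.register(dict(
    model_id="Qwen2.5-VL-7B-Instruct",
    supported_services=["sagemaker", "ecs", "local"],
))
print(Model.get("Qwen2.5-VL-7B-Instruct")["supported_services"])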
