File tree Expand file tree Collapse file tree 2 files changed +44
-3
lines changed Expand file tree Collapse file tree 2 files changed +44
-3
lines changed Original file line number Diff line number Diff line change @@ -145,9 +145,9 @@ class KtransformersEngine(OpenAICompitableEngine):
145
145
146
146
vllm_qwen3_engin084 = VllmEngine (** {
147
147
** vllm_engine064 .model_dump (),
148
- "engine_dockerfile_config" : {"VERSION" :"v0.8.5" },
148
+ "engine_dockerfile_config" : {"VERSION" :"v0.8.5.dev649_g0189a65a2 " },
149
149
"environment_variables" : "export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True" ,
150
- "default_cli_args" : " --max_model_len 16000 --max_num_seq 30 --disable-log-stats --enable-reasoning --reasoning-parser deepseek_r1 --enable-auto-tool-choice --tool-call-parser hermes --enable-prefix-caching"
150
+ "default_cli_args" : " --max_model_len 16000 --max_num_seq 30 --disable-log-stats --enable-reasoning --reasoning-parser qwen3 --enable-auto-tool-choice --tool-call-parser hermes --enable-prefix-caching"
151
151
})
152
152
153
153
Original file line number Diff line number Diff line change 114
114
)
115
115
)
116
116
117
+
118
+
119
+ Model .register (
120
+ dict (
121
+ model_id = "Qwen2.5-VL-7B-Instruct" ,
122
+ supported_engines = [vllm_qwen25vl72b_engine073 ],
123
+ supported_instances = [
124
+ g5d2xlarge_instance ,
125
+ g5d4xlarge_instance ,
126
+ g5d8xlarge_instance ,
127
+ g5d12xlarge_instance ,
128
+ g5d16xlarge_instance ,
129
+ g5d24xlarge_instance ,
130
+ g5d48xlarge_instance ,
131
+ g6e2xlarge_instance ,
132
+ local_instance
133
+ ],
134
+ supported_services = [
135
+ sagemaker_service ,
136
+ sagemaker_async_service ,
137
+ ecs_service ,
138
+ local_service
139
+ ],
140
+ supported_frameworks = [
141
+ fastapi_framework
142
+ ],
143
+ allow_china_region = True ,
144
+ huggingface_model_id = "Qwen/Qwen2.5-VL-7B-Instruct" ,
145
+ modelscope_model_id = "Qwen/Qwen2.5-VL-7B-Instruct" ,
146
+ require_huggingface_token = False ,
147
+ application_scenario = "vision llms for image understanding" ,
148
+ description = "The latest series of Qwen2.5 VL" ,
149
+ model_type = ModelType .VLM ,
150
+ model_series = QWEN2VL_SERIES
151
+ )
152
+ )
153
+
154
+
117
155
Model .register (
118
156
dict (
119
157
model_id = "QVQ-72B-Preview-AWQ" ,
156
194
local_instance
157
195
],
158
196
supported_services = [
159
- sagemaker_service , sagemaker_async_service ,local_service
197
+ sagemaker_service ,
198
+ sagemaker_async_service ,
199
+ ecs_service ,
200
+ local_service
160
201
],
161
202
supported_frameworks = [
162
203
fastapi_framework
You can’t perform that action at this time.
0 commit comments