Skip to content

Commit 562d58d

Browse files
authored
feat: add qwen3 qwq models (#120)
* merge * merge * add Mistral-Small-3.1-24B-Instruct-2503 * modify qwq-32b deploy * add txgemma model * modify model list command * fix typo * add some ecs parameters * add glm4-z1 models * modify vllm backend * add qwen3 * fix cli bugs * fix * add deepseek r1/Qwen3-235B-A22B * fix local deploy account bug * add qwen 3 awq models
1 parent af5e42d commit 562d58d

File tree

1 file changed

+71
-0
lines changed

1 file changed

+71
-0
lines changed

src/emd/models/llms/qwen.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,42 @@
639639
)
640640

641641

642+
643+
# Registry entry for the "Qwen3-14B-AWQ" model: a single dict of metadata
# (engines, instance types, services, frameworks, hub ids, descriptive text)
# handed to Model.register.
Model.register(
    dict(
        model_id="Qwen3-14B-AWQ",
        supported_engines=[vllm_qwen3_engin084],
        supported_instances=[
            g5d2xlarge_instance,
            g5d4xlarge_instance,
            g5d8xlarge_instance,
            g5d16xlarge_instance,
            g4dn2xlarge_instance,
            # NOTE(review): g5d24xlarge_instance and g5d48xlarge_instance were
            # left disabled (commented out) in the original entry -- confirm
            # whether they should be offered for this model.
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-14B-AWQ",
        modelscope_model_id="Qwen/Qwen3-14B-AWQ",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # NOTE(review): this text cites 0.5B-72B model sizes, 128K context and
        # 29+ languages, which looks carried over from an earlier Qwen series
        # entry -- confirm against the Qwen3 model card before relying on it.
        description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
676+
677+
642678
Model.register(
643679
dict(
644680
model_id = "Qwen3-14B",
@@ -709,6 +745,41 @@
709745
# )
710746

711747

748+
749+
# Registry entry for the "Qwen3-32B-AWQ" model: a single dict of metadata
# (engines, instance types, services, frameworks, hub ids, descriptive text)
# handed to Model.register.
Model.register(
    dict(
        model_id="Qwen3-32B-AWQ",
        supported_engines=[vllm_qwen3_engin084],
        # The original entry also carried commented-out copies of
        # g5d24xlarge_instance / g5d48xlarge_instance; both are already
        # active in this list, so the redundant comments were dropped.
        supported_instances=[
            g5d12xlarge_instance,
            g5d24xlarge_instance,
            g5d48xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-32B-AWQ",
        modelscope_model_id="Qwen/Qwen3-32B-AWQ",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # NOTE(review): this text cites 0.5B-72B model sizes, 128K context and
        # 29+ languages, which looks carried over from an earlier Qwen series
        # entry -- confirm against the Qwen3 model card before relying on it.
        description="The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n following, long-text generation, structured data handling, 128K token context support, and\n multilingual capabilities for 29+ languages.",
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
780+
781+
782+
712783
Model.register(
713784
dict(
714785
model_id = "Qwen3-32B",

0 commit comments

Comments
 (0)