feat: add qwen3 qwq models (#120)

11zhouxuan · web-flow · commit 562d58d7f8d2 · 2025-05-08T19:15:32.000+08:00
* merge

* merge

* add Mistral-Small-3.1-24B-Instruct-2503

* modify qwq-32b deploy

* add txgemma model

* modify model list command

* fix typo

* add some ecs parameters

* add glm4-z1 models

* modify vllm backend

* add qwen3

* fix cli bugs

* fix

* add deepseek r1/Qwen3-235B-A22B

* fix local deploy account bug

* add qwen3 awq models
diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py
@@ -639,6 +639,42 @@
 )
 
 
+
+# Register the AWQ-quantized Qwen3-14B checkpoint with vllm_qwen3_engin084 as its engine.
+Model.register(
+    dict(
+        model_id="Qwen3-14B-AWQ",
+        supported_engines=[vllm_qwen3_engin084],
+        supported_instances=[
+            g5d2xlarge_instance,
+            g5d4xlarge_instance,
+            g5d8xlarge_instance,
+            g5d16xlarge_instance,
+            g4dn2xlarge_instance,
+            # g5d24xlarge_instance,
+            # g5d48xlarge_instance,
+            local_instance,
+        ],
+        supported_services=[
+            sagemaker_service,
+            sagemaker_async_service,
+            ecs_service,
+            local_service,
+        ],
+        supported_frameworks=[fastapi_framework],
+        allow_china_region=True,
+        huggingface_model_id="Qwen/Qwen3-14B-AWQ",
+        modelscope_model_id="Qwen/Qwen3-14B-AWQ",
+        require_huggingface_token=False,
+        application_scenario="Agent, tool use, translation, summary",
+        # Describes the Qwen3 family and this quantized variant; keep in sync with the model card.
+        description="Qwen3 is the latest generation of Qwen LLMs, offering dense and mixture-of-experts (MoE)\n models with switchable thinking and non-thinking modes, stronger reasoning, agent and\n tool-use capabilities, and support for 100+ languages and dialects. This entry is the\n AWQ 4-bit quantized 14B model.",
+        model_type=ModelType.LLM,
+        model_series=QWEN3_SERIES,
+    )
+)
+
+
 Model.register(
     dict(
         model_id = "Qwen3-14B",
@@ -709,6 +745,41 @@
 # )
 
 
+
+# Register the AWQ-quantized Qwen3-32B checkpoint with vllm_qwen3_engin084 as its engine.
+# Offered on the g5d12xlarge/g5d24xlarge/g5d48xlarge instances and for local deployment.
+Model.register(
+    dict(
+        model_id="Qwen3-32B-AWQ",
+        supported_engines=[vllm_qwen3_engin084],
+        supported_instances=[
+            g5d12xlarge_instance,
+            g5d24xlarge_instance,
+            g5d48xlarge_instance,
+            local_instance,
+        ],
+        supported_services=[
+            sagemaker_service,
+            sagemaker_async_service,
+            ecs_service,
+            local_service,
+        ],
+        supported_frameworks=[fastapi_framework],
+        allow_china_region=True,
+        huggingface_model_id="Qwen/Qwen3-32B-AWQ",
+        modelscope_model_id="Qwen/Qwen3-32B-AWQ",
+        require_huggingface_token=False,
+        application_scenario="Agent, tool use, translation, summary",
+        # Describes the Qwen3 family and this quantized variant; keep it in sync
+        # with the upstream model card.
+        description="Qwen3 is the latest generation of Qwen LLMs, offering dense and mixture-of-experts (MoE)\n models with switchable thinking and non-thinking modes, stronger reasoning, agent and\n tool-use capabilities, and support for 100+ languages and dialects. This entry is the\n AWQ 4-bit quantized 32B model.",
+        model_type=ModelType.LLM,
+        model_series=QWEN3_SERIES,
+    )
+)
+
+
+
 Model.register(
     dict(
         model_id = "Qwen3-32B",