|
639 | 639 | )
|
640 | 640 |
|
641 | 641 |
|
| 642 | + |
# Registry entry for the AWQ-quantized Qwen3 14B checkpoint.
# NOTE(review): the description text below mentions "0.5B to 72B" and
# "29+ languages"; it looks carried over from an earlier Qwen series --
# confirm it is accurate for Qwen3 before changing the string.
Model.register(
    {
        "model_id": "Qwen3-14B-AWQ",
        "supported_engines": [vllm_qwen3_engin084],
        "supported_instances": [
            g5d2xlarge_instance,
            g5d4xlarge_instance,
            g5d8xlarge_instance,
            g5d16xlarge_instance,
            g4dn2xlarge_instance,
            # Not enabled for this model (left commented out):
            # g5d24xlarge_instance,
            # g5d48xlarge_instance,
            local_instance,
        ],
        "supported_services": [
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        "supported_frameworks": [fastapi_framework],
        "allow_china_region": True,
        # The same repository id is used on both model hubs.
        "huggingface_model_id": "Qwen/Qwen3-14B-AWQ",
        "modelscope_model_id": "Qwen/Qwen3-14B-AWQ",
        "require_huggingface_token": False,
        "application_scenario": "Agent, tool use, translation, summary",
        # Adjacent literals are concatenated at compile time; the resulting
        # string is identical to the original single-line literal.
        "description": (
            "The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n"
            " parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n"
            " following, long-text generation, structured data handling, 128K token context support, and\n"
            " multilingual capabilities for 29+ languages."
        ),
        "model_type": ModelType.LLM,
        "model_series": QWEN3_SERIES,
    }
)
| 676 | + |
| 677 | + |
642 | 678 | Model.register(
|
643 | 679 | dict(
|
644 | 680 | model_id = "Qwen3-14B",
|
|
709 | 745 | # )
|
710 | 746 |
|
711 | 747 |
|
| 748 | + |
# Registry entry for the AWQ-quantized Qwen3 32B checkpoint.
# NOTE(review): the description text below mentions "0.5B to 72B" and
# "29+ languages"; it looks carried over from an earlier Qwen series --
# confirm it is accurate for Qwen3 before changing the string.
Model.register(
    {
        "model_id": "Qwen3-32B-AWQ",
        "supported_engines": [vllm_qwen3_engin084],
        "supported_instances": [
            g5d12xlarge_instance,
            g5d24xlarge_instance,
            g5d48xlarge_instance,
            local_instance,
        ],
        "supported_services": [
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        "supported_frameworks": [fastapi_framework],
        "allow_china_region": True,
        # The same repository id is used on both model hubs.
        "huggingface_model_id": "Qwen/Qwen3-32B-AWQ",
        "modelscope_model_id": "Qwen/Qwen3-32B-AWQ",
        "require_huggingface_token": False,
        "application_scenario": "Agent, tool use, translation, summary",
        # Adjacent literals are concatenated at compile time; the resulting
        # string is identical to the original single-line literal.
        "description": (
            "The latest series of Qwen LLMs, offers base and tuned models from 0.5B to 72B\n"
            " parameters, featuring enhanced knowledge, improved coding and math skills, better instruction\n"
            " following, long-text generation, structured data handling, 128K token context support, and\n"
            " multilingual capabilities for 29+ languages."
        ),
        "model_type": ModelType.LLM,
        "model_series": QWEN3_SERIES,
    }
)
| 780 | + |
| 781 | + |
| 782 | + |
712 | 783 | Model.register(
|
713 | 784 | dict(
|
714 | 785 | model_id = "Qwen3-32B",
|
|
0 commit comments