|
8 | 8 | tgi_qwen2d5_72b_on_inf2,
|
9 | 9 | vllm_qwen2d5_72b_engine064,
|
10 | 10 | vllm_qwq_engine073,
|
11 |
| - vllm_qwq_engine082 |
| 11 | + vllm_qwq_engine082, |
| 12 | + vllm_qwen3_engin084 |
12 | 13 | )
|
13 | 14 | from ..services import (
|
14 | 15 | sagemaker_service,
|
|
34 | 35 | from emd.models.utils.constants import ModelType
|
35 | 36 | from emd.models.utils.constants import ModelType
|
36 | 37 | from emd.models import ModelSeries
|
37 |
| -from ..model_series import QWEN2D5_SERIES,QWEN_REASONING_MODEL |
| 38 | +from ..model_series import QWEN2D5_SERIES,QWEN_REASONING_MODEL,QWEN3_SERIES |
38 | 39 |
|
39 | 40 | Model.register(
|
40 | 41 | dict(
|
|
498 | 499 | model_series=QWEN_REASONING_MODEL
|
499 | 500 | )
|
500 | 501 | )
|
| 502 | + |
| 503 | + |
# Register Qwen3-8B (dense) for serving through the Qwen3 vLLM engine.
# NOTE(review): the engine name `vllm_qwen3_engin084` looks like a typo of
# `engine084`; it is defined (and imported) under that spelling elsewhere in
# the package, so it is kept as-is here for consistency.
Model.register(
    dict(
        model_id="Qwen3-8B",
        supported_engines=[vllm_qwen3_engin084],
        # Single-GPU G5/G4dn sizes plus local deployment.
        supported_instances=[
            g5d2xlarge_instance,
            g5d4xlarge_instance,
            g5d8xlarge_instance,
            g5d16xlarge_instance,
            g4dn2xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-8B",
        modelscope_model_id="Qwen/Qwen3-8B",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # Description corrected: previous text was copy-pasted from the
        # Qwen2.5 series ("0.5B to 72B"); Qwen3 spans 0.6B-235B.
        description=(
            "Qwen3 is the latest generation of Qwen LLMs, offering dense and "
            "mixture-of-experts models from 0.6B to 235B parameters, featuring "
            "seamless switching between thinking and non-thinking modes, "
            "stronger reasoning, agent and tool-use capabilities, and "
            "multilingual support for 100+ languages."
        ),
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
| 537 | + |
# Register Qwen3-0.6B (dense) for serving through the Qwen3 vLLM engine.
Model.register(
    dict(
        model_id="Qwen3-0.6B",
        supported_engines=[vllm_qwen3_engin084],
        # Single-GPU G5/G4dn sizes plus local deployment.
        supported_instances=[
            g5d2xlarge_instance,
            g5d4xlarge_instance,
            g5d8xlarge_instance,
            g5d16xlarge_instance,
            g4dn2xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-0.6B",
        modelscope_model_id="Qwen/Qwen3-0.6B",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # Description corrected: previous text was copy-pasted from the
        # Qwen2.5 series ("0.5B to 72B"); Qwen3 spans 0.6B-235B.
        description=(
            "Qwen3 is the latest generation of Qwen LLMs, offering dense and "
            "mixture-of-experts models from 0.6B to 235B parameters, featuring "
            "seamless switching between thinking and non-thinking modes, "
            "stronger reasoning, agent and tool-use capabilities, and "
            "multilingual support for 100+ languages."
        ),
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
| 571 | + |
# Register Qwen3-1.7B (dense) for serving through the Qwen3 vLLM engine.
Model.register(
    dict(
        model_id="Qwen3-1.7B",
        supported_engines=[vllm_qwen3_engin084],
        # Single-GPU G5/G4dn sizes plus local deployment.
        supported_instances=[
            g5d2xlarge_instance,
            g5d4xlarge_instance,
            g5d8xlarge_instance,
            g5d16xlarge_instance,
            g4dn2xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-1.7B",
        modelscope_model_id="Qwen/Qwen3-1.7B",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # Description corrected: previous text was copy-pasted from the
        # Qwen2.5 series ("0.5B to 72B"); Qwen3 spans 0.6B-235B.
        description=(
            "Qwen3 is the latest generation of Qwen LLMs, offering dense and "
            "mixture-of-experts models from 0.6B to 235B parameters, featuring "
            "seamless switching between thinking and non-thinking modes, "
            "stronger reasoning, agent and tool-use capabilities, and "
            "multilingual support for 100+ languages."
        ),
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
| 605 | + |
| 606 | + |
# Register Qwen3-4B (dense) for serving through the Qwen3 vLLM engine.
Model.register(
    dict(
        model_id="Qwen3-4B",
        supported_engines=[vllm_qwen3_engin084],
        # Single-GPU G5/G4dn sizes plus local deployment.
        supported_instances=[
            g5d2xlarge_instance,
            g5d4xlarge_instance,
            g5d8xlarge_instance,
            g5d16xlarge_instance,
            g4dn2xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-4B",
        modelscope_model_id="Qwen/Qwen3-4B",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # Description corrected: previous text was copy-pasted from the
        # Qwen2.5 series ("0.5B to 72B"); Qwen3 spans 0.6B-235B.
        description=(
            "Qwen3 is the latest generation of Qwen LLMs, offering dense and "
            "mixture-of-experts models from 0.6B to 235B parameters, featuring "
            "seamless switching between thinking and non-thinking modes, "
            "stronger reasoning, agent and tool-use capabilities, and "
            "multilingual support for 100+ languages."
        ),
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
| 640 | + |
| 641 | + |
# Register Qwen3-14B (dense) for serving through the Qwen3 vLLM engine.
Model.register(
    dict(
        model_id="Qwen3-14B",
        supported_engines=[vllm_qwen3_engin084],
        # Multi-GPU G5 sizes plus local deployment. (Stale commented-out
        # duplicates of g5d24xlarge/g5d48xlarge removed — both are already
        # active entries in this list.)
        supported_instances=[
            g5d12xlarge_instance,
            g5d24xlarge_instance,
            g5d48xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-14B",
        modelscope_model_id="Qwen/Qwen3-14B",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # Description corrected: previous text was copy-pasted from the
        # Qwen2.5 series ("0.5B to 72B"); Qwen3 spans 0.6B-235B.
        description=(
            "Qwen3 is the latest generation of Qwen LLMs, offering dense and "
            "mixture-of-experts models from 0.6B to 235B parameters, featuring "
            "seamless switching between thinking and non-thinking modes, "
            "stronger reasoning, agent and tool-use capabilities, and "
            "multilingual support for 100+ languages."
        ),
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
| 673 | + |
# Register Qwen3-32B (dense) for serving through the Qwen3 vLLM engine.
Model.register(
    dict(
        model_id="Qwen3-32B",
        supported_engines=[vllm_qwen3_engin084],
        # Multi-GPU G5 sizes plus local deployment. (Stale commented-out
        # duplicates of g5d24xlarge/g5d48xlarge removed — both are already
        # active entries in this list.)
        supported_instances=[
            g5d12xlarge_instance,
            g5d24xlarge_instance,
            g5d48xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-32B",
        modelscope_model_id="Qwen/Qwen3-32B",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # Description corrected: previous text was copy-pasted from the
        # Qwen2.5 series ("0.5B to 72B"); Qwen3 spans 0.6B-235B.
        description=(
            "Qwen3 is the latest generation of Qwen LLMs, offering dense and "
            "mixture-of-experts models from 0.6B to 235B parameters, featuring "
            "seamless switching between thinking and non-thinking modes, "
            "stronger reasoning, agent and tool-use capabilities, and "
            "multilingual support for 100+ languages."
        ),
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
| 705 | + |
| 706 | + |
# Register Qwen3-30B-A3B for serving through the Qwen3 vLLM engine.
# Per the upstream model card this is a mixture-of-experts variant
# (30B total parameters, ~3B activated per token).
Model.register(
    dict(
        model_id="Qwen3-30B-A3B",
        supported_engines=[vllm_qwen3_engin084],
        # Multi-GPU G5 sizes plus local deployment. (Stale commented-out
        # duplicates of g5d24xlarge/g5d48xlarge removed — both are already
        # active entries in this list.)
        supported_instances=[
            g5d12xlarge_instance,
            g5d24xlarge_instance,
            g5d48xlarge_instance,
            local_instance,
        ],
        supported_services=[
            sagemaker_service,
            sagemaker_async_service,
            ecs_service,
            local_service,
        ],
        supported_frameworks=[
            fastapi_framework,
        ],
        allow_china_region=True,
        huggingface_model_id="Qwen/Qwen3-30B-A3B",
        modelscope_model_id="Qwen/Qwen3-30B-A3B",
        require_huggingface_token=False,
        application_scenario="Agent, tool use, translation, summary",
        # Description corrected: previous text was copy-pasted from the
        # Qwen2.5 series ("0.5B to 72B"); Qwen3 spans 0.6B-235B.
        description=(
            "Qwen3 is the latest generation of Qwen LLMs, offering dense and "
            "mixture-of-experts models from 0.6B to 235B parameters, featuring "
            "seamless switching between thinking and non-thinking modes, "
            "stronger reasoning, agent and tool-use capabilities, and "
            "multilingual support for 100+ languages."
        ),
        model_type=ModelType.LLM,
        model_series=QWEN3_SERIES,
    )
)
0 commit comments