
Commit ab1e566

feat: add txgemma (#94)
* merge
* merge
* add Mistral-Small-3.1-24B-Instruct-2503
* modify qwq-32b deploy
* add txgemma model
1 parent f4b88fa commit ab1e566

12 files changed: 209 additions, 32 deletions

docs/en/best_deployment_practices.md

Lines changed: 42 additions & 0 deletions
````diff
@@ -64,6 +64,48 @@ emd deploy --model-id Qwen2.5-14B-Instruct-AWQ --instance-type g4dn.2xlarge --en
 }'
 ```
 
+### Example: Customize model download methods
+- You can load models from different locations by adding appropriate values in the `extra-params` parameter
+1. Load the model from S3
+```json
+{
+    "model_params": {
+        "model_files_s3_path": "<S3_PATH>"
+    }
+}
+```
+2. Load the model from a local path (only applicable for local deployment)
+```json
+{
+    "model_params": {
+        "model_files_local_path": "<LOCAL_PATH>"
+    }
+}
+```
+3. Skip downloading and uploading model files in CodeBuild, which significantly reduces deployment time
+```json
+{
+    "model_params": {
+        "need_prepare_model": false
+    }
+}
+```
+4. Specify the download source for model files
+```json
+{
+    "model_params": {
+        "model_files_download_source": "huggingface|modelscope|auto(default)"
+    }
+}
+```
+5. Specify the model ID on Hugging Face or ModelScope
+```json
+{
+    "model_params": {
+        "huggingface_model_id": "model id on huggingface",
+        "modelscope_model_id": "model id on modelscope"
+    }
+}
+```
+
 ## Environmental variables
 - `LOCAL_DEPLOY_PORT: ` Local deployment port, default: `8080`
````
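These options can be combined in a single `model_params` object. A minimal end-to-end sketch in Python (hypothetical: the `--extra-params` flag spelling and the JSON-string calling convention are inferred from the `emd deploy` pattern shown above, not confirmed by this diff):

```python
# Hypothetical sketch: deploy txgemma-9b-chat while loading weights from S3
# and skipping model preparation in CodeBuild. "--extra-params" is an assumed
# flag name; <S3_PATH> is a placeholder to fill in.
import json
import subprocess

extra_params = {
    "model_params": {
        "model_files_s3_path": "<S3_PATH>",  # e.g. an s3:// URI to the weights
        "need_prepare_model": False,         # skip download/upload in CodeBuild
    }
}

subprocess.run(
    ["emd", "deploy",
     "--model-id", "txgemma-9b-chat",
     "--instance-type", "g5.12xlarge",
     "--extra-params", json.dumps(extra_params)],
    check=True,
)
```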

src/emd/constants.py

Lines changed: 6 additions & 0 deletions
```diff
@@ -1,4 +1,5 @@
 from .revision import VERSION, convert_version_name_to_stack_name
+import os
 ENV_STACK_NAME = f'EMD-Env'
 MODEL_STACK_NAME_PREFIX = f"EMD-Model"
 ENV_BUCKET_NAME_PREFIX = "emd-env-artifactbucket"
@@ -25,3 +26,8 @@
 
 LOCAL_REGION = "local"
 # EMD_USE_NO_PROFILE_CHOICE = "Don't set"
+
+LOCAL_DEPLOY_PIPELINE_ZIP_DIR = os.path.join(
+    os.path.expanduser("~"),
+    f"emd_{VERSION}"
+)
```

src/emd/models/engines.py

Lines changed: 12 additions & 0 deletions
```diff
@@ -78,6 +78,18 @@ class KtransformersEngine(OpenAICompitableEngine):
 )
 
 
+vllm_texgemma082 = VllmEngine(**{
+    "engine_type": EngineType.VLLM,
+    "engine_dockerfile_config": {"VERSION": "v0.8.2"},
+    "engine_cls": "vllm.vllm_backend.VLLMBackend",
+    "base_image_host": "public.ecr.aws",
+    "use_public_ecr": True,
+    "docker_login_region": "us-east-1",
+    "default_cli_args": " --max_num_seq 10 --disable-log-stats"
+    }
+)
+
+
 vllm_mistral_small_engine082 = VllmEngine(
     **{
         **vllm_engine064.model_dump(),
```
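Note: `vllm_texgemma082` pins the vLLM v0.8.2 image from `public.ecr.aws` and caps concurrent sequences via `default_cli_args`. Model-specific engines elsewhere in this file are derived from a base spec rather than written out in full; a minimal sketch of that derivation pattern (hypothetical variable name, assuming `VllmEngine` is a pydantic model, which the `model_dump()` call in the context lines suggests):

```python
# Hypothetical sketch: derive a variant engine from vllm_texgemma082 the same
# way vllm_mistral_small_engine082 derives from vllm_engine064 in this file.
vllm_txgemma_custom = VllmEngine(
    **{
        **vllm_texgemma082.model_dump(),  # copy every field of the base spec
        # override only the CLI args, e.g. to raise the sequence cap
        "default_cli_args": " --max_num_seq 32 --disable-log-stats",
    }
)
```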

src/emd/models/llms/__init__.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -5,5 +5,6 @@
     llama,
     deepseek,
     baichuan,
-    jina
+    jina,
+    txgemma
 )
```

src/emd/models/llms/deepseek.py

Lines changed: 27 additions & 27 deletions
```diff
@@ -334,33 +334,33 @@
     )
 )
 
-Model.register(
-    dict(
-        model_id = "deepseek-r1-671b-1.58bit_ollama",
-        supported_engines=[ollama_deepseek_r1_qwen2d5_1d5b_engine057],
-        supported_instances=[
-            g5d48xlarge_instance,
-            local_instance
-        ],
-        supported_services=[
-            sagemaker_service,
-            sagemaker_async_service,
-            ecs_service,
-            local_service
-        ],
-        supported_frameworks=[
-            fastapi_framework
-        ],
-        allow_china_region=False,
-        ollama_model_id="SIGJNF/deepseek-r1-671b-1.58bit",
-        # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
-        require_huggingface_token=False,
-        application_scenario="Agent, tool use, translation, summary",
-        description="The latest series of DeepSeek LLMs for reasoning",
-        model_type=ModelType.LLM,
-        model_series=DEEPSEEK_REASONING_MODEL
-    )
-)
+# Model.register(
+#     dict(
+#         model_id = "deepseek-r1-671b-1.58bit_ollama",
+#         supported_engines=[ollama_deepseek_r1_qwen2d5_1d5b_engine057],
+#         supported_instances=[
+#             g5d48xlarge_instance,
+#             local_instance
+#         ],
+#         supported_services=[
+#             sagemaker_service,
+#             sagemaker_async_service,
+#             ecs_service,
+#             local_service
+#         ],
+#         supported_frameworks=[
+#             fastapi_framework
+#         ],
+#         allow_china_region=False,
+#         ollama_model_id="SIGJNF/deepseek-r1-671b-1.58bit",
+#         # modelscope_model_id="Qwen/Qwen2.5-14B-Instruct",
+#         require_huggingface_token=False,
+#         application_scenario="Agent, tool use, translation, summary",
+#         description="The latest series of DeepSeek LLMs for reasoning",
+#         model_type=ModelType.LLM,
+#         model_series=DEEPSEEK_REASONING_MODEL
+#     )
+# )
 
 
 Model.register(
```

src/emd/models/llms/txgemma.py

Lines changed: 91 additions & 0 deletions
```diff
@@ -0,0 +1,91 @@
+from ..engines import vllm_texgemma082
+from .. import Model
+from ..frameworks import fastapi_framework
+from ..services import (
+    sagemaker_service,
+    sagemaker_async_service,
+    ecs_service,
+    local_service
+)
+from emd.models.utils.constants import ModelType
+from ..model_series import TXGEMMA_SERIES
+from ..instances import (
+    g5d2xlarge_instance,
+    g5d4xlarge_instance,
+    g5d8xlarge_instance,
+    g5d12xlarge_instance,
+    g5d16xlarge_instance,
+    g5d24xlarge_instance,
+    g5d48xlarge_instance,
+    g6e2xlarge_instance,
+    local_instance
+)
+from ..utils.constants import ModelFilesDownloadSource
+
+
+Model.register(
+    dict(
+        model_id = "txgemma-9b-chat",
+        supported_engines=[vllm_texgemma082],
+        supported_instances=[
+            g5d12xlarge_instance,
+            g5d24xlarge_instance,
+            g5d48xlarge_instance,
+            g5d2xlarge_instance,
+            g5d4xlarge_instance,
+            g5d8xlarge_instance,
+            g5d16xlarge_instance,
+            local_instance
+        ],
+        disable_hf_transfer=True,
+        supported_services=[
+            sagemaker_service,
+            sagemaker_async_service,
+            ecs_service,
+            local_service
+        ],
+        supported_frameworks=[
+            fastapi_framework
+        ],
+        huggingface_model_id="google/txgemma-9b-chat",
+        modelscope_model_id="AI-ModelScope/txgemma-9b-chat",
+        model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
+        # require_huggingface_token=True,
+        application_scenario="llms for the development of therapeutics.",
+        description="The latest series of txgemma",
+        model_type=ModelType.LLM,
+        model_series=TXGEMMA_SERIES,
+    )
+)
+
+
+Model.register(
+    dict(
+        model_id = "txgemma-27b-chat",
+        supported_engines=[vllm_texgemma082],
+        supported_instances=[
+            g5d12xlarge_instance,
+            g5d24xlarge_instance,
+            g5d48xlarge_instance,
+            local_instance
+        ],
+        disable_hf_transfer=True,
+        supported_services=[
+            sagemaker_service,
+            sagemaker_async_service,
+            ecs_service,
+            local_service
+        ],
+        supported_frameworks=[
+            fastapi_framework
+        ],
+        huggingface_model_id="google/txgemma-27b-chat",
+        modelscope_model_id="AI-ModelScope/txgemma-27b-chat",
+        model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
+        # require_huggingface_token=True,
+        application_scenario="llms for the development of therapeutics.",
+        description="The latest series of txgemma",
+        model_type=ModelType.LLM,
+        model_series=TXGEMMA_SERIES,
+    )
+)
```
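Both registrations follow the declarative pattern used throughout `src/emd/models/llms`: each module calls `Model.register(dict(...))` at import time, which is why `txgemma` must also be added to `llms/__init__.py` (see above). A minimal sketch of the registry semantics this implies (hypothetical reconstruction; the real `Model` class is not part of this diff):

```python
# Hypothetical registry sketch illustrating the Model.register(...) pattern.
class Model:
    _registry: dict = {}

    @classmethod
    def register(cls, spec: dict) -> None:
        # Index each spec by model_id so deploy tooling can later resolve
        # supported engines, instances, services, and download sources.
        cls._registry[spec["model_id"]] = spec

    @classmethod
    def get(cls, model_id: str) -> dict:
        return cls._registry[model_id]
```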

src/emd/models/model_series.py

Lines changed: 7 additions & 0 deletions
```diff
@@ -97,6 +97,13 @@
     reference_link="https://blog.google/technology/developers/gemma-3/"
 )
 
+TXGEMMA_SERIES = ModelSeries(
+    model_series_name=ModelSeriesType.TXGEMMA,
+    description="TXGemma is a series of open models to accelerate the development of therapeutics.",
+    reference_link="https://huggingface.co/collections/google/txgemma-release-67dd92e931c857d15e4d1e87"
+)
+
+
 MISTRAL_SERIES = ModelSeries(
     model_series_name=ModelSeriesType.MISTRAL,
     description="LLMs and VLMs provided by MISTRAL AI.",
```

src/emd/models/services.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -91,7 +91,7 @@
     "ServiceType": "service_type",
     "EngineType": "engine_type",
     "Region": "region",
-    "DesiredCapacity": "desired_capacity",
+    "DesiredCapacity": ValueWithDefault(name="desired_capacity", default=1),
     "ContainerCpu": "container_cpu",
     "ContainerMemory": "container_memory",
     "ContainerGpu": "instance_gpu_num"
```

src/emd/models/utils/constants.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -214,6 +214,7 @@ def get_service_quota_code(cls, instance_type: str):
 
 class ModelSeriesType(ConstantBase):
     GEMMA3 = "gemma3"
+    TXGEMMA = "txgemma"
     MISTRAL = "mistral"
     QWEN2D5 = "qwen2.5"
     GLM4 = "glm4"
```

src/emd/models/vlms/gemma3.py

Lines changed: 4 additions & 0 deletions
```diff
@@ -10,6 +10,7 @@
 from emd.models.utils.constants import ModelType
 from ..model_series import Gemma3_SERIES
 from ..instances import (
+    g4dn12xlarge_instance,
     g5d2xlarge_instance,
     g5d4xlarge_instance,
     g5d8xlarge_instance,
@@ -43,6 +44,7 @@
         supported_frameworks=[
             fastapi_framework
         ],
+        allow_china_region = True,
        modelscope_model_id="LLM-Research/gemma-3-4b-it",
         model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
         # require_huggingface_token=False,
@@ -74,6 +76,7 @@
         supported_frameworks=[
             fastapi_framework
         ],
+        allow_china_region = True,
         # huggingface_model_id="google/gemma-3-12b-it",
         # require_huggingface_token=False,
         modelscope_model_id="LLM-Research/gemma-3-12b-it",
@@ -106,6 +109,7 @@
         supported_frameworks=[
             fastapi_framework
         ],
+        allow_china_region = True,
         # huggingface_model_id="unsloth/gemma-3-27b-it",
         modelscope_model_id="LLM-Research/gemma-3-27b-it",
         model_files_download_source=ModelFilesDownloadSource.MODELSCOPE,
```
