Skip to content

Commit 9aec2a9

Browse files
committed
Updated pr.
1 parent 2ba7278 commit 9aec2a9

File tree

2 files changed

+17
-12
lines changed

2 files changed

+17
-12
lines changed

ads/model/deployment/model_deployment.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@
2929
from ads.model.common.utils import _is_json_serializable
3030
from ads.model.deployment.common.utils import send_request
3131
from ads.model.deployment.model_deployment_infrastructure import (
32+
DEFAULT_BANDWIDTH_MBPS,
33+
DEFAULT_REPLICA,
34+
DEFAULT_SHAPE_NAME,
35+
DEFAULT_OCPUS,
36+
DEFAULT_MEMORY_IN_GBS,
3237
MODEL_DEPLOYMENT_INFRASTRUCTURE_TYPE,
3338
ModelDeploymentInfrastructure,
3439
)
@@ -64,12 +69,6 @@
6469
MODEL_DEPLOYMENT_TYPE = "modelDeployment"
6570
MODEL_DEPLOYMENT_INFERENCE_SERVER_TRITON = "TRITON"
6671

67-
MODEL_DEPLOYMENT_INSTANCE_SHAPE = "VM.Standard.E4.Flex"
68-
MODEL_DEPLOYMENT_INSTANCE_OCPUS = 1
69-
MODEL_DEPLOYMENT_INSTANCE_MEMORY_IN_GBS = 16
70-
MODEL_DEPLOYMENT_INSTANCE_COUNT = 1
71-
MODEL_DEPLOYMENT_BANDWIDTH_MBPS = 10
72-
7372
MODEL_DEPLOYMENT_RUNTIMES = {
7473
ModelDeploymentRuntimeType.CONDA: ModelDeploymentCondaRuntime,
7574
ModelDeploymentRuntimeType.CONTAINER: ModelDeploymentContainerRuntime,
@@ -1601,7 +1600,7 @@ def _build_model_deployment_configuration_details(self) -> Dict:
16011600

16021601
instance_configuration = {
16031602
infrastructure.CONST_INSTANCE_SHAPE_NAME: infrastructure.shape_name
1604-
or MODEL_DEPLOYMENT_INSTANCE_SHAPE,
1603+
or DEFAULT_SHAPE_NAME,
16051604
}
16061605

16071606
if instance_configuration[infrastructure.CONST_INSTANCE_SHAPE_NAME].endswith(
@@ -1613,14 +1612,14 @@ def _build_model_deployment_configuration_details(self) -> Dict:
16131612
infrastructure.CONST_OCPUS: infrastructure.shape_config_details.get(
16141613
"ocpus", None
16151614
)
1616-
or MODEL_DEPLOYMENT_INSTANCE_OCPUS,
1615+
or DEFAULT_OCPUS,
16171616
infrastructure.CONST_MEMORY_IN_GBS: infrastructure.shape_config_details.get(
16181617
"memory_in_gbs", None
16191618
)
16201619
or infrastructure.shape_config_details.get(
16211620
"memoryInGBs", None
16221621
)
1623-
or MODEL_DEPLOYMENT_INSTANCE_MEMORY_IN_GBS,
1622+
or DEFAULT_MEMORY_IN_GBS,
16241623
}
16251624

16261625
if infrastructure.subnet_id:
@@ -1629,7 +1628,7 @@ def _build_model_deployment_configuration_details(self) -> Dict:
16291628
scaling_policy = {
16301629
infrastructure.CONST_POLICY_TYPE: "FIXED_SIZE",
16311630
infrastructure.CONST_INSTANCE_COUNT: infrastructure.replica
1632-
or MODEL_DEPLOYMENT_INSTANCE_COUNT,
1631+
or DEFAULT_REPLICA,
16331632
}
16341633

16351634
if not runtime.model_uri:
@@ -1660,7 +1659,7 @@ def _build_model_deployment_configuration_details(self) -> Dict:
16601659

16611660
model_configuration_details = {
16621661
infrastructure.CONST_BANDWIDTH_MBPS: infrastructure.bandwidth_mbps
1663-
or MODEL_DEPLOYMENT_BANDWIDTH_MBPS,
1662+
or DEFAULT_BANDWIDTH_MBPS,
16641663
infrastructure.CONST_INSTANCE_CONFIG: instance_configuration,
16651664
runtime.CONST_MODEL_ID: model_id,
16661665
infrastructure.CONST_SCALING_POLICY: scaling_policy,

ads/model/deployment/model_deployment_infrastructure.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
DEFAULT_BANDWIDTH_MBPS = 10
2323
DEFAULT_WEB_CONCURRENCY = 10
2424
DEFAULT_REPLICA = 1
25-
DEFAULT_SHAPE_NAME = "VM.Standard.E2.4"
25+
DEFAULT_SHAPE_NAME = "VM.Standard.E4.Flex"
26+
DEFAULT_OCPUS = 1
27+
DEFAULT_MEMORY_IN_GBS = 16
2628

2729
logger = logging.getLogger(__name__)
2830

@@ -625,4 +627,8 @@ def init(self) -> "ModelDeploymentInfrastructure":
625627
.with_web_concurrency(self.web_concurrency or DEFAULT_WEB_CONCURRENCY)
626628
.with_replica(self.replica or DEFAULT_REPLICA)
627629
.with_shape_name(self.shape_name or DEFAULT_SHAPE_NAME)
630+
.with_shape_config_details(
631+
ocpus=self.shape_config_details.get(self.CONST_OCPUS, DEFAULT_OCPUS),
632+
memory_in_gbs=self.shape_config_details.get(self.CONST_MEMORY_IN_GBS, DEFAULT_MEMORY_IN_GBS)
633+
)
628634
)

0 commit comments

Comments
 (0)