29
29
from ads .model .common .utils import _is_json_serializable
30
30
from ads .model .deployment .common .utils import send_request
31
31
from ads .model .deployment .model_deployment_infrastructure import (
32
+ DEFAULT_BANDWIDTH_MBPS ,
33
+ DEFAULT_REPLICA ,
34
+ DEFAULT_SHAPE_NAME ,
35
+ DEFAULT_OCPUS ,
36
+ DEFAULT_MEMORY_IN_GBS ,
32
37
MODEL_DEPLOYMENT_INFRASTRUCTURE_TYPE ,
33
38
ModelDeploymentInfrastructure ,
34
39
)
64
69
MODEL_DEPLOYMENT_TYPE = "modelDeployment"
65
70
MODEL_DEPLOYMENT_INFERENCE_SERVER_TRITON = "TRITON"
66
71
67
- MODEL_DEPLOYMENT_INSTANCE_SHAPE = "VM.Standard.E4.Flex"
68
- MODEL_DEPLOYMENT_INSTANCE_OCPUS = 1
69
- MODEL_DEPLOYMENT_INSTANCE_MEMORY_IN_GBS = 16
70
- MODEL_DEPLOYMENT_INSTANCE_COUNT = 1
71
- MODEL_DEPLOYMENT_BANDWIDTH_MBPS = 10
72
-
73
72
MODEL_DEPLOYMENT_RUNTIMES = {
74
73
ModelDeploymentRuntimeType .CONDA : ModelDeploymentCondaRuntime ,
75
74
ModelDeploymentRuntimeType .CONTAINER : ModelDeploymentContainerRuntime ,
@@ -1601,7 +1600,7 @@ def _build_model_deployment_configuration_details(self) -> Dict:
1601
1600
1602
1601
instance_configuration = {
1603
1602
infrastructure .CONST_INSTANCE_SHAPE_NAME : infrastructure .shape_name
1604
- or MODEL_DEPLOYMENT_INSTANCE_SHAPE ,
1603
+ or DEFAULT_SHAPE_NAME ,
1605
1604
}
1606
1605
1607
1606
if instance_configuration [infrastructure .CONST_INSTANCE_SHAPE_NAME ].endswith (
@@ -1613,14 +1612,14 @@ def _build_model_deployment_configuration_details(self) -> Dict:
1613
1612
infrastructure .CONST_OCPUS : infrastructure .shape_config_details .get (
1614
1613
"ocpus" , None
1615
1614
)
1616
- or MODEL_DEPLOYMENT_INSTANCE_OCPUS ,
1615
+ or DEFAULT_OCPUS ,
1617
1616
infrastructure .CONST_MEMORY_IN_GBS : infrastructure .shape_config_details .get (
1618
1617
"memory_in_gbs" , None
1619
1618
)
1620
1619
or infrastructure .shape_config_details .get (
1621
1620
"memoryInGBs" , None
1622
1621
)
1623
- or MODEL_DEPLOYMENT_INSTANCE_MEMORY_IN_GBS ,
1622
+ or DEFAULT_MEMORY_IN_GBS ,
1624
1623
}
1625
1624
1626
1625
if infrastructure .subnet_id :
@@ -1629,7 +1628,7 @@ def _build_model_deployment_configuration_details(self) -> Dict:
1629
1628
scaling_policy = {
1630
1629
infrastructure .CONST_POLICY_TYPE : "FIXED_SIZE" ,
1631
1630
infrastructure .CONST_INSTANCE_COUNT : infrastructure .replica
1632
- or MODEL_DEPLOYMENT_INSTANCE_COUNT ,
1631
+ or DEFAULT_REPLICA ,
1633
1632
}
1634
1633
1635
1634
if not runtime .model_uri :
@@ -1660,7 +1659,7 @@ def _build_model_deployment_configuration_details(self) -> Dict:
1660
1659
1661
1660
model_configuration_details = {
1662
1661
infrastructure .CONST_BANDWIDTH_MBPS : infrastructure .bandwidth_mbps
1663
- or MODEL_DEPLOYMENT_BANDWIDTH_MBPS ,
1662
+ or DEFAULT_BANDWIDTH_MBPS ,
1664
1663
infrastructure .CONST_INSTANCE_CONFIG : instance_configuration ,
1665
1664
runtime .CONST_MODEL_ID : model_id ,
1666
1665
infrastructure .CONST_SCALING_POLICY : scaling_policy ,
0 commit comments