Skip to content

Commit 619a925

Browse files
committed
ODSC-70841 Improve Model Deployment Status Tracking
1 parent f6e3f00 commit 619a925

File tree

2 files changed

+42
-18
lines changed

2 files changed

+42
-18
lines changed

ads/aqua/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@
5454
SERVICE_MANAGED_CONTAINER_URI_SCHEME = "dsmc://"
5555
SUPPORTED_FILE_FORMATS = ["jsonl"]
5656
MODEL_BY_REFERENCE_OSS_PATH_KEY = "artifact_location"
57+
DEFAULT_WAIT_TIME = 1200
58+
DEFAULT_POLL_INTERVAL = 10
5759

5860
CONSOLE_LINK_RESOURCE_TYPE_MAPPING = {
5961
"datasciencemodel": "models",

ads/aqua/modeldeployment/deployment.py

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from typing import Dict, List, Optional, Union
99

1010
from cachetools import TTLCache, cached
11+
import concurrent.futures
12+
from ads.common.work_request import DataScienceWorkRequest
1113
from oci.data_science.models import ModelDeploymentShapeSummary
1214
from pydantic import ValidationError
1315

@@ -43,6 +45,8 @@
4345
MODEL_BY_REFERENCE_OSS_PATH_KEY,
4446
MODEL_NAME_DELIMITER,
4547
UNKNOWN_DICT,
48+
DEFAULT_WAIT_TIME,
49+
DEFAULT_POLL_INTERVAL
4650
)
4751
from ads.aqua.data import AquaResourceIdentifier
4852
from ads.aqua.model import AquaModelApp
@@ -80,6 +84,9 @@
8084
from ads.model.model_metadata import ModelCustomMetadataItem
8185
from ads.telemetry import telemetry
8286

87+
THREAD_POOL_SIZE = 16
88+
thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=THREAD_POOL_SIZE)
89+
8390

8491
class AquaDeploymentApp(AquaApp):
8592
"""Provides a suite of APIs to interact with Aqua model deployments within the Oracle
@@ -780,11 +787,18 @@ def _create_deployment(
780787
.with_runtime(container_runtime)
781788
).deploy(wait_for_completion=False)
782789

783-
deployment_id = deployment.id
790+
791+
792+
deployment_id = deployment.id
784793
logger.info(
785794
f"Aqua model deployment {deployment_id} created for model {aqua_model_id}."
786795
)
787796

797+
thread_pool.submit(self.get_deployment_status,
798+
model_deployment_id=deployment_id,
799+
work_request_id=deployment.dsc_model_deployment.workflow_req_id,
800+
model_type=model_type)
801+
788802
# we arbitrarily choose last 8 characters of OCID to identify MD in telemetry
789803
telemetry_kwargs = {"ocid": get_ocid_substring(deployment_id, key_len=8)}
790804

@@ -1312,22 +1326,30 @@ def list_shapes(self, **kwargs) -> List[ComputeShapeSummary]:
13121326
]
13131327

13141328

1315-
def get_deployment_status(self,model_deployment_id: str, work_request_id : str) :
1316-
# category= "aqua/{model_type}/deployment/status", action= "FAILED/SUCCEEDED", detail="Error message from Work request", value= {"ocid": md_ocid[:8]}
1317-
# # tracks unique evaluation that were created for the given evaluation source
1318-
# self.telemetry.record_event_async(
1319-
# category="aqua/evaluation",
1320-
# action="create",
1321-
# detail=self._get_service_model_name(evaluation_source),
1322-
# )
1323-
1324-
1325-
return
1329+
def get_deployment_status(self,model_deployment_id: str, work_request_id : str, model_type : str) :
1330+
1331+
telemetry_kwargs = {"ocid": get_ocid_substring(model_deployment_id, key_len=8)}
13261332

1327-
def get_deployment_status_async(self,model_deployment_id: str, work_request_id : str) :
1328-
# tracks unique evaluation that were created for the given evaluation source
1333+
try:
1334+
DataScienceWorkRequest(work_request_id).wait_work_request(
1335+
progress_bar_description="Creating model deployment",
1336+
max_wait_time=DEFAULT_WAIT_TIME,
1337+
poll_interval=DEFAULT_POLL_INTERVAL
1338+
)
1339+
except Exception as e:
1340+
logger.error(
1341+
"Error while trying to create model deployment: " + str(e)
1342+
)
1343+
self.telemetry.record_event_async(
1344+
category=f"aqua/{model_type}/deployment/status",
1345+
action="FAILED",
1346+
detail="Error creating model deployment",
1347+
**telemetry_kwargs
1348+
)
1349+
13291350
self.telemetry.record_event_async(
1330-
category="aqua/evaluation",
1331-
action="create",
1332-
detail=self._get_service_model_name(evaluation_source),
1333-
)
1351+
category=f"aqua/{model_type}/deployment/status",
1352+
action="SUCCEEDED",
1353+
detail=" Create model deployment successful",
1354+
**telemetry_kwargs
1355+
)

0 commit comments

Comments
 (0)