Skip to content

Commit 8ec10a5

Browse files
add new shape and exception handling
1 parent 575fb31 commit 8ec10a5

File tree

4 files changed

+29
-16
lines changed

ads/aqua/config/config.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
32
# Copyright (c) 2024 Oracle and/or its affiliates.
43
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
54

@@ -14,5 +13,6 @@ def get_finetuning_config_defaults():
1413
"BM.GPU.A10.4": {"batch_size": 1, "replica": 1},
1514
"BM.GPU4.8": {"batch_size": 4, "replica": 1},
1615
"BM.GPU.A100-v2.8": {"batch_size": 6, "replica": 1},
16+
"BM.GPU.H100.8": {"batch_size": 6, "replica": 1},
1717
}
1818
}

ads/aqua/config/deployment_config_defaults.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
"BM.GPU.A10.4",
3232
"BM.GPU4.8",
3333
"BM.GPU.A100-v2.8",
34+
"BM.GPU.H100.8",
3435
"VM.Standard.A1.Flex"
3536
]
3637
}

ads/aqua/config/resource_limit_names.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
{
22
"BM.GPU.A10.4": "ds-gpu-a10-count",
33
"BM.GPU.A100-v2.8": "ds-gpu-a100-v2-count",
4+
"BM.GPU.H100.8": "ds-gpu-h100-count",
45
"BM.GPU4.8": "ds-gpu4-count",
56
"VM.GPU.A10.1": "ds-gpu-a10-count",
67
"VM.GPU.A10.2": "ds-gpu-a10-count"

ads/aqua/evaluation/evaluation.py

Lines changed: 26 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import re
88
import tempfile
99
from concurrent.futures import ThreadPoolExecutor, as_completed
10-
from dataclasses import asdict
10+
from dataclasses import asdict, fields
1111
from datetime import datetime, timedelta
1212
from pathlib import Path
1313
from threading import Lock
@@ -160,8 +160,9 @@ def create(
160160
create_aqua_evaluation_details = CreateAquaEvaluationDetails(**kwargs)
161161
except Exception as ex:
162162
raise AquaValueError(
163-
"Invalid create evaluation parameters. Allowable parameters are: "
164-
f"{', '.join(list(asdict(CreateAquaEvaluationDetails).keys()))}."
163+
"Invalid create evaluation parameters. "
164+
"Allowable parameters are: "
165+
f"{', '.join([field.name for field in fields(CreateAquaEvaluationDetails)])}."
165166
) from ex
166167

167168
if not is_valid_ocid(create_aqua_evaluation_details.evaluation_source_id):
@@ -178,18 +179,28 @@ def create(
178179
evaluation_source = ModelDeployment.from_id(
179180
create_aqua_evaluation_details.evaluation_source_id
180181
)
181-
if evaluation_source.runtime.type == ModelDeploymentRuntimeType.CONTAINER:
182-
runtime = ModelDeploymentContainerRuntime.from_dict(
183-
evaluation_source.runtime.to_dict()
182+
try:
183+
if (
184+
evaluation_source.runtime.type
185+
== ModelDeploymentRuntimeType.CONTAINER
186+
):
187+
runtime = ModelDeploymentContainerRuntime.from_dict(
188+
evaluation_source.runtime.to_dict()
189+
)
190+
inference_config = AquaContainerConfig.from_container_index_json(
191+
enable_spec=True
192+
).inference
193+
for container in inference_config.values():
194+
if container.name == runtime.image.split(":")[0]:
195+
eval_inference_configuration = (
196+
container.spec.evaluation_configuration
197+
)
198+
except Exception:
199+
logger.debug(
200+
f"Could not load inference config details for the evaluation id: "
201+
f"{create_aqua_evaluation_details.evaluation_source_id}. Please check if the container"
202+
f" runtime has the correct SMC image information."
184203
)
185-
inference_config = AquaContainerConfig.from_container_index_json(
186-
enable_spec=True
187-
).inference
188-
for container in inference_config.values():
189-
if container.name == runtime.image.split(":")[0]:
190-
eval_inference_configuration = (
191-
container.spec.evaluation_configuration
192-
)
193204
elif (
194205
DataScienceResource.MODEL
195206
in create_aqua_evaluation_details.evaluation_source_id
@@ -550,7 +561,7 @@ def _build_evaluation_runtime(
550561
metrics=metrics,
551562
),
552563
),
553-
**inference_configuration,
564+
**(inference_configuration or {}),
554565
},
555566
),
556567
"CONDA_BUCKET_NS": CONDA_BUCKET_NS,

0 commit comments

Comments
 (0)