Addressing review comments: use ARM_CPU/NVIDIA_GPU platform constants and the llama.cpp container-family enum

kumar-shivam-ranjan · kumar-shivam-ranjan · commit 5f3067c2069d · 2024-07-11T11:50:55.000+05:30
diff --git a/ads/aqua/common/enums.py b/ads/aqua/common/enums.py
@@ -49,6 +49,7 @@ class InferenceContainerType(str, metaclass=ExtendedEnumMeta):
 class InferenceContainerTypeFamily(str, metaclass=ExtendedEnumMeta):
     AQUA_VLLM_CONTAINER_FAMILY = "odsc-vllm-serving"
     AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving"
+    AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving"
 
 
 class InferenceContainerParamType(str, metaclass=ExtendedEnumMeta):
diff --git a/ads/aqua/constants.py b/ads/aqua/constants.py
@@ -21,7 +21,8 @@
 DEFAULT_FT_REPLICA = 1
 DEFAULT_FT_BATCH_SIZE = 1
 DEFAULT_FT_VALIDATION_SET_SIZE = 0.1
-
+ARM_CPU="arm_cpu"
+NVIDIA_GPU="nvidia_gpu"
 MAXIMUM_ALLOWED_DATASET_IN_BYTE = 52428800  # 1024 x 1024 x 50 = 50MB
 JOB_INFRASTRUCTURE_TYPE_DEFAULT_NETWORKING = "ME_STANDALONE"
 NB_SESSION_IDENTIFIER = "NB_SESSION_OCID"
diff --git a/ads/aqua/model/entities.py b/ads/aqua/model/entities.py
@@ -76,7 +76,7 @@ class AquaModelSummary(DataClassSerializable):
     ready_to_deploy: bool = True
     ready_to_finetune: bool = False
     ready_to_import: bool = False
-    platform: List[str] = field(default_factory=lambda: ["gpu"])
+    platform: List[str] = field(default_factory=lambda: ["nvidia_gpu"])
 
 
 @dataclass(repr=False)
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
@@ -11,20 +11,16 @@
 
 from ads.aqua import ODSC_MODEL_COMPARTMENT_OCID
 from ads.aqua.app import AquaApp
-from ads.aqua.common.enums import Tags
+from ads.aqua.common.enums import Tags, InferenceContainerTypeFamily
 from ads.aqua.common.errors import AquaRuntimeError
 from ads.aqua.common.utils import (
-    copy_model_config,
     create_word_icon,
     get_artifact_path,
-    load_config,
     read_file,
+    copy_model_config,
+    load_config,
 )
 from ads.aqua.constants import (
-    AQUA_MODEL_ARTIFACT_CONFIG,
-    AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME,
-    AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE,
-    AQUA_MODEL_TYPE_CUSTOM,
     LICENSE_TXT,
     MODEL_BY_REFERENCE_OSS_PATH_KEY,
     README,
@@ -36,6 +32,10 @@
     UNKNOWN,
     VALIDATION_METRICS,
     VALIDATION_METRICS_FINAL,
+    AQUA_MODEL_ARTIFACT_CONFIG,
+    AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME,
+    AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE,
+    AQUA_MODEL_TYPE_CUSTOM, ARM_CPU, NVIDIA_GPU,
 )
 from ads.aqua.model.constants import *
 from ads.aqua.model.entities import *
@@ -235,7 +235,7 @@ def get(self, model_id: str, load_model_card: Optional[bool] = True) -> "AquaMod
             try:
                 jobrun_ocid = ds_model.provenance_metadata.training_id
                 jobrun = self.ds_client.get_job_run(jobrun_ocid).data
-            except Exception:
+            except Exception as e:
                 logger.debug(
                     f"Missing jobrun information in the provenance metadata of the given model {model_id}."
                 )
@@ -580,16 +580,15 @@ def _create_model_catalog_entry(
             {
                 **verified_model.freeform_tags,
                 Tags.AQUA_SERVICE_MODEL_TAG: verified_model.id,
-                Tags.PLATFORM: "cpu" if is_gguf_model else "gpu",
             }
             if verified_model
             else {
                 Tags.AQUA_TAG: "active",
                 Tags.BASE_MODEL_CUSTOM: "true",
-                Tags.PLATFORM: "cpu" if is_gguf_model else "gpu",
             }
         )
         tags.update({Tags.BASE_MODEL_CUSTOM: "true"})
+        tags.update({Tags.PLATFORM: ARM_CPU if is_gguf_model else NVIDIA_GPU})
 
         # Remove `ready_to_import` tag that might get copied from service model.
         tags.pop(Tags.READY_TO_IMPORT, None)
@@ -700,7 +699,8 @@ def register(
         model_config = None
         if not import_model_details:
             import_model_details = ImportModelDetails(**kwargs)
-        is_gguf_model = import_model_details.inference_container == "odsc-llama-cpp"
+        is_gguf_model = import_model_details.inference_container == InferenceContainerTypeFamily.AQUA_LLAMA_CPP_CONTAINER_FAMILY
+        platform = ARM_CPU if is_gguf_model else NVIDIA_GPU
         if not is_gguf_model:
             try:
                 model_config = load_config(
@@ -792,7 +792,6 @@ def register(
         except:
             finetuning_container = None
 
-        platform = "cpu" if is_gguf_model else "gpu"
         aqua_model_attributes = dict(
             **self._process_model(ds_model, self.region),
             project_id=ds_model.project_id,