
Commit 7ff684f

Merge branch 'main' into aqua/ADS_MS_changes
2 parents 642e8f9 + e65f09c commit 7ff684f

12 files changed (+109, -22 lines)


README-development.md

Lines changed: 2 additions & 2 deletions
@@ -4,7 +4,7 @@
 The Oracle Accelerated Data Science (ADS) SDK used by data scientists and analysts for
 data exploration and experimental machine learning to democratize machine learning and
 analytics by providing easy-to-use,
-performant, and user friendly tools that
+performant, and user-friendly tools that
 brings together the best of data science practices.
 
 The ADS SDK helps you connect to different data sources, perform exploratory data analysis,
@@ -176,7 +176,7 @@ pip install -r test-requirements.txt
 ```
 
 ### Step 2: Create local .env files
-Running the local JuypterLab server requires setting OCI authentication, proxy, and OCI namespace parameters. Adapt this .env file with your specific OCI profile and OCIDs to set these variables.
+Running the local JupyterLab server requires setting OCI authentication, proxy, and OCI namespace parameters. Adapt this .env file with your specific OCI profile and OCIDs to set these variables.
 
 ```
 CONDA_BUCKET_NS="your_conda_bucket"

ads/aqua/common/enums.py

Lines changed: 14 additions & 0 deletions
@@ -2,6 +2,8 @@
 # Copyright (c) 2024, 2025 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
 
+from typing import Dict, List
+
 from ads.common.extended_enum import ExtendedEnum
 
 
@@ -106,3 +108,15 @@ class ModelFormat(ExtendedEnum):
 class Platform(ExtendedEnum):
     ARM_CPU = "ARM_CPU"
     NVIDIA_GPU = "NVIDIA_GPU"
+
+
+# This dictionary defines compatibility groups for container families.
+# The structure is:
+# - Key: The preferred container family to use when multiple compatible families are selected.
+# - Value: A list of all compatible families (including the preferred one).
+CONTAINER_FAMILY_COMPATIBILITY: Dict[str, List[str]] = {
+    InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY: [
+        InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
+    ],
+}

ads/aqua/common/utils.py

Lines changed: 38 additions & 0 deletions
@@ -37,6 +37,7 @@
 
 from ads.aqua.common.entities import GPUShapesIndex
 from ads.aqua.common.enums import (
+    CONTAINER_FAMILY_COMPATIBILITY,
     InferenceContainerParamType,
     InferenceContainerType,
     RqsAdditionalDetails,
@@ -1277,3 +1278,40 @@ def load_gpu_shapes_index(
     )
 
     return GPUShapesIndex(**data)
+
+
+def get_preferred_compatible_family(selected_families: set[str]) -> str:
+    """
+    Determines the preferred container family from a given set of container families.
+
+    This function is used in the context of multi-model deployment to handle cases
+    where the models selected for deployment use different, but compatible, container families.
+
+    It checks the input `selected_families` set against the `CONTAINER_FAMILY_COMPATIBILITY` map.
+    If a compatibility group exists that fully includes all the families in the input,
+    the corresponding key (i.e., the preferred family) is returned.
+
+    Parameters
+    ----------
+    selected_families : set[str]
+        A set of container family identifiers.
+
+    Returns
+    -------
+    Optional[str]
+        The preferred container family if all families are compatible within one group;
+        otherwise `None`, indicating that no compatible family group was found.
+
+    Example
+    -------
+    >>> get_preferred_compatible_family({"odsc-vllm-serving", "odsc-vllm-serving-v1"})
+    'odsc-vllm-serving-v1'
+
+    >>> get_preferred_compatible_family({"odsc-vllm-serving", "odsc-tgi-serving"})
+    None  # Incompatible families
+    """
+    for preferred, compatible_list in CONTAINER_FAMILY_COMPATIBILITY.items():
+        if selected_families.issubset(set(compatible_list)):
+            return preferred
+
+    return None
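The lookup above can be exercised outside the repo with a standalone sketch: the map below is a stand-in for `CONTAINER_FAMILY_COMPATIBILITY` (which is built from `InferenceContainerTypeFamily` members), using the plain family strings from the docstring example.

```python
# Standalone sketch of the preferred-family lookup. COMPATIBILITY is a
# stand-in for the real CONTAINER_FAMILY_COMPATIBILITY map.
from typing import Dict, List, Optional

COMPATIBILITY: Dict[str, List[str]] = {
    "odsc-vllm-serving-v1": ["odsc-vllm-serving-v1", "odsc-vllm-serving"],
}


def preferred_family(selected: set) -> Optional[str]:
    # Return the group key whose member list covers every selected family.
    for preferred, members in COMPATIBILITY.items():
        if selected.issubset(set(members)):
            return preferred
    return None


print(preferred_family({"odsc-vllm-serving", "odsc-vllm-serving-v1"}))  # odsc-vllm-serving-v1
print(preferred_family({"odsc-vllm-serving", "odsc-tgi-serving"}))      # None
```

Because the check is `issubset`, a selection containing only one member of a group (or only the preferred family itself) also resolves to the group key.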

ads/aqua/model/model.py

Lines changed: 17 additions & 6 deletions
@@ -38,6 +38,7 @@
     generate_tei_cmd_var,
     get_artifact_path,
     get_hf_model_info,
+    get_preferred_compatible_family,
     list_os_files_with_extension,
     load_config,
     upload_folder,
@@ -340,15 +341,25 @@ def create_multi(
 
             selected_models_deployment_containers.add(deployment_container)
 
-        # Check if the all models in the group shares same container family
-        if len(selected_models_deployment_containers) > 1:
+        if not selected_models_deployment_containers:
             raise AquaValueError(
-                "The selected models are associated with different container families: "
-                f"{list(selected_models_deployment_containers)}."
-                "For multi-model deployment, all models in the group must share the same container family."
+                "None of the selected models are associated with a recognized container family. "
+                "Please review the selected models, or select a different group of models."
             )
 
-        deployment_container = selected_models_deployment_containers.pop()
+        # Check if all models in the group share the same container family
+        if len(selected_models_deployment_containers) > 1:
+            deployment_container = get_preferred_compatible_family(
+                selected_families=selected_models_deployment_containers
+            )
+            if not deployment_container:
+                raise AquaValueError(
+                    "The selected models are associated with different container families: "
+                    f"{list(selected_models_deployment_containers)}. "
+                    "For multi-model deployment, all models in the group must share the same container family."
+                )
+        else:
+            deployment_container = selected_models_deployment_containers.pop()
 
         # Generate model group details
         timestamp = datetime.now().strftime("%Y%m%d")
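The new branching in `create_multi` can be sketched as a small standalone function. The compatibility map and family names are stand-ins, and `ValueError` substitutes for `AquaValueError`: empty selections fail fast, multiple families go through the compatibility lookup, and a single family is used directly.

```python
# Hedged sketch of the container-family resolution added to create_multi.
# COMPATIBILITY stands in for CONTAINER_FAMILY_COMPATIBILITY; ValueError
# stands in for AquaValueError.
from typing import Optional, Set

COMPATIBILITY = {
    "odsc-vllm-serving-v1": ["odsc-vllm-serving-v1", "odsc-vllm-serving"],
}


def resolve_deployment_container(families: Set[str]) -> str:
    if not families:
        # No model mapped to a recognized container family.
        raise ValueError("None of the selected models are associated with a recognized container family.")
    if len(families) > 1:
        # Multiple families: deploy only if they fall into one compatibility group.
        preferred: Optional[str] = next(
            (k for k, v in COMPATIBILITY.items() if families.issubset(set(v))), None
        )
        if not preferred:
            raise ValueError(
                f"The selected models are associated with different container families: {sorted(families)}."
            )
        return preferred
    # Exactly one family: use it as-is.
    return families.pop()


print(resolve_deployment_container({"odsc-vllm-serving", "odsc-vllm-serving-v1"}))
```

The design keeps the previous behavior (single shared family, or an error) and only adds a middle path: mixed families that resolve to one preferred container instead of failing outright.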

docs/source/user_guide/configuration/configuration.rst

Lines changed: 3 additions & 3 deletions
@@ -296,7 +296,7 @@ encryption keys.
 
 Master encryption keys can be generated internally by the Vault service
 or imported to the service from an external source. Once a master
-encryption key has been created, the Oracle Cloud Infrastruture API can
+encryption key has been created, the Oracle Cloud Infrastructure API can
 be used to generate data encryption keys that the Vault service returns
 to you. by default, a wrapping key is included with each vault. A
 wrapping key is a 4096-bit asymmetric encryption key pair based on the
@@ -673,7 +673,7 @@ prints it. This shows that the password was actually updated.
         wait_for_states=[oci.vault.models.Secret.LIFECYCLE_STATE_ACTIVE]).data
 
     # The secret OCID does not change.
-    print("Orginal Secret OCID: {}".format(secret_id))
+    print("Original Secret OCID: {}".format(secret_id))
     print("Updated Secret OCID: {}".format(secret_update.id))
 
     ### Read a secret's value.
@@ -685,7 +685,7 @@ prints it. This shows that the password was actually updated.
 
 .. parsed-literal::
 
-    Orginal Secret OCID: ocid1.vaultsecret.oc1.iad.amaaaaaav66vvnia2bmkbroin34eu2ghmubvmrtjdgo4yr6daewakacwuk4q
+    Original Secret OCID: ocid1.vaultsecret.oc1.iad.amaaaaaav66vvnia2bmkbroin34eu2ghmubvmrtjdgo4yr6daewakacwuk4q
     Updated Secret OCID: ocid1.vaultsecret.oc1.iad.amaaaaaav66vvnia2bmkbroin34eu2ghmubvmrtjdgo4yr6daewakacwuk4q
     {'database': 'datamart', 'username': 'admin', 'password': 'UpdatedPassword'}

docs/source/user_guide/configuration/vault.rst

Lines changed: 2 additions & 2 deletions
@@ -239,7 +239,7 @@ also retrieves the updated secret, converts it into a dictionary, and prints it.
         wait_for_states=[oci.vault.models.Secret.LIFECYCLE_STATE_ACTIVE]).data
 
     # The secret OCID does not change.
-    print("Orginal Secret OCID: {}".format(secret_id))
+    print("Original Secret OCID: {}".format(secret_id))
     print("Updated Secret OCID: {}".format(secret_update.id))
 
     ### Read a secret's value.
@@ -251,7 +251,7 @@ also retrieves the updated secret, converts it into a dictionary, and prints it.
 
 .. parsed-literal::
 
-    Orginal Secret OCID: ocid1.vaultsecret..<unique_ID>
+    Original Secret OCID: ocid1.vaultsecret..<unique_ID>
     Updated Secret OCID: ocid1.vaultsecret..<unique_ID>
     {'database': 'datamart', 'username': 'admin', 'password': 'UpdatedPassword'}

docs/source/user_guide/data_flow/dataflow.rst

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ In the preparation stage, you prepare the configuration object necessary to crea
 * ``pyspark_file_path``: The local path to your ``PySpark`` script.
 * ``script_bucket``: The bucket used to read/write the ``PySpark`` script in Object Storage.
 
-ADS checks that the bucket exists, and that you can write to it from your notebook sesssion. Optionally, you can change values for these parameters:
+ADS checks that the bucket exists, and that you can write to it from your notebook session. Optionally, you can change values for these parameters:
 
 * ``compartment_id``: The OCID of the compartment to create a Data Flow application. If it's not provided, the same compartment as your dataflow object is used.
 * ``driver_shape``: The driver shape used to create the application. The default value is ``"VM.Standard2.4"``.

docs/source/user_guide/data_flow/legacy_dataflow.rst

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ In the preparation stage, you prepare the configuration object necessary to crea
 * ``pyspark_file_path``: The local path to your ``PySpark`` script.
 * ``script_bucket``: The bucket used to read/write the ``PySpark`` script in Object Storage.
 
-ADS checks that the bucket exists, and that you can write to it from your notebook sesssion. Optionally, you can change values for these parameters:
+ADS checks that the bucket exists, and that you can write to it from your notebook session. Optionally, you can change values for these parameters:
 
 * ``compartment_id``: The OCID of the compartment to create a application. If it's not provided, the same compartment as your dataflow object is used.
 * ``driver_shape``: The driver shape used to create the application. The default value is ``"VM.Standard2.4"``.

docs/source/user_guide/operators/anomaly_detection_operator/use_cases.rst

Lines changed: 3 additions & 3 deletions
@@ -14,7 +14,7 @@ As a low-code extensible framework, operators enable a wide range of use cases.
 **Which Model is Right for You?**
 
 * Autots is a very comprehensive framework for time series data, winning the M6 benchmark. Parameters can be sent directly to AutoTS' AnomalyDetector class through the ``model_kwargs`` section of the yaml file.
-* AutoMLX is a propreitary modeling framework developed by Oracle's Labs team and distributed through OCI Data Science. Parameters can be sent directly to AutoMLX's AnomalyDetector class through the ``model_kwargs`` section of the yaml file.
+* AutoMLX is a proprietary modeling framework developed by Oracle's Labs team and distributed through OCI Data Science. Parameters can be sent directly to AutoMLX's AnomalyDetector class through the ``model_kwargs`` section of the yaml file.
 * Together these 2 frameworks train and tune more than 25 models, and deliver the est results.
 
 
@@ -39,9 +39,9 @@ As a low-code extensible framework, operators enable a wide range of use cases.
 
 **Feature Engineering**
 
-* The Operator will perform most feature engineering on your behalf, such as infering holidays, day of week,
+* The Operator will perform most feature engineering on your behalf, such as inferring holidays, day of week,
 
 
 **Latency**
 
-* The Operator is effectively a container distributed through the OCI Data Science platform. When deployed through Jobs or Model Deployment, customers can scale up the compute shape, memory size, and load balancer to make the prediciton progressively faster. Please consult an OCI Data Science Platform expert for more specifc advice.
+* The Operator is effectively a container distributed through the OCI Data Science platform. When deployed through Jobs or Model Deployment, customers can scale up the compute shape, memory size, and load balancer to make the prediction progressively faster. Please consult an OCI Data Science Platform expert for more specific advice.

docs/source/user_guide/quick_start/quick_start.rst

Lines changed: 1 addition & 1 deletion
@@ -10,5 +10,5 @@ Quick Start
 * :doc:`Evaluate Trained Models<../model_training/model_evaluation/quick_start>`
 * :doc:`Register, Manage, and Deploy Models<../model_registration/quick_start>`
 * :doc:`Store and Retrieve your data source credentials<../secrets/quick_start>`
-* :doc:`Conect to existing OCI Big Data Service<../big_data_service/quick_start>`
+* :doc:`Connect to existing OCI Big Data Service<../big_data_service/quick_start>`
