Commit 2608622

Merge branch 'main' into ODSC-70841_update_md_tracking

2 parents: 619a925 + f857ce7

12 files changed: +148 −60 lines changed

ads/aqua/common/enums.py

Lines changed: 6 additions & 0 deletions

@@ -49,6 +49,7 @@ class InferenceContainerType(ExtendedEnum):
 class InferenceContainerTypeFamily(ExtendedEnum):
     AQUA_VLLM_CONTAINER_FAMILY = "odsc-vllm-serving"
     AQUA_VLLM_V1_CONTAINER_FAMILY = "odsc-vllm-serving-v1"
+    AQUA_VLLM_LLAMA4_CONTAINER_FAMILY = "odsc-vllm-serving-llama4"
     AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving"
     AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving"

@@ -119,4 +120,9 @@ class Platform(ExtendedEnum):
         InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
         InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
     ],
+    InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY: [
+        InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
+    ],
 }
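The new ``odsc-vllm-serving-llama4`` family is added to a compatibility map whose entries list every family a given family can be co-deployed with. Below is a minimal sketch of how such a map can be consulted when grouping models; the ``COMPATIBILITY`` name and ``resolve_common_family`` helper are hypothetical stand-ins, and only the family strings come from the diff above.

    from typing import Dict, List

    # Hypothetical stand-in for the mapping extended in the diff above;
    # keys are container families, values are the families they can host.
    COMPATIBILITY: Dict[str, List[str]] = {
        "odsc-vllm-serving-llama4": [
            "odsc-vllm-serving-llama4",
            "odsc-vllm-serving-v1",
            "odsc-vllm-serving",
        ],
    }

    def resolve_common_family(families: List[str]) -> str:
        """Pick a family whose compatibility list covers every requested family."""
        for candidate in families:
            compatible = set(COMPATIBILITY.get(candidate, [candidate]))
            if all(f in compatible for f in families):
                return candidate
        raise ValueError(f"No compatible container family for: {families}")

    # A llama4 container can also host models targeting the plain vLLM families:
    print(resolve_common_family(["odsc-vllm-serving", "odsc-vllm-serving-llama4"]))
    # -> odsc-vllm-serving-llama4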

ads/aqua/common/utils.py

Lines changed: 6 additions & 2 deletions

@@ -832,7 +832,9 @@ def get_params_dict(params: Union[str, List[str]]) -> dict:
     """
     params_list = get_params_list(params) if isinstance(params, str) else params
     return {
-        split_result[0]: split_result[1] if len(split_result) > 1 else UNKNOWN
+        split_result[0]: " ".join(split_result[1:])
+        if len(split_result) > 1
+        else UNKNOWN
         for split_result in (x.split() for x in params_list)
     }

@@ -881,7 +883,9 @@ def build_params_string(params: dict) -> str:
         A params string.
     """
     return (
-        " ".join(f"{name} {value}" for name, value in params.items()).strip()
+        " ".join(
+            f"{name} {value}" if value else f"{name}" for name, value in params.items()
+        ).strip()
         if params
         else UNKNOWN
     )
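Taken together, the two changes let parameter values contain spaces and let flag-only parameters round-trip cleanly. A standalone sketch of the updated behavior (simplified re-implementation; ``UNKNOWN`` stands in for the module's constant, here an empty string):

    UNKNOWN = ""

    def get_params_dict(params_list):
        # Multi-token values are joined instead of truncated to the first token.
        return {
            parts[0]: " ".join(parts[1:]) if len(parts) > 1 else UNKNOWN
            for parts in (p.split() for p in params_list)
        }

    def build_params_string(params):
        # Flag-only params no longer pick up a trailing space when rebuilt.
        return " ".join(
            f"{name} {value}" if value else f"{name}" for name, value in params.items()
        ).strip()

    params = ["--dtype bfloat16", "--trust-remote-code", "--chat-template {a} {b}"]
    d = get_params_dict(params)
    assert d["--chat-template"] == "{a} {b}"
    assert build_params_string(d) == "--dtype bfloat16 --trust-remote-code --chat-template {a} {b}"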

ads/aqua/config/container_config.py

Lines changed: 38 additions & 39 deletions

@@ -7,6 +7,7 @@
 from oci.data_science.models import ContainerSummary
 from pydantic import Field

+from ads.aqua import logger
 from ads.aqua.config.utils.serializer import Serializable
 from ads.aqua.constants import (
     SERVICE_MANAGED_CONTAINER_URI_SCHEME,

@@ -168,50 +169,47 @@ def from_service_config(
         container_type = container.family_name
         usages = [x.upper() for x in container.usages]
         if "INFERENCE" in usages or "MULTI_MODEL" in usages:
+            # Extract additional configurations
+            additional_configurations = {}
+            try:
+                additional_configurations = (
+                    container.workload_configuration_details_list[
+                        0
+                    ].additional_configurations
+                )
+            except (AttributeError, IndexError) as ex:
+                logger.debug(
+                    "Failed to extract `additional_configurations` for container '%s': %s",
+                    getattr(container, "container_name", "<unknown>"),
+                    ex,
+                )
+
             container_item.platforms.append(
-                container.workload_configuration_details_list[
-                    0
-                ].additional_configurations.get("platforms")
+                additional_configurations.get("platforms")
             )
             container_item.model_formats.append(
-                container.workload_configuration_details_list[
-                    0
-                ].additional_configurations.get("modelFormats")
+                additional_configurations.get("modelFormats")
             )
+
+            # Parse environment variables from `additional_configurations`.
+            # Only keys present in the configuration will be added to the result.
+            config_keys = {
+                "MODEL_DEPLOY_PREDICT_ENDPOINT": UNKNOWN,
+                "MODEL_DEPLOY_HEALTH_ENDPOINT": UNKNOWN,
+                "MODEL_DEPLOY_ENABLE_STREAMING": UNKNOWN,
+                "PORT": UNKNOWN,
+                "HEALTH_CHECK_PORT": UNKNOWN,
+                "VLLM_USE_V1": UNKNOWN,
+            }
+
             env_vars = [
-                {
-                    "MODEL_DEPLOY_PREDICT_ENDPOINT": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get(
-                        "MODEL_DEPLOY_PREDICT_ENDPOINT", UNKNOWN
-                    )
-                },
-                {
-                    "MODEL_DEPLOY_HEALTH_ENDPOINT": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get(
-                        "MODEL_DEPLOY_HEALTH_ENDPOINT", UNKNOWN
-                    )
-                },
-                {
-                    "MODEL_DEPLOY_ENABLE_STREAMING": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get(
-                        "MODEL_DEPLOY_ENABLE_STREAMING", UNKNOWN
-                    )
-                },
-                {
-                    "PORT": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get("PORT", "")
-                },
-                {
-                    "HEALTH_CHECK_PORT": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get("HEALTH_CHECK_PORT", UNKNOWN),
-                },
+                {key: additional_configurations.get(key, default)}
+                for key, default in config_keys.items()
+                if key in additional_configurations
             ]
-            container_spec = AquaContainerConfigSpec(
+
+            # Build container spec
+            container_item.spec = AquaContainerConfigSpec(
                 cli_param=container.workload_configuration_details_list[0].cmd,
                 server_port=str(
                     container.workload_configuration_details_list[0].server_port

@@ -236,13 +234,14 @@ def from_service_config(
                 )
             ),
         )
-        container_item.spec = container_spec
+
         if "INFERENCE" in usages or "MULTI_MODEL" in usages:
             inference_items[container_type] = container_item
         if "FINE_TUNE" in usages:
             finetune_items[container_type] = container_item
         if "EVALUATION" in usages:
             evaluate_items[container_type] = container_item
+
         return cls(
             inference=inference_items, finetune=finetune_items, evaluate=evaluate_items
         )
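The net effect: the index-0 workload configuration is read once inside a try/except instead of five times inline, and only keys actually present in ``additional_configurations`` are emitted as env vars. A runnable sketch of that pattern, with stub classes standing in for the OCI ``ContainerSummary`` object (all stub names are illustrative):

    import logging

    logger = logging.getLogger(__name__)

    class _WorkloadConfig:  # stub for a workload_configuration_details_list item
        additional_configurations = {"platforms": "ARM", "PORT": "8080"}

    class _Container:  # stub for oci.data_science.models.ContainerSummary
        container_name = "odsc-vllm-serving"
        workload_configuration_details_list = [_WorkloadConfig()]

    container = _Container()
    additional_configurations = {}
    try:
        additional_configurations = (
            container.workload_configuration_details_list[0].additional_configurations
        )
    except (AttributeError, IndexError) as ex:
        # Missing list entries or attributes no longer raise; they are logged instead.
        logger.debug(
            "Failed to extract `additional_configurations` for container '%s': %s",
            getattr(container, "container_name", "<unknown>"),
            ex,
        )

    # Only keys actually present in the configuration make it into env_vars.
    config_keys = {"PORT": "", "HEALTH_CHECK_PORT": "", "VLLM_USE_V1": ""}
    env_vars = [
        {key: additional_configurations.get(key, default)}
        for key, default in config_keys.items()
        if key in additional_configurations
    ]
    print(env_vars)  # [{'PORT': '8080'}]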

ads/aqua/model/model.py

Lines changed: 11 additions & 4 deletions

@@ -316,11 +316,17 @@ def create_multi(
         # )

         # check if model is a fine-tuned model and if so, add the fine tuned weights path to the fine_tune_weights_location pydantic field
-        is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in source_model.freeform_tags
+        is_fine_tuned_model = (
+            Tags.AQUA_FINE_TUNED_MODEL_TAG in source_model.freeform_tags
+        )

         if is_fine_tuned_model:
-            model.model_id, model.model_name = extract_base_model_from_ft(source_model)
-            model_artifact_path, model.fine_tune_weights_location = extract_fine_tune_artifacts_path(source_model)
+            model.model_id, model.model_name = extract_base_model_from_ft(
+                source_model
+            )
+            model_artifact_path, model.fine_tune_weights_location = (
+                extract_fine_tune_artifacts_path(source_model)
+            )

         else:
             # Retrieve model artifact for base models

@@ -380,7 +386,8 @@ def create_multi(
             raise AquaValueError(
                 "The selected models are associated with different container families: "
                 f"{list(selected_models_deployment_containers)}."
-                "For multi-model deployment, all models in the group must share the same container family."
+                "For multi-model deployment, all models in the group must belong to the same container "
+                "family or to compatible container families."
             )
         else:
             deployment_container = selected_models_deployment_containers.pop()

ads/opctl/operator/lowcode/forecast/model/prophet.py

Lines changed: 1 addition & 1 deletion

@@ -304,7 +304,7 @@ def explain_model(self):
             # Global Expl
             g_expl = self.drop_horizon(expl_df).mean()
             g_expl.name = s_id
-            global_expl.append(g_expl)
+            global_expl.append(np.abs(g_expl))
         self.global_explanation = pd.concat(global_expl, axis=1)
         self.formatted_global_explanation = (
             self.global_explanation / self.global_explanation.sum(axis=0) * 100
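Taking the absolute value before aggregation matters because the downstream normalization divides by the column sum; signed contributions can cancel there and make the resulting percentages blow up or flip sign. A tiny illustration with made-up numbers:

    import numpy as np
    import pandas as pd

    g_expl = pd.Series({"trend": 5.0, "weekly": -4.0}, name="series_1")

    # Signed values: the column sum is 1.0, so percentages explode.
    signed = pd.concat([g_expl], axis=1)
    print(signed / signed.sum(axis=0) * 100)    # trend -> 500%, weekly -> -400%

    # Absolute values: the column sum is 9.0, so shares are sensible.
    absolute = pd.concat([np.abs(g_expl)], axis=1)
    print(absolute / absolute.sum(axis=0) * 100)  # trend -> ~55.6%, weekly -> ~44.4%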

docs/source/release_notes.rst

Lines changed: 15 additions & 0 deletions

@@ -2,6 +2,21 @@
 Release Notes
 =============

+2.13.9
+-------
+Release date: May 19, 2025
+
+* Additional precision support in ForecastOperator.
+* AI Quick Actions: Use defined metadata to include configuration for fine-tuned models.
+* AI Quick Actions: Support for embedding models in multi-model deployments.
+* AI Quick Actions: Fixed a bug so that multi-model deployment reads the model artifact JSON directly instead of accessing the service bucket when creating a new grouped model.
+* AI Quick Actions: Telemetry improvements, including a thread pool instead of an unbounded number of threads for telemetry.
+* AI Quick Actions: Support for the ``list`` API for compute capacity reservations to onboard Bring-Your-Own-Reservation (BYOR).
+* AI Quick Actions: Fixed a bug so that deployment parameters can accept multiple values.
+* AI Quick Actions: Enhanced model deployment logic for the vLLM architecture version.
+* AI Quick Actions: Enhanced retrieval of deployment configurations for fine-tuned models.
+
 2.13.8
 -------
 Release date: April 15, 2025

docs/source/user_guide/large_language_model/aqua_client.rst

Lines changed: 28 additions & 8 deletions

@@ -46,8 +46,7 @@ Sync Usage
     client = Client(endpoint="https://<MD_OCID>/predict")
     response = client.chat(
         messages=[{"role": "user", "content": "Tell me a joke."}],
-        payload={"model": "odsc-llm"},
-        stream=False,
+        payload={"model": "odsc-llm"}
     )
     print(response)

@@ -58,7 +57,7 @@ Sync Usage
     from ads.aqua import Client
     ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-    client = Client(endpoint="https://<MD_OCID>/predict")
+    client = Client(endpoint="https://<MD_OCID>/predictWithResponseStream")
     response = client.chat(
         messages=[{"role": "user", "content": "Tell me a joke."}],

@@ -97,8 +96,7 @@ The following examples demonstrate how to perform the same operations using the
     client = AsyncClient(endpoint="https://<MD_OCID>/predict")
     response = await client.generate(
         prompt="Tell me a joke",
-        payload={"model": "odsc-llm"},
-        stream=False,
+        payload={"model": "odsc-llm"}
     )
     print(response)

@@ -109,7 +107,7 @@ The following examples demonstrate how to perform the same operations using the
     from ads.aqua import AsyncClient
     ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-    client = AsyncClient(endpoint="https://<MD_OCID>/predict")
+    client = AsyncClient(endpoint="https://<MD_OCID>/predictWithResponseStream")
     async for chunk in await client.generate(
         prompt="Tell me a joke",
         payload={"model": "odsc-llm"},

@@ -225,11 +223,33 @@ The synchronous client, ``OpenAI``, extends the OpenAI client. If no HTTP client
                 "content": "Tell me a joke.",
             }
         ],
-        # stream=True, # enable for streaming
     )

     print(response)

+**Streaming**
+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
+.. code-block:: python
+
+    client = OpenAI(
+        base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
+    )
+
+    response = client.chat.completions.create(
+        model="odsc-llm",
+        messages=[
+            {
+                "role": "user",
+                "content": "Tell me a joke.",
+            }
+        ],
+        stream=True
+    )
+
+    for chunk in response:
+        print(chunk)
+

 **Asynchronous Client**

@@ -246,7 +266,7 @@ The asynchronous client, ``AsynOpenAI``, extends the AsyncOpenAI client. If no a
     async def test_async() -> None:
         client_async = AsyncOpenAI(
-            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predict/v1",
+            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
         )
         response = await client_async.chat.completions.create(
             model="odsc-llm",

docs/source/user_guide/large_language_model/llamaindex_integration.rst

Lines changed: 11 additions & 4 deletions

@@ -82,6 +82,7 @@ Streaming

 Using ``stream_complete`` endpoint
 -------------------------------
+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.

 .. code-block:: python3

@@ -92,7 +93,7 @@ Using ``stream_complete`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )

     for chunk in llm.stream_complete("Tell me a joke"):

@@ -101,6 +102,8 @@ Using ``stream_chat`` endpoint
 Using ``stream_chat`` endpoint
 ----------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads

@@ -111,7 +114,7 @@ Using ``stream_chat`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )
     response = llm.stream_chat(
         [

@@ -176,6 +179,8 @@ Async Streaming
 Using ``astream_complete`` endpoint
 ---------------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads

@@ -185,7 +190,7 @@ Using ``astream_complete`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )

     async for chunk in await llm.astream_complete("Tell me a joke"):

@@ -194,6 +199,8 @@ Using ``astream_chat`` endpoint
 Using ``astream_chat`` endpoint
 -----------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads

@@ -204,7 +211,7 @@ Using ``astream_chat`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )
     response = await llm.stream_chat(
         [
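A consolidated sketch of the pattern these hunks apply: streaming entry points pair with the ``/predictWithResponseStream`` endpoint, while non-streaming ``complete``/``chat`` keep using ``/predict``. This assumes the ``llama-index-llms-oci-data-science`` integration is installed (import path as documented for that package):

    import ads
    from llama_index.llms.oci_data_science import OCIDataScience

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    # Streaming calls must target the dedicated streaming endpoint.
    llm = OCIDataScience(
        model="odsc-llm",
        endpoint="https://<MD_OCID>/predictWithResponseStream",
    )

    for chunk in llm.stream_complete("Tell me a joke"):
        print(chunk.delta, end="")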

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi"

 # Required
 name = "oracle_ads"  # the install (PyPI) name; name for local build in [tool.flit.module] section below
-version = "2.13.8"
+version = "2.13.9"

 # Optional
 description = "Oracle Accelerated Data Science SDK"
