Commit 2608622

Merge branch 'main' into ODSC-70841_update_md_tracking

2 parents: 619a925 + f857ce7

12 files changed: +148 −60 lines changed

ads/aqua/common/enums.py

Lines changed: 6 additions & 0 deletions

@@ -49,6 +49,7 @@ class InferenceContainerType(ExtendedEnum):
 class InferenceContainerTypeFamily(ExtendedEnum):
     AQUA_VLLM_CONTAINER_FAMILY = "odsc-vllm-serving"
     AQUA_VLLM_V1_CONTAINER_FAMILY = "odsc-vllm-serving-v1"
+    AQUA_VLLM_LLAMA4_CONTAINER_FAMILY = "odsc-vllm-serving-llama4"
     AQUA_TGI_CONTAINER_FAMILY = "odsc-tgi-serving"
     AQUA_LLAMA_CPP_CONTAINER_FAMILY = "odsc-llama-cpp-serving"

@@ -119,4 +120,9 @@ class Platform(ExtendedEnum):
         InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
         InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
     ],
+    InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY: [
+        InferenceContainerTypeFamily.AQUA_VLLM_LLAMA4_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_V1_CONTAINER_FAMILY,
+        InferenceContainerTypeFamily.AQUA_VLLM_CONTAINER_FAMILY,
+    ],
 }
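The new ``odsc-vllm-serving-llama4`` family is added to a compatibility map whose entries list every family a given family can be co-deployed with. Below is a minimal sketch of how such a map can be consulted when grouping models; the ``COMPATIBILITY`` name and ``resolve_common_family`` helper are hypothetical stand-ins, and only the family strings come from the diff above.

    from typing import Dict, List

    # Hypothetical stand-in for the mapping extended in the diff above;
    # keys are container families, values are the families they can host.
    COMPATIBILITY: Dict[str, List[str]] = {
        "odsc-vllm-serving-llama4": [
            "odsc-vllm-serving-llama4",
            "odsc-vllm-serving-v1",
            "odsc-vllm-serving",
        ],
    }

    def resolve_common_family(families: List[str]) -> str:
        """Pick a family whose compatibility list covers every requested family."""
        for candidate in families:
            compatible = set(COMPATIBILITY.get(candidate, [candidate]))
            if all(f in compatible for f in families):
                return candidate
        raise ValueError(f"No compatible container family for: {families}")

    # A llama4 container can also host models targeting the plain vLLM families:
    print(resolve_common_family(["odsc-vllm-serving", "odsc-vllm-serving-llama4"]))
    # -> odsc-vllm-serving-llama4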

ads/aqua/common/utils.py

Lines changed: 6 additions & 2 deletions

@@ -832,7 +832,9 @@ def get_params_dict(params: Union[str, List[str]]) -> dict:
     """
     params_list = get_params_list(params) if isinstance(params, str) else params
     return {
-        split_result[0]: split_result[1] if len(split_result) > 1 else UNKNOWN
+        split_result[0]: " ".join(split_result[1:])
+        if len(split_result) > 1
+        else UNKNOWN
         for split_result in (x.split() for x in params_list)
     }

@@ -881,7 +883,9 @@ def build_params_string(params: dict) -> str:
         A params string.
     """
     return (
-        " ".join(f"{name} {value}" for name, value in params.items()).strip()
+        " ".join(
+            f"{name} {value}" if value else f"{name}" for name, value in params.items()
+        ).strip()
         if params
         else UNKNOWN
     )
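Taken together, the two changes let parameter values contain spaces and let flag-only parameters round-trip cleanly. A standalone sketch of the updated behavior (simplified re-implementation; ``UNKNOWN`` stands in for the module's constant, here an empty string):

    UNKNOWN = ""

    def get_params_dict(params_list):
        # Multi-token values are joined instead of truncated to the first token.
        return {
            parts[0]: " ".join(parts[1:]) if len(parts) > 1 else UNKNOWN
            for parts in (p.split() for p in params_list)
        }

    def build_params_string(params):
        # Flag-only params no longer pick up a trailing space when rebuilt.
        return " ".join(
            f"{name} {value}" if value else f"{name}" for name, value in params.items()
        ).strip()

    params = ["--dtype bfloat16", "--trust-remote-code", "--chat-template {a} {b}"]
    d = get_params_dict(params)
    assert d["--chat-template"] == "{a} {b}"
    assert build_params_string(d) == "--dtype bfloat16 --trust-remote-code --chat-template {a} {b}"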

ads/aqua/config/container_config.py

Lines changed: 38 additions & 39 deletions

@@ -7,6 +7,7 @@
 from oci.data_science.models import ContainerSummary
 from pydantic import Field

+from ads.aqua import logger
 from ads.aqua.config.utils.serializer import Serializable
 from ads.aqua.constants import (
     SERVICE_MANAGED_CONTAINER_URI_SCHEME,

@@ -168,50 +169,47 @@ def from_service_config(
         container_type = container.family_name
         usages = [x.upper() for x in container.usages]
         if "INFERENCE" in usages or "MULTI_MODEL" in usages:
+            # Extract additional configurations
+            additional_configurations = {}
+            try:
+                additional_configurations = (
+                    container.workload_configuration_details_list[
+                        0
+                    ].additional_configurations
+                )
+            except (AttributeError, IndexError) as ex:
+                logger.debug(
+                    "Failed to extract `additional_configurations` for container '%s': %s",
+                    getattr(container, "container_name", "<unknown>"),
+                    ex,
+                )
+
             container_item.platforms.append(
-                container.workload_configuration_details_list[
-                    0
-                ].additional_configurations.get("platforms")
+                additional_configurations.get("platforms")
             )
             container_item.model_formats.append(
-                container.workload_configuration_details_list[
-                    0
-                ].additional_configurations.get("modelFormats")
+                additional_configurations.get("modelFormats")
             )
+
+            # Parse environment variables from `additional_configurations`.
+            # Only keys present in the configuration will be added to the result.
+            config_keys = {
+                "MODEL_DEPLOY_PREDICT_ENDPOINT": UNKNOWN,
+                "MODEL_DEPLOY_HEALTH_ENDPOINT": UNKNOWN,
+                "MODEL_DEPLOY_ENABLE_STREAMING": UNKNOWN,
+                "PORT": UNKNOWN,
+                "HEALTH_CHECK_PORT": UNKNOWN,
+                "VLLM_USE_V1": UNKNOWN,
+            }
+
             env_vars = [
-                {
-                    "MODEL_DEPLOY_PREDICT_ENDPOINT": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get(
-                        "MODEL_DEPLOY_PREDICT_ENDPOINT", UNKNOWN
-                    )
-                },
-                {
-                    "MODEL_DEPLOY_HEALTH_ENDPOINT": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get(
-                        "MODEL_DEPLOY_HEALTH_ENDPOINT", UNKNOWN
-                    )
-                },
-                {
-                    "MODEL_DEPLOY_ENABLE_STREAMING": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get(
-                        "MODEL_DEPLOY_ENABLE_STREAMING", UNKNOWN
-                    )
-                },
-                {
-                    "PORT": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get("PORT", "")
-                },
-                {
-                    "HEALTH_CHECK_PORT": container.workload_configuration_details_list[
-                        0
-                    ].additional_configurations.get("HEALTH_CHECK_PORT", UNKNOWN),
-                },
+                {key: additional_configurations.get(key, default)}
+                for key, default in config_keys.items()
+                if key in additional_configurations
             ]
-            container_spec = AquaContainerConfigSpec(
+
+            # Build container spec
+            container_item.spec = AquaContainerConfigSpec(
                 cli_param=container.workload_configuration_details_list[0].cmd,
                 server_port=str(
                     container.workload_configuration_details_list[0].server_port

@@ -236,13 +234,14 @@ def from_service_config(
                 )
             ),
         )
-        container_item.spec = container_spec
+
         if "INFERENCE" in usages or "MULTI_MODEL" in usages:
             inference_items[container_type] = container_item
         if "FINE_TUNE" in usages:
             finetune_items[container_type] = container_item
         if "EVALUATION" in usages:
             evaluate_items[container_type] = container_item
+
         return cls(
             inference=inference_items, finetune=finetune_items, evaluate=evaluate_items
         )
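The net effect: the index-0 workload configuration is read once inside a try/except instead of five times inline, and only keys actually present in ``additional_configurations`` are emitted as env vars. A runnable sketch of that pattern, with stub classes standing in for the OCI ``ContainerSummary`` object (all stub names are illustrative):

    import logging

    logger = logging.getLogger(__name__)

    class _WorkloadConfig:  # stub for a workload_configuration_details_list item
        additional_configurations = {"platforms": "ARM", "PORT": "8080"}

    class _Container:  # stub for oci.data_science.models.ContainerSummary
        container_name = "odsc-vllm-serving"
        workload_configuration_details_list = [_WorkloadConfig()]

    container = _Container()
    additional_configurations = {}
    try:
        additional_configurations = (
            container.workload_configuration_details_list[0].additional_configurations
        )
    except (AttributeError, IndexError) as ex:
        # Missing list entries or attributes no longer raise; they are logged instead.
        logger.debug(
            "Failed to extract `additional_configurations` for container '%s': %s",
            getattr(container, "container_name", "<unknown>"),
            ex,
        )

    # Only keys actually present in the configuration make it into env_vars.
    config_keys = {"PORT": "", "HEALTH_CHECK_PORT": "", "VLLM_USE_V1": ""}
    env_vars = [
        {key: additional_configurations.get(key, default)}
        for key, default in config_keys.items()
        if key in additional_configurations
    ]
    print(env_vars)  # [{'PORT': '8080'}]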

ads/aqua/model/model.py

Lines changed: 11 additions & 4 deletions

@@ -316,11 +316,17 @@ def create_multi(
         # )

         # check if model is a fine-tuned model and if so, add the fine tuned weights path to the fine_tune_weights_location pydantic field
-        is_fine_tuned_model = Tags.AQUA_FINE_TUNED_MODEL_TAG in source_model.freeform_tags
+        is_fine_tuned_model = (
+            Tags.AQUA_FINE_TUNED_MODEL_TAG in source_model.freeform_tags
+        )

         if is_fine_tuned_model:
-            model.model_id, model.model_name = extract_base_model_from_ft(source_model)
-            model_artifact_path, model.fine_tune_weights_location = extract_fine_tune_artifacts_path(source_model)
+            model.model_id, model.model_name = extract_base_model_from_ft(
+                source_model
+            )
+            model_artifact_path, model.fine_tune_weights_location = (
+                extract_fine_tune_artifacts_path(source_model)
+            )

         else:
             # Retrieve model artifact for base models

@@ -380,7 +386,8 @@ def create_multi(
             raise AquaValueError(
                 "The selected models are associated with different container families: "
                 f"{list(selected_models_deployment_containers)}."
-                "For multi-model deployment, all models in the group must share the same container family."
+                "For multi-model deployment, all models in the group must belong to the same container "
+                "family or to compatible container families."
             )
         else:
             deployment_container = selected_models_deployment_containers.pop()

ads/opctl/operator/lowcode/forecast/model/prophet.py

Lines changed: 1 addition & 1 deletion

@@ -304,7 +304,7 @@ def explain_model(self):
             # Global Expl
             g_expl = self.drop_horizon(expl_df).mean()
             g_expl.name = s_id
-            global_expl.append(g_expl)
+            global_expl.append(np.abs(g_expl))
         self.global_explanation = pd.concat(global_expl, axis=1)
         self.formatted_global_explanation = (
             self.global_explanation / self.global_explanation.sum(axis=0) * 100
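Taking the absolute value before aggregation matters because the downstream normalization divides by the column sum; signed contributions can cancel there and make the resulting percentages blow up or flip sign. A tiny illustration with made-up numbers:

    import numpy as np
    import pandas as pd

    g_expl = pd.Series({"trend": 5.0, "weekly": -4.0}, name="series_1")

    # Signed values: the column sum is 1.0, so percentages explode.
    signed = pd.concat([g_expl], axis=1)
    print(signed / signed.sum(axis=0) * 100)    # trend -> 500%, weekly -> -400%

    # Absolute values: the column sum is 9.0, so shares are sensible.
    absolute = pd.concat([np.abs(g_expl)], axis=1)
    print(absolute / absolute.sum(axis=0) * 100)  # trend -> ~55.6%, weekly -> ~44.4%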

docs/source/release_notes.rst

Lines changed: 15 additions & 0 deletions

@@ -2,6 +2,21 @@
 Release Notes
 =============

+2.13.9
+-------
+Release date: May 19, 2025
+
+* Additional precision support in ForecastOperator.
+* AI Quick Actions: Use defined metadata to include configuration for fine-tuned models.
+* AI Quick Actions: Support for embedding models in multi-model deployments.
+* AI Quick Actions: Fixed a bug so that multi-model deployment reads the model artifact JSON directly instead of accessing the service bucket when creating a new grouped model.
+* AI Quick Actions: Telemetry improvements, including a thread pool instead of an unbounded number of threads for telemetry.
+* AI Quick Actions: Support for the ``list`` API for compute capacity reservations to onboard Bring-Your-Own-Reservation (BYOR).
+* AI Quick Actions: Fixed a bug so that deployment parameters can accept multiple values.
+* AI Quick Actions: Enhanced model deployment logic for the vLLM architecture version.
+* AI Quick Actions: Enhanced retrieval of deployment configurations for fine-tuned models.
+
 2.13.8
 -------
 Release date: April 15, 2025

docs/source/user_guide/large_language_model/aqua_client.rst

Lines changed: 28 additions & 8 deletions

@@ -46,8 +46,7 @@ Sync Usage
     client = Client(endpoint="https://<MD_OCID>/predict")
     response = client.chat(
         messages=[{"role": "user", "content": "Tell me a joke."}],
-        payload={"model": "odsc-llm"},
-        stream=False,
+        payload={"model": "odsc-llm"}
     )
     print(response)

@@ -58,7 +57,7 @@ Sync Usage
     from ads.aqua import Client
     ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-    client = Client(endpoint="https://<MD_OCID>/predict")
+    client = Client(endpoint="https://<MD_OCID>/predictWithResponseStream")
     response = client.chat(
         messages=[{"role": "user", "content": "Tell me a joke."}],

@@ -97,8 +96,7 @@ The following examples demonstrate how to perform the same operations using the
     client = AsyncClient(endpoint="https://<MD_OCID>/predict")
     response = await client.generate(
         prompt="Tell me a joke",
-        payload={"model": "odsc-llm"},
-        stream=False,
+        payload={"model": "odsc-llm"}
     )
     print(response)

@@ -109,7 +107,7 @@ The following examples demonstrate how to perform the same operations using the
     from ads.aqua import AsyncClient
     ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

-    client = AsyncClient(endpoint="https://<MD_OCID>/predict")
+    client = AsyncClient(endpoint="https://<MD_OCID>/predictWithResponseStream")
     async for chunk in await client.generate(
         prompt="Tell me a joke",
         payload={"model": "odsc-llm"},

@@ -225,11 +223,33 @@ The synchronous client, ``OpenAI``, extends the OpenAI client. If no HTTP client
                 "content": "Tell me a joke.",
             }
         ],
-        # stream=True, # enable for streaming
     )

     print(response)

+**Streaming**
+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
+.. code-block:: python
+
+    client = OpenAI(
+        base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
+    )
+
+    response = client.chat.completions.create(
+        model="odsc-llm",
+        messages=[
+            {
+                "role": "user",
+                "content": "Tell me a joke.",
+            }
+        ],
+        stream=True
+    )
+
+    for chunk in response:
+        print(chunk)
+

 **Asynchronous Client**

@@ -246,7 +266,7 @@ The asynchronous client, ``AsynOpenAI``, extends the AsyncOpenAI client. If no a
     async def test_async() -> None:
         client_async = AsyncOpenAI(
-            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predict/v1",
+            base_url="https://modeldeployment.us-ashburn-1.oci.customer-oci.com/<OCID>/predictWithResponseStream/v1",
         )
         response = await client_async.chat.completions.create(
             model="odsc-llm",

docs/source/user_guide/large_language_model/llamaindex_integration.rst

Lines changed: 11 additions & 4 deletions

@@ -82,6 +82,7 @@ Streaming

 Using ``stream_complete`` endpoint
 -------------------------------
+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.

 .. code-block:: python3

@@ -92,7 +93,7 @@ Using ``stream_complete`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )

     for chunk in llm.stream_complete("Tell me a joke"):

@@ -101,6 +102,8 @@ Using ``stream_chat`` endpoint
 Using ``stream_chat`` endpoint
 ----------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads

@@ -111,7 +114,7 @@ Using ``stream_chat`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )
     response = llm.stream_chat(
         [

@@ -176,6 +179,8 @@ Async Streaming
 Using ``astream_complete`` endpoint
 ---------------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads

@@ -185,7 +190,7 @@ Using ``astream_complete`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )

     async for chunk in await llm.astream_complete("Tell me a joke"):

@@ -194,6 +199,8 @@ Using ``astream_chat`` endpoint
 Using ``astream_chat`` endpoint
 -----------------------------

+For streaming, a dedicated endpoint must be used: ``/predictWithResponseStream``.
+
 .. code-block:: python3

     import ads

@@ -204,7 +211,7 @@ Using ``astream_chat`` endpoint

     llm = OCIDataScience(
         model="odsc-llm",
-        endpoint="https://<MD_OCID>/predict",
+        endpoint="https://<MD_OCID>/predictWithResponseStream",
     )
     response = await llm.stream_chat(
         [
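A consolidated sketch of the pattern these hunks apply: streaming entry points pair with the ``/predictWithResponseStream`` endpoint, while non-streaming ``complete``/``chat`` keep using ``/predict``. This assumes the ``llama-index-llms-oci-data-science`` integration is installed (import path as documented for that package):

    import ads
    from llama_index.llms.oci_data_science import OCIDataScience

    ads.set_auth(auth="security_token", profile="<replace-with-your-profile>")

    # Streaming calls must target the dedicated streaming endpoint.
    llm = OCIDataScience(
        model="odsc-llm",
        endpoint="https://<MD_OCID>/predictWithResponseStream",
    )

    for chunk in llm.stream_complete("Tell me a joke"):
        print(chunk.delta, end="")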

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi"

 # Required
 name = "oracle_ads"  # the install (PyPI) name; name for local build in [tool.flit.module] section below
-version = "2.13.8"
+version = "2.13.9"

 # Optional
 description = "Oracle Accelerated Data Science SDK"
