
Commit 2bf8ce7

Odsc 50180/negative r2 (#448)
2 parents 3eb3240 + d0adf1a commit 2bf8ce7

File tree

20 files changed: +19227 -251 lines
Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
+name: "[Py3.8][Py3.10] Operators Tests"
+
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - "ads/opctl/operator/**"
+      - "**requirements.txt"
+      - ".github/workflows/run-operators*.yml"
+      - "test-requirements-operators.txt"
+
+# Cancel in progress workflows on pull_requests.
+# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+# hack for https://github.com/actions/cache/issues/810#issuecomment-1222550359
+env:
+  SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5
+
+jobs:
+  test:
+    name: python ${{ matrix.python-version }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.10.8"]
+
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+          cache: "pip"
+          cache-dependency-path: |
+            pyproject.toml
+            "**requirements.txt"
+            "test-requirements-operators.txt"
+
+      - uses: ./.github/workflows/set-dummy-conf
+        name: "Test config setup"
+
+      - name: "Run Operators Tests"
+        timeout-minutes: 60
+        shell: bash
+        run: |
+          set -x # print commands that are executed
+          $CONDA/bin/conda init
+          source /home/runner/.bashrc
+          pip install -r test-requirements-operators.txt
+          python -m pytest -v -p no:warnings --durations=5 tests/operators

ads/common/oci_client.py

Lines changed: 4 additions & 2 deletions

@@ -17,6 +17,7 @@
 from oci.resource_search import ResourceSearchClient
 from oci.secrets import SecretsClient
 from oci.vault import VaultsClient
+
 logger = logging.getLogger(__name__)


@@ -64,13 +65,14 @@ def _client_impl(self, client):
             "data_labeling_dp": DataLabelingClient,
             "data_labeling_cp": DataLabelingManagementClient,
             "resource_search": ResourceSearchClient,
-            "data_catalog": DataCatalogClient
+            "data_catalog": DataCatalogClient,
         }
         try:
             from oci.feature_store import FeatureStoreClient
+
             client_map["feature_store"] = FeatureStoreClient
         except ImportError:
-            logger.warning("OCI SDK with feature store support is not installed")
+            logger.debug("OCI SDK with feature store support is not installed")
             pass

         assert (

ads/opctl/distributed/cmds.py

Lines changed: 15 additions & 8 deletions

@@ -4,18 +4,22 @@
 # Copyright (c) 2022, 2023 Oracle and/or its affiliates.
 # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

-import os
-import fsspec
-import subprocess
-import yaml
 import json
+import os
 import re
-import docker
+import subprocess
 from configparser import ConfigParser
-
-from ads.common.auth import create_signer, AuthType, AuthContext
 from urllib.parse import urlparse
-from ads.jobs import Job, DataScienceJobRun
+
+import fsspec
+import yaml
+
+from ads.common.auth import AuthContext, AuthType, create_signer
+from ads.common.decorator.runtime_dependency import (
+    OptionalDependency,
+    runtime_dependency,
+)
+from ads.jobs import Job
 from ads.jobs.builders.runtimes.artifact import Artifact
 from ads.opctl.utils import publish_image as publish_image_cmd
 from ads.opctl.utils import run_command
@@ -137,6 +141,7 @@ def show_config_info(job_id, work_dir, cluster_file_name, worker_info, **kwargs)
    )


+@runtime_dependency(module="docker", install_from=OptionalDependency.OPCTL)
 def verify_image(img_name):
     """
     Verify if the input image exists in OCI registry
@@ -151,6 +156,8 @@ def verify_image(img_name):
    -------

    """
+    import docker
+
    client = docker.from_env()
    try:
        client.images.get_registry_data(img_name)
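
The `@runtime_dependency` decorator added above lets `ads.opctl.distributed.cmds` be imported even when the optional `docker` package is absent; the real import is deferred until `verify_image` actually runs. A minimal sketch of that deferred-import pattern, assuming nothing about ADS's actual decorator internals (the `needs_module` name and the error message are illustrative only):

import functools
import importlib


def needs_module(module_name, install_hint):
    """Decorate a function so an optional dependency is imported only when
    the function is called, with a helpful error if it is missing."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                importlib.import_module(module_name)
            except ImportError as exc:
                raise ModuleNotFoundError(
                    f"`{module_name}` is required for `{func.__name__}`. "
                    f"Install it with: {install_hint}"
                ) from exc
            return func(*args, **kwargs)

        return wrapper

    return decorator


@needs_module("docker", 'pip install "oracle-ads[opctl]"')
def verify_image(img_name):
    # The actual import happens only after the decorator confirmed it exists.
    import docker

    client = docker.from_env()
    return client.images.get_registry_data(img_name)

Keeping optional dependencies out of module-level imports means the rest of the opctl commands keep working when only a subset of extras is installed.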

ads/opctl/operator/lowcode/forecast/model/arima.py

Lines changed: 4 additions & 8 deletions

@@ -87,6 +87,7 @@ def _build_model(self) -> pd.DataFrame:

             fitted_values[target] = model.predict_in_sample(X=X_in)
             actual_values[target] = y
+            actual_values[target].index = pd.to_datetime(y.index)

             # Build future dataframe
             start_date = y.index.values[-1]
@@ -107,12 +108,7 @@ def _build_model(self) -> pd.DataFrame:
             )
             yhat_clean = pd.DataFrame(yhat, index=yhat.index, columns=["yhat"])

-            dt_columns[target] = pd.concat(
-                [
-                    df_encoded[self.spec.datetime_column.name],
-                    pd.Series(yhat_clean.index),
-                ]
-            )
+            dt_columns[target] = df_encoded[self.spec.datetime_column.name]
             conf_int_clean = pd.DataFrame(
                 conf_int, index=yhat.index, columns=["yhat_lower", "yhat_upper"]
             )
@@ -139,10 +135,10 @@ def _build_model(self) -> pd.DataFrame:
         for cat in self.categories:
             output_i = pd.DataFrame()
             output_i["Date"] = dt_columns[f"{col}_{cat}"]
-            output_i = output_i.set_index("Date")
             output_i["Series"] = cat
-            output_i["input_value"] = actual_values[f"{col}_{cat}"]
+            output_i = output_i.set_index("Date")

+            output_i["input_value"] = actual_values[f"{col}_{cat}"]
             output_i["fitted_value"] = fitted_values[f"{col}_{cat}"]
             output_i["forecast_value"] = outputs[f"{col}_{cat}"]["yhat"]
             output_i[yhat_upper_name] = outputs[f"{col}_{cat}"]["yhat_upper"]
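
The added `pd.to_datetime(y.index)` conversion and the placement of `set_index("Date")` matter because pandas aligns on the index when a Series is assigned into a DataFrame column: once "Date" is a datetime index, the actual and fitted series land on their matching dates instead of producing NaNs. A small self-contained illustration of that alignment behavior, using toy data rather than the operator's real frames:

import pandas as pd

# A fitted/actual series indexed by datetime, as in the ARIMA model output.
dates = pd.date_range("2023-01-01", periods=3, freq="D")
actuals = pd.Series([10.0, 11.0, 12.0], index=dates)

out = pd.DataFrame({"Date": dates})
out["Series"] = "sales"
out = out.set_index("Date")

# Because `out` is now indexed by Date, this assignment aligns on the
# datetime index, so each actual value lands on its matching date.
out["input_value"] = actuals
print(out)

# If the frame still had its default RangeIndex, or the series index were
# strings rather than datetimes, the same assignment would align on
# mismatched labels and fill the column with NaNs.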

ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py

Lines changed: 0 additions & 3 deletions

@@ -7,7 +7,6 @@
 import pandas as pd
 from ..operator_config import ForecastOperatorConfig
 from .. import utils
-from ads.opctl.operator.lowcode.forecast.utils import default_signer
 from .transformations import Transformations
 from ads.opctl import logger
 import pandas as pd
@@ -40,7 +39,6 @@ def _load_data(self, spec):
        raw_data = utils._load_data(
            filename=spec.historical_data.url,
            format=spec.historical_data.format,
-            storage_options=default_signer(),
            columns=spec.historical_data.columns,
        )
        self.original_user_data = raw_data.copy()
@@ -71,7 +69,6 @@ def _load_data(self, spec):
        additional_data = utils._load_data(
            filename=spec.additional_data.url,
            format=spec.additional_data.format,
-            storage_options=default_signer(),
            columns=spec.additional_data.columns,
        )
        additional_data = data_transformer._sort_by_datetime_col(additional_data)

ads/opctl/operator/lowcode/forecast/utils.py

Lines changed: 9 additions & 2 deletions

@@ -17,9 +17,13 @@
    explained_variance_score,
    mean_absolute_percentage_error,
    mean_squared_error,
-    r2_score,
 )

+try:
+    from scipy.stats import linregress
+except:
+    from sklearn.metrics import r2_score
+
 from ads.common.object_storage_details import ObjectStorageDetails
 from ads.dataset.label_encoder import DataFrameLabelEncoder
 from ads.opctl import logger
@@ -358,7 +362,10 @@ def _build_metrics_df(y_true, y_pred, column_name):
    metrics["sMAPE"] = smape(actual=y_true, predicted=y_pred)
    metrics["MAPE"] = mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred)
    metrics["RMSE"] = np.sqrt(mean_squared_error(y_true=y_true, y_pred=y_pred))
-    metrics["r2"] = r2_score(y_true=y_true, y_pred=y_pred)
+    try:
+        metrics["r2"] = linregress(y_true, y_pred).rvalue ** 2
+    except:
+        metrics["r2"] = r2_score(y_true=y_true, y_pred=y_pred)
    metrics["Explained Variance"] = explained_variance_score(
        y_true=y_true, y_pred=y_pred
    )
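
This is the change behind the commit title: sklearn's `r2_score` is the coefficient of determination, which can be arbitrarily negative when the forecast performs worse than predicting the mean, while the squared Pearson correlation from `scipy.stats.linregress` stays within [0, 1]. The diff keeps `r2_score` as a fallback when SciPy is unavailable. A short sketch contrasting the two on toy values (numbers are illustrative only):

import numpy as np
from scipy.stats import linregress
from sklearn.metrics import r2_score

y_true = np.array([10.0, 12.0, 14.0, 16.0])
# A biased forecast: the right trend, but a large constant offset.
y_pred = y_true + 25.0

print(r2_score(y_true=y_true, y_pred=y_pred))   # strongly negative (-124.0)
print(linregress(y_true, y_pred).rvalue ** 2)   # 1.0: perfectly correlated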

docs/source/user_guide/apachespark/setup-installation.rst

Lines changed: 1 addition & 2 deletions

@@ -24,7 +24,7 @@ Activate the conda environment to upgrade to the latest ``oracle-ads``
 .. code-block:: shell

     conda activate /home/datascience/conda/pyspark30_p37_cpu_v5
-    pip install oracle-ads[data_science, data, opctl] --upgrade
+    pip install "oracle-ads[data_science, data, opctl]" --upgrade


 Configuring core-site.xml
@@ -200,4 +200,3 @@ Data Flow to access logs bucket, use a policy like:
     ALLOW SERVICE dataflow TO READ objects IN tenancy WHERE target.bucket.name='dataflow-logs'

 For more information, see the `Data Flow documentation <https://docs.oracle.com/en-us/iaas/data-flow/using/dfs_getting_started.htm#set_up_admin>`__.
-

docs/source/user_guide/big_data_service/boilerplate/create_conda.rst

Lines changed: 1 addition & 2 deletions

@@ -4,6 +4,5 @@ The following are the recommended steps to create a conda environment to connect
 - ``odsc conda install -s pyspark30_p37_cpu_v3``: Install the PySpark conda environment.
 - ``conda activate /home/datascience/conda/pyspark30_p37_cpu_v3``: Activate the PySpark conda environment so that you can modify it.
 - ``pip uninstall oracle_ads``: Uninstall the old ADS package in this environment.
-- ``pip install oracle_ads[bds]``: Install the latest version of ADS that contains BDS support.
+- ``pip install "oracle_ads[bds]"``: Install the latest version of ADS that contains BDS support.
 - ``conda install sasl``: Install ``sasl``.
-

docs/source/user_guide/cli/quickstart.rst

Lines changed: 14 additions & 14 deletions

@@ -61,79 +61,79 @@ To work with gradient boosting models, install the ``boosted`` module. This modu

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[boosted]
+    $ python3 -m pip install "oracle-ads[boosted]"

 For big data use cases using Oracle Big Data Service (BDS), install the ``bds`` module. It includes the following libraries: ``ibis-framework[impala]``, ``hdfs[kerberos]`` and ``sqlalchemy``.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[bds]
+    $ python3 -m pip install "oracle-ads[bds]"

 To work with a broad set of data formats (for example, Excel, Avro, etc.) install the ``data`` module. It includes the following libraries: ``fastavro``, ``openpyxl``, ``pandavro``, ``asteval``, ``datefinder``, ``htmllistparse``, and ``sqlalchemy``.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[data]
+    $ python3 -m pip install "oracle-ads[data]"

 To work with geospatial data install the ``geo`` module. It includes the ``geopandas`` and libraries from the ``viz`` module.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[geo]
+    $ python3 -m pip install "oracle-ads[geo]"

 Install the ``notebook`` module to use ADS within the Oracle Cloud Infrastructure Data Science service `Notebook Session <https://docs.oracle.com/en-us/iaas/data-science/using/manage-notebook-sessions.htm>`_. This module installs ``ipywidgets`` and ``ipython`` libraries.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[notebook]
+    $ python3 -m pip install "oracle-ads[notebook]"

 To work with ONNX-compatible run times and libraries designed to maximize performance and model portability, install the ``onnx`` module. It includes the following libraries, ``onnx``, ``onnxruntime``, ``onnxmltools``, ``skl2onnx``, ``xgboost``, ``lightgbm`` and libraries from the ``viz`` module.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[onnx]
+    $ python3 -m pip install "oracle-ads[onnx]"

 For infrastructure tasks, install the ``opctl`` module. It includes the following libraries, ``oci-cli``, ``docker``, ``conda-pack``, ``nbconvert``, ``nbformat``, and ``inflection``.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[opctl]
+    $ python3 -m pip install "oracle-ads[opctl]"

 For hyperparameter optimization tasks install the ``optuna`` module. It includes the ``optuna`` and libraries from the ``viz`` module.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[optuna]
+    $ python3 -m pip install "oracle-ads[optuna]"

 For Spark tasks install the ``spark`` module.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[spark]
+    $ python3 -m pip install "oracle-ads[spark]"

 Install the ``tensorflow`` module to include ``tensorflow`` and libraries from the ``viz`` module.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[tensorflow]
+    $ python3 -m pip install "oracle-ads[tensorflow]"

 For text related tasks, install the ``text`` module. This will include the ``wordcloud``, ``spacy`` libraries.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[text]
+    $ python3 -m pip install "oracle-ads[text]"

 Install the ``torch`` module to include ``pytorch`` and libraries from the ``viz`` module.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[torch]
+    $ python3 -m pip install "oracle-ads[torch]"

 Install the ``viz`` module to include libraries for visualization tasks. Some of the key packages are ``bokeh``, ``folium``, ``seaborn`` and related packages.

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[viz]
+    $ python3 -m pip install "oracle-ads[viz]"

 See ``pyproject.toml`` file ``[project.optional-dependencies]`` section for full list of modules and its list of extra libraries.

@@ -143,4 +143,4 @@ Multiple extra dependencies can be installed together. For example:

 .. code-block:: bash

-    $ python3 -m pip install oracle-ads[notebook,viz,text]
+    $ python3 -m pip install "oracle-ads[notebook,viz,text]"

docs/source/user_guide/operators/forecasting_operator/faq.rst

Lines changed: 4 additions & 0 deletions

@@ -9,3 +9,7 @@ More details in the documentation here: https://docs.oracle.com/en-us/iaas/tools
 **How do I learn More about AutoTS?**

 More details in the documentation here: https://winedarksea.github.io/AutoTS/build/html/source/tutorial.html
+
+**Pip Install Failing with "ERROR: No matching distribution found for oracle-automlx==23.2.3; extra == "forecast""**
+
+AutoMLx only supports Python >=3.8, !=3.9, <=3.10.7. If you are building on Python 3.9 or 3.10.8+, no AutoMLx distribution will be available. It is recommended to use the conda pack available through Notebook Sessions, or to use Python 3.8.
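
A quick interpreter check before installing the ``forecast`` extra, based on the version bounds stated in the FAQ answer above (treat the exact bounds as indicative):

import sys

# Per the FAQ: Python 3.8 works, 3.9 does not, and 3.10 only up to 3.10.7.
v = sys.version_info
compatible = v[:2] == (3, 8) or ((3, 10, 0) <= v[:3] <= (3, 10, 7))
print(f"Python {v.major}.{v.minor}.{v.micro} AutoMLx-compatible: {compatible}")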
