Skip to content

Commit 0eee27d

Browse files
Merge branch 'main' into update_cli_import
2 parents 42d7e1a + fc05985 commit 0eee27d

File tree

32 files changed

+4439
-446
lines changed

32 files changed

+4439
-446
lines changed

.github/workflows/run-operators-unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
strategy:
3232
fail-fast: false
3333
matrix:
34-
python-version: ["3.8", "3.10.8"]
34+
python-version: ["3.8"]
3535

3636
steps:
3737
- uses: actions/checkout@v4

CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
* @darenr @mayoor @mrDzurb @VipulMascarenhas @qiuosier

ads/common/serializer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ def from_dict(
464464
)
465465

466466
obj = cls(
467-
**{key: obj_dict.get(key) for key in allowed_fields if key in obj_dict}
467+
**{key: obj_dict.get(key) for key in allowed_fields}
468468
)
469469

470470
for key, value in obj_dict.items():

ads/dataset/label_encoder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def fit(self, X: "pandas.DataFrame"):
5252
5353
"""
5454
for column in X.columns:
55-
if X[column].dtype.name in ["object", "category"]:
55+
if X[column].dtype.name in ["object", "category", "bool"]:
5656
X[column] = X[column].astype(str)
5757
self.label_encoders[column] = LabelEncoder()
5858
self.label_encoders[column].fit(X[column])

ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
TESTS_PATH = os.path.join(_cwd, "resources", "tests.yaml")
3030
HTML_PATH = os.path.join(_cwd, "resources", "template.html")
3131
CONFIG_PATH = os.path.join(_cwd, "resources", "config.yaml")
32-
PYTHON_VER_PATTERN = "^([3])(\.[6-9])(\.\d+)?$"
32+
PYTHON_VER_PATTERN = "^([3])(\.([6-9]|1[0-2]))(\.\d+)?$"
3333
PAR_URL = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/WyjtfVIG0uda-P3-2FmAfwaLlXYQZbvPZmfX1qg0-sbkwEQO6jpwabGr2hMDBmBp/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json"
3434

3535
TESTS = {

ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
merge_category_columns,
1111
)
1212
from ads.opctl.operator.lowcode.common.data import AbstractData
13-
from ads.opctl.operator.lowcode.common.data import AbstractData
1413
from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
1514
from ads.opctl import logger
1615
import pandas as pd
@@ -56,6 +55,10 @@ def __init__(self, spec: AnomalyOperatorSpec):
5655
self.X_valid_dict = self.valid_data.X_valid_dict
5756
self.y_valid_dict = self.valid_data.y_valid_dict
5857

58+
# Returns raw data based on the series_id i.e; the merged target_category_column value
59+
def get_raw_data_by_cat(self, category):
60+
return self._data.get_raw_data_by_cat(category)
61+
5962

6063
class AnomalyOutput:
6164
def __init__(self, date_column):
@@ -94,38 +97,28 @@ def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
9497
outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
9598
return outliers
9699

97-
def get_inliers(self, data):
100+
def get_inliers(self, datasets):
98101
inliers = pd.DataFrame()
99102

100103
for category in self.list_categories():
101104
inliers = pd.concat(
102105
[
103106
inliers,
104-
self.get_inliers_by_cat(
105-
category,
106-
data[data[OutputColumns.Series] == category]
107-
.reset_index(drop=True)
108-
.drop(OutputColumns.Series, axis=1),
109-
),
107+
self.get_inliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
110108
],
111109
axis=0,
112110
ignore_index=True,
113111
)
114112
return inliers
115113

116-
def get_outliers(self, data):
114+
def get_outliers(self, datasets):
117115
outliers = pd.DataFrame()
118116

119117
for category in self.list_categories():
120118
outliers = pd.concat(
121119
[
122120
outliers,
123-
self.get_outliers_by_cat(
124-
category,
125-
data[data[OutputColumns.Series] == category]
126-
.reset_index(drop=True)
127-
.drop(OutputColumns.Series, axis=1),
128-
),
121+
self.get_outliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
129122
],
130123
axis=0,
131124
ignore_index=True,

ads/opctl/operator/lowcode/anomaly/model/automlx.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ class AutoMLXOperatorModel(AnomalyOperatorBaseModel):
2626
)
2727
def _build_model(self) -> pd.DataFrame:
2828
from automlx import init
29+
import logging
2930
try:
30-
init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
31+
init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
3132
except Exception as e:
3233
logger.info("Ray already initialized")
3334
date_column = self.spec.datetime_column.name

ads/opctl/operator/lowcode/anomaly/model/base_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -272,15 +272,15 @@ def _save_report(
272272
f2.write(f1.read())
273273

274274
if self.spec.generate_inliers:
275-
inliers = anomaly_output.get_inliers(self.datasets.data)
275+
inliers = anomaly_output.get_inliers(self.datasets)
276276
write_data(
277277
data=inliers,
278278
filename=os.path.join(unique_output_dir, self.spec.inliers_filename),
279279
format="csv",
280280
storage_options=storage_options,
281281
)
282282

283-
outliers = anomaly_output.get_outliers(self.datasets.data)
283+
outliers = anomaly_output.get_outliers(self.datasets)
284284
write_data(
285285
data=outliers,
286286
filename=os.path.join(unique_output_dir, self.spec.outliers_filename),

ads/opctl/operator/lowcode/anomaly/operator_config.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,21 @@ class TestData(InputData):
3636
"""Class representing operator specification test data details."""
3737

3838

39+
@dataclass(repr=True)
40+
class PreprocessingSteps(DataClassSerializable):
41+
"""Class representing preprocessing steps for operator."""
42+
43+
missing_value_imputation: bool = True
44+
outlier_treatment: bool = False
45+
46+
47+
@dataclass(repr=True)
48+
class DataPreprocessor(DataClassSerializable):
49+
"""Class representing operator specification preprocessing details."""
50+
51+
enabled: bool = True
52+
steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
53+
3954
@dataclass(repr=True)
4055
class AnomalyOperatorSpec(DataClassSerializable):
4156
"""Class representing operator specification."""
@@ -74,7 +89,9 @@ def __post_init__(self):
7489
self.generate_inliers if self.generate_inliers is not None else False
7590
)
7691
self.model_kwargs = self.model_kwargs or dict()
77-
92+
self.preprocessing = (
93+
self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
94+
)
7895

7996
@dataclass(repr=True)
8097
class AnomalyOperatorConfig(OperatorConfig):

ads/opctl/operator/lowcode/anomaly/schema.yaml

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -307,11 +307,23 @@ spec:
307307
description: "When provided, target_category_columns [list] indexes the data into multiple related datasets for anomaly detection"
308308

309309
preprocessing:
310-
type: boolean
310+
type: dict
311311
required: false
312-
default: true
313-
meta:
314-
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
312+
schema:
313+
enabled:
314+
type: boolean
315+
required: false
316+
default: true
317+
meta:
318+
description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
319+
steps:
320+
type: dict
321+
required: false
322+
schema:
323+
missing_value_imputation:
324+
type: boolean
325+
required: false
326+
default: true
315327

316328
generate_report:
317329
type: boolean

0 commit comments

Comments
 (0)