Skip to content

Commit 3b1be52

Browse files
committed
test pandas df directly
1 parent 3838d2a commit 3b1be52

File tree

2 files changed

+74
-32
lines changed

2 files changed

+74
-32
lines changed

ads/opctl/operator/lowcode/common/transformations.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,12 +210,13 @@ def _outlier_treatment(self, df):
210210
-------
211211
A new Pandas DataFrame with treated outliers.
212212
"""
213-
df["z_score"] = (
213+
return df
214+
df["__z_score"] = (
214215
df[self.target_column_name]
215216
.groupby(DataColumns.Series)
216217
.transform(lambda x: (x - x.mean()) / x.std())
217218
)
218-
outliers_mask = df["z_score"].abs() > 3
219+
outliers_mask = df["__z_score"].abs() > 3
219220

220221
if df[self.target_column_name].dtype == np.int:
221222
df[self.target_column_name].astype(np.float)
@@ -225,7 +226,7 @@ def _outlier_treatment(self, df):
225226
.groupby(DataColumns.Series)
226227
.transform(lambda x: np.median(x))
227228
)
228-
df_ret = df.drop("z_score", axis=1)
229+
df_ret = df.drop("__z_score", axis=1)
229230
return df_ret
230231

231232
def _check_historical_dataset(self, df):

tests/operators/forecast/test_datasets.py

Lines changed: 70 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
44
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
55
import os
6+
import json
67
import yaml
78
import tempfile
89
import subprocess
@@ -15,6 +16,8 @@
1516
import pathlib
1617
import datetime
1718
from ads.opctl.operator.cmd import run
19+
from ads.opctl.operator.lowcode.forecast.__main__ import operate as forecast_operate
20+
from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorConfig
1821

1922

2023
DATASET_PREFIX = f"{os.path.dirname(os.path.abspath(__file__))}/../data/timeseries/"
@@ -28,10 +31,10 @@
2831

2932
MODELS = [
3033
"arima",
31-
"automlx",
34+
# "automlx",
3235
"prophet",
3336
"neuralprophet",
34-
"autots",
37+
# "autots",
3538
# "lgbforecast",
3639
"auto-select",
3740
]
@@ -159,38 +162,76 @@ def test_load_datasets(model, data_details):
159162
print(train_metrics)
160163

161164

162-
@pytest.mark.parametrize("model", MODELS[:-1])
163-
def test_pandas_df_historical(model):
165+
@pytest.mark.parametrize("model", MODELS[:1])
166+
def test_pandas_to_historical(model):
164167
df = pd.read_csv(f"{DATASET_PREFIX}dataset1.csv")
165168

166-
yaml_i = deepcopy(TEMPLATE_YAML)
167-
yaml_i["spec"]["model"] = model
168-
yaml_i["spec"]["historical_data"].pop("url")
169-
yaml_i["spec"]["historical_data"]["data"] = df
170-
yaml_i["spec"]["target_column"] = "Y"
171-
yaml_i["spec"]["datetime_column"]["name"] = DATETIME_COL
172-
yaml_i["spec"]["horizon"] = 5
173-
run(yaml_i, backend="operator.local", debug=False)
174-
subprocess.run(f"ls -a {output_data_path}", shell=True)
169+
with tempfile.TemporaryDirectory() as tmpdirname:
170+
output_data_path = f"{tmpdirname}/results"
171+
yaml_i = deepcopy(TEMPLATE_YAML)
172+
yaml_i["spec"]["model"] = model
173+
yaml_i["spec"]["historical_data"].pop("url")
174+
yaml_i["spec"]["historical_data"]["data"] = df
175+
yaml_i["spec"]["target_column"] = "Y"
176+
yaml_i["spec"]["datetime_column"]["name"] = DATETIME_COL
177+
yaml_i["spec"]["horizon"] = PERIODS
178+
yaml_i["spec"]["output_directory"]["url"] = output_data_path
179+
operator_config = ForecastOperatorConfig.from_dict(yaml_i)
180+
forecast_operate(operator_config)
181+
check_output_for_errors(output_data_path)
175182

176183

177-
@pytest.mark.parametrize("model", MODELS[:-1])
178-
def test_pandas_historical_test(model):
184+
@pytest.mark.parametrize("model", ["neuralprophet"])
185+
def test_pandas_to_historical_test(model):
179186
df = pd.read_csv(f"{DATASET_PREFIX}dataset4.csv")
180-
df_train = df[:-1]
181-
df_test = df[-1:]
187+
df_train = df[:-PERIODS]
188+
df_test = df[-PERIODS:]
182189

183-
yaml_i = deepcopy(TEMPLATE_YAML)
184-
yaml_i["spec"]["model"] = model
185-
yaml_i["spec"]["historical_data"].pop("url")
186-
yaml_i["spec"]["historical_data"]["data"] = df_train
187-
yaml_i["spec"]["test_data"]["data"] = df_test
188-
yaml_i["spec"]["target_column"] = "Y"
189-
yaml_i["spec"]["datetime_column"]["name"] = DATETIME_COL
190-
yaml_i["spec"]["horizon"] = 5
191-
run(yaml_i, backend="operator.local", debug=False)
192-
subprocess.run(f"ls -a {output_data_path}", shell=True)
190+
with tempfile.TemporaryDirectory() as tmpdirname:
191+
output_data_path = f"{tmpdirname}/results"
192+
yaml_i = deepcopy(TEMPLATE_YAML)
193+
yaml_i["spec"]["model"] = model
194+
yaml_i["spec"]["historical_data"].pop("url")
195+
yaml_i["spec"]["historical_data"]["data"] = df_train
196+
yaml_i["spec"]["test_data"] = {"data": df_test}
197+
yaml_i["spec"]["target_column"] = "Y"
198+
yaml_i["spec"]["datetime_column"]["name"] = DATETIME_COL
199+
yaml_i["spec"]["horizon"] = PERIODS
200+
yaml_i["spec"]["output_directory"]["url"] = output_data_path
201+
operator_config = ForecastOperatorConfig.from_dict(yaml_i)
202+
forecast_operate(operator_config)
203+
check_output_for_errors(output_data_path)
204+
test_metrics = pd.read_csv(f"{output_data_path}/metrics.csv")
205+
print(test_metrics)
193206

207+
def check_output_for_errors(output_data_path):
208+
# try:
209+
# List files in the directory
210+
result = subprocess.run(f"ls -a {output_data_path}", shell=True, check=True, text=True, capture_output=True)
211+
files = result.stdout.splitlines()
212+
213+
# Check if errors.json is in the directory
214+
if "errors.json" in files:
215+
errors_file_path = os.path.join(output_data_path, "errors.json")
216+
217+
# Read the errors.json file
218+
with open(errors_file_path, "r") as f:
219+
errors_content = json.load(f)
220+
221+
# Extract and raise the error message
222+
# error_message = errors_content.get("message", "An error occurred.")
223+
raise Exception(errors_content)
224+
225+
print("No errors.json file found. Directory is clear.")
226+
227+
# except subprocess.CalledProcessError as e:
228+
# print(f"Error listing files in directory: {e}")
229+
# except FileNotFoundError:
230+
# print("The directory does not exist.")
231+
# except json.JSONDecodeError:
232+
# print("errors.json is not a valid JSON file.")
233+
# except Exception as e:
234+
# print(f"Raised error: {e}")
194235

195236
def run_operator(
196237
historical_data_path,
@@ -229,9 +270,9 @@ def run_operator(
229270
sleep(0.1)
230271
subprocess.run(f"ls -a {output_data_path}", shell=True)
231272

232-
test_metrics = pd.read_csv(f"{tmpdirname}/results/test_metrics.csv")
273+
test_metrics = pd.read_csv(f"{tmpdirname}/results/metrics.csv")
233274
print(test_metrics)
234-
train_metrics = pd.read_csv(f"{tmpdirname}/results/metrics.csv")
275+
train_metrics = pd.read_csv(f"{tmpdirname}/results/train_metrics.csv")
235276
print(train_metrics)
236277

237278

0 commit comments

Comments (0)