|
3 | 3 | # Copyright (c) 2023, 2025 Oracle and/or its affiliates.
|
4 | 4 | # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5 | 5 | import os
|
| 6 | +import json |
6 | 7 | import yaml
|
7 | 8 | import tempfile
|
8 | 9 | import subprocess
|
|
15 | 16 | import pathlib
|
16 | 17 | import datetime
|
17 | 18 | from ads.opctl.operator.cmd import run
|
| 19 | +from ads.opctl.operator.lowcode.forecast.__main__ import operate as forecast_operate |
| 20 | +from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorConfig |
18 | 21 |
|
19 | 22 |
|
20 | 23 | DATASET_PREFIX = f"{os.path.dirname(os.path.abspath(__file__))}/../data/timeseries/"
|
|
28 | 31 |
|
29 | 32 | MODELS = [
|
30 | 33 | "arima",
|
31 |
| - "automlx", |
| 34 | + # "automlx", |
32 | 35 | "prophet",
|
33 | 36 | "neuralprophet",
|
34 |
| - "autots", |
| 37 | + # "autots", |
35 | 38 | # "lgbforecast",
|
36 | 39 | "auto-select",
|
37 | 40 | ]
|
@@ -159,38 +162,76 @@ def test_load_datasets(model, data_details):
|
159 | 162 | print(train_metrics)
|
160 | 163 |
|
161 | 164 |
|
162 |
@pytest.mark.parametrize("model", MODELS[:1])
def test_pandas_to_historical(model):
    """Run the forecast operator end-to-end with an in-memory DataFrame.

    Loads dataset1 from disk, passes it as the ``historical_data`` payload
    (instead of a URL), runs the operator into a temporary results directory,
    and fails the test if the run produced an ``errors.json`` file.
    """
    historical = pd.read_csv(f"{DATASET_PREFIX}dataset1.csv")

    with tempfile.TemporaryDirectory() as workdir:
        results_dir = f"{workdir}/results"
        yaml_i = deepcopy(TEMPLATE_YAML)
        spec = yaml_i["spec"]
        spec["model"] = model
        # Hand the operator the DataFrame directly rather than a file URL.
        spec["historical_data"].pop("url")
        spec["historical_data"]["data"] = historical
        spec["target_column"] = "Y"
        spec["datetime_column"]["name"] = DATETIME_COL
        spec["horizon"] = PERIODS
        spec["output_directory"]["url"] = results_dir
        config = ForecastOperatorConfig.from_dict(yaml_i)
        forecast_operate(config)
        check_output_for_errors(results_dir)
175 | 182 |
|
176 | 183 |
|
177 |
@pytest.mark.parametrize("model", ["neuralprophet"])
def test_pandas_to_historical_test(model):
    """Run the operator with a train/test split supplied as DataFrames.

    Splits dataset4 so the last ``PERIODS`` rows form the test set, runs the
    operator into a temporary results directory, checks for an ``errors.json``
    file, and prints the resulting metrics.
    """
    full_df = pd.read_csv(f"{DATASET_PREFIX}dataset4.csv")
    train_df, test_df = full_df[:-PERIODS], full_df[-PERIODS:]

    with tempfile.TemporaryDirectory() as workdir:
        results_dir = f"{workdir}/results"
        yaml_i = deepcopy(TEMPLATE_YAML)
        spec = yaml_i["spec"]
        spec["model"] = model
        # Feed both splits in-memory; the template's URL entry is dropped.
        spec["historical_data"].pop("url")
        spec["historical_data"]["data"] = train_df
        spec["test_data"] = {"data": test_df}
        spec["target_column"] = "Y"
        spec["datetime_column"]["name"] = DATETIME_COL
        spec["horizon"] = PERIODS
        spec["output_directory"]["url"] = results_dir
        config = ForecastOperatorConfig.from_dict(yaml_i)
        forecast_operate(config)
        check_output_for_errors(results_dir)
        test_metrics = pd.read_csv(f"{results_dir}/metrics.csv")
        print(test_metrics)
193 | 206 |
|
def check_output_for_errors(output_data_path):
    """Fail loudly if an operator run left an ``errors.json`` in its output dir.

    The forecast operator writes ``errors.json`` into its output directory when
    a model run fails internally; without this check a failed run could look
    like a passing test.

    Args:
        output_data_path: Directory the operator wrote its results into.

    Raises:
        Exception: carrying the decoded ``errors.json`` payload when present.
        FileNotFoundError: if ``output_data_path`` does not exist.
    """
    # os.listdir is portable and avoids interpolating the path into a shell
    # command (the previous `ls -a` approach also listed "." and "..").
    files = os.listdir(output_data_path)

    if "errors.json" in files:
        errors_file_path = os.path.join(output_data_path, "errors.json")
        with open(errors_file_path, "r") as f:
            errors_content = json.load(f)
        # Surface the operator's own error payload as the test failure message.
        raise Exception(errors_content)

    print("No errors.json file found. Directory is clear.")
194 | 235 |
|
195 | 236 | def run_operator(
|
196 | 237 | historical_data_path,
|
@@ -229,9 +270,9 @@ def run_operator(
|
229 | 270 | sleep(0.1)
|
230 | 271 | subprocess.run(f"ls -a {output_data_path}", shell=True)
|
231 | 272 |
|
232 |
| - test_metrics = pd.read_csv(f"{tmpdirname}/results/test_metrics.csv") |
| 273 | + test_metrics = pd.read_csv(f"{tmpdirname}/results/metrics.csv") |
233 | 274 | print(test_metrics)
|
234 |
| - train_metrics = pd.read_csv(f"{tmpdirname}/results/metrics.csv") |
| 275 | + train_metrics = pd.read_csv(f"{tmpdirname}/results/train_metrics.csv") |
235 | 276 | print(train_metrics)
|
236 | 277 |
|
237 | 278 |
|
|
0 commit comments