Commit e1d2587

adding new github action
1 parent bf7e119

4 files changed: +273 -201 lines changed
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
name: "[Py3.8][Py3.10] Operators Tests"

on:
  workflow_dispatch:
  pull_request:
    paths:
      - "ads/opctl/operator/**"
      - "**requirements.txt"
      - ".github/workflows/run-operators*.yml"

# Cancel in progress workflows on pull_requests.
# https://docs.github.com/en/actions/using-jobs/using-concurrency#example-using-a-fallback-value
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read

# hack for https://github.com/actions/cache/issues/810#issuecomment-1222550359
env:
  SEGMENT_DOWNLOAD_TIMEOUT_MINS: 5

jobs:
  test:
    name: python ${{ matrix.python-version }}, default_setup
    runs-on: ubuntu-latest
    timeout-minutes: 60

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.10"]

    steps:
      - uses: actions/checkout@v3

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: "pip"
          cache-dependency-path: |
            pyproject.toml
            "**requirements.txt"
            "test-requirements-operators.txt"

      - uses: ./.github/workflows/set-dummy-conf
        name: "Test config setup"

      - name: "Run default_setup tests folder ONLY with minimum ADS dependencies"
        timeout-minutes: 15
        shell: bash
        env:
          NoDependency: True
        run: |
          set -x # print commands that are executed
          $CONDA/bin/conda init
          source /home/runner/.bashrc
          pip install -r test-requirements-operators.txt
          python -m pytest -v -p no:warnings --durations=5 tests/operators
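The workflow also declares a workflow_dispatch trigger, so it can be started by hand after the branch is pushed. A minimal sketch using the GitHub CLI, assuming the new file is saved under a name matching the run-operators*.yml path filter above (the exact filename is not shown in this commit):

# Hypothetical filename; substitute the workflow file actually added by this commit.
gh workflow run run-operators.yml --ref <your-branch>
# Watch the queued run until it completes.
gh run watch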

test-requirements-operators.txt

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
-e .
click
coverage
faker
mock
pip
pytest
pytest-codecov
pytest-xdist
darts
plotly
docker
tests/operators/test_datasets.py

Lines changed: 201 additions & 0 deletions
@@ -0,0 +1,201 @@
#!/usr/bin/env python

# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

from darts import datasets as d_datasets
import yaml
import tempfile
import subprocess
import pandas as pd
import pytest
from time import sleep, time
from copy import deepcopy
from pathlib import Path
import random


DATASETS_LIST = [
    "AirPassengersDataset",
    "AusBeerDataset",
    "AustralianTourismDataset",
    "ETTh1Dataset",
    # 'ETTh2Dataset',
    # 'ETTm1Dataset',
    # 'ETTm2Dataset',
    # 'ElectricityDataset',
    "EnergyDataset",
    "ExchangeRateDataset",
    "GasRateCO2Dataset",
    "HeartRateDataset",
    "ILINetDataset",
    "IceCreamHeaterDataset",
    "MonthlyMilkDataset",
    "MonthlyMilkIncompleteDataset",
    "SunspotsDataset",
    "TaylorDataset",
    "TemperatureDataset",
    "TrafficDataset",
    "USGasolineDataset",
    "UberTLCDataset",
    "WeatherDataset",
    "WineDataset",
    "WoolyDataset",
]

TEMPLATE_YAML = {
    "kind": "operator",
    "type": "forecast",
    "version": "v1",
    "spec": {
        "historical_data": {
            "url": None,
        },
        "output_directory": {
            "url": "results",
        },
        "model": None,
        "target_column": None,
        "datetime_column": {
            "name": None,
        },
        "target_category_columns": [],
        "horizon": None,
        "generate_explanations": False,
    },
}


PERIODS = 5
MAX_ADDITIONAL_COLS = 3
SAMPLE_FRACTION = 1

parameters_short = []

for dataset_i in DATASETS_LIST[2:3]:  # + [DATASETS_LIST[-2]]
    for model in [
        "arima",
        "automlx",
        "prophet",
        "neuralprophet",
        "autots",
        "auto",
    ]:  # ["arima", "automlx", "prophet", "neuralprophet", "autots", "auto"]
        parameters_short.append((model, dataset_i))


@pytest.mark.parametrize("model, dataset_name", parameters_short)
def test_load_datasets(model, dataset_name):
    dataset_i = getattr(d_datasets, dataset_name)().load()
    datetime_col = dataset_i.time_index.name

    columns = dataset_i.components
    target = dataset_i[columns[0]][:-PERIODS]
    test = dataset_i[columns[0]][-PERIODS:]

    print(dataset_name, len(columns), len(target))
    with tempfile.TemporaryDirectory() as tmpdirname:
        historical_data_path = f"{tmpdirname}/primary_data.csv"
        additional_data_path = f"{tmpdirname}/add_data.csv"
        test_data_path = f"{tmpdirname}/test_data.csv"
        output_data_path = f"{tmpdirname}/results"
        yaml_i = deepcopy(TEMPLATE_YAML)
        generate_train_metrics = True  # bool(random.getrandbits(1))

        # TODO: Open bug ticket so that series is not required
        df_i = target.pd_dataframe().reset_index()
        df_i["Series"] = "A"
        if model == "automlx" and dataset_name == "AustralianTourismDataset":
            df_i[datetime_col] = pd.to_datetime(
                [f"{x+1:03d}" for x in df_i[datetime_col]], format="%j"
            )

        df_i.to_csv(historical_data_path, index=False)
        # .sample(frac=SAMPLE_FRACTION).sort_values(by=datetime_col)

        test_df = test.pd_dataframe().reset_index()
        test_df["Series"] = "A"
        if model == "automlx" and dataset_name == "AustralianTourismDataset":
            test_df[datetime_col] = pd.to_datetime(
                [f"{x+1:03d}" for x in test_df[datetime_col]], format="%j"
            )
        test_df.to_csv(test_data_path, index=False)

        if len(columns) > 1:
            additional_cols = columns[1 : min(len(columns), MAX_ADDITIONAL_COLS)]
            additional_data = dataset_i[list(additional_cols)]
            df_additional = additional_data.pd_dataframe().reset_index()
            df_additional["Series"] = "A"
            if model == "automlx" and dataset_name == "AustralianTourismDataset":
                df_additional[datetime_col] = pd.to_datetime(
                    [f"{x+1:03d}" for x in df_additional[datetime_col]], format="%j"
                )
            df_additional.to_csv(additional_data_path, index=False)
            yaml_i["spec"]["additional_data"] = {"url": additional_data_path}

        yaml_i["spec"]["historical_data"]["url"] = historical_data_path
        yaml_i["spec"]["test_data"] = {"url": test_data_path}
        yaml_i["spec"]["output_directory"]["url"] = output_data_path
        yaml_i["spec"]["model"] = model
        yaml_i["spec"]["target_column"] = columns[0]
        yaml_i["spec"]["datetime_column"]["name"] = datetime_col
        yaml_i["spec"]["target_category_columns"] = ["Series"]
        yaml_i["spec"]["horizon"] = PERIODS
        if (
            yaml_i["spec"].get("additional_data") is not None
            and model != "neuralprophet"
        ):
            yaml_i["spec"]["generate_explanations"] = True
        if generate_train_metrics:
            yaml_i["spec"]["generate_metrics"] = generate_train_metrics
        if model == "autots":
            yaml_i["spec"]["model_kwargs"] = {"model_list": "superfast"}
        if model == "automlx":
            yaml_i["spec"]["model_kwargs"] = {"time_budget": 1}

        forecast_yaml_filename = f"{tmpdirname}/forecast.yaml"
        with open(f"{tmpdirname}/forecast.yaml", "w") as f:
            f.write(yaml.dump(yaml_i))
        sleep(0.5)
        subprocess.run(
            f"ads operator run -f {forecast_yaml_filename} --debug", shell=True
        )
        sleep(0.1)
        subprocess.run(f"ls -a {output_data_path}", shell=True)
        # if yaml_i["spec"]["generate_explanations"]:
        #     glb_expl = pd.read_csv(f"{tmpdirname}/results/global_explanation.csv")
        #     print(glb_expl)
        #     loc_expl = pd.read_csv(f"{tmpdirname}/results/local_explanation.csv")
        #     print(loc_expl)

        test_metrics = pd.read_csv(f"{tmpdirname}/results/test_metrics.csv")
        print(test_metrics)
        train_metrics = pd.read_csv(f"{tmpdirname}/results/metrics.csv")
        print(train_metrics)
        return test_metrics.iloc[0][f"{columns[0]}_A"]


if __name__ == "__main__":
    failed_runs = []
    results = dict()
    timings = dict()
    for dataset_name in DATASETS_LIST[2:3]:  # random.sample(DATASETS_LIST, 2):
        results[dataset_name] = dict()
        timings[dataset_name] = dict()
        for m in [
            "automlx"
        ]:  # ["arima", "automlx", "prophet", "neuralprophet", "autots", "auto"]:
            start_time = time()
            try:
                results[dataset_name][m] = test_load_datasets(
                    model=m, dataset_name=dataset_name
                )
            except Exception as e:
                print(f"Failed with the following error! {e}")
                failed_runs.append((dataset_name, m))
            elapsed = time() - start_time
            timings[dataset_name][m] = elapsed
    print(f"Failed Runs: {failed_runs}")
    print(f"results: {pd.DataFrame(results)}")
    print(f"timings: {timings}")
    pd.DataFrame(results).to_csv("~/Desktop/AUTO_benchmark_darts.csv")
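Because the test is parametrized over (model, dataset) pairs, a single model can be exercised locally with pytest's -k filter. A minimal sketch, assuming the dependencies from test-requirements-operators.txt are installed and the ads CLI invoked by the test's subprocess call is on PATH:

# Run only the parametrized cases whose id contains "arima".
python -m pytest -v -p no:warnings tests/operators/test_datasets.py -k arima
# Or invoke the module directly to use its __main__ benchmarking loop.
python tests/operators/test_datasets.py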
