     ForecastInputDataError,
 )
 from ads.opctl.operator.cmd import run
-
+import math

 NUM_ROWS = 1000
 NUM_SERIES = 10
@@ -172,8 +172,9 @@ def run_yaml(tmpdirname, yaml_i, output_data_path):
     run(yaml_i, backend="operator.local", debug=True)
     subprocess.run(f"ls -a {output_data_path}", shell=True)

-    test_metrics = pd.read_csv(f"{tmpdirname}/results/test_metrics.csv")
-    print(test_metrics)
+    if 'test_data' in yaml_i['spec']:
+        test_metrics = pd.read_csv(f"{tmpdirname}/results/test_metrics.csv")
+        print(test_metrics)
     train_metrics = pd.read_csv(f"{tmpdirname}/results/metrics.csv")
     print(train_metrics)

@@ -205,7 +206,8 @@ def populate_yaml(
     yaml_i["spec"]["datetime_column"]["name"] = "Date"
     yaml_i["spec"]["target_category_columns"] = ["Store"]
     yaml_i["spec"]["horizon"] = HORIZON
-    yaml_i["spec"]["preprocessing"] = preprocessing
+    if preprocessing:
+        yaml_i["spec"]["preprocessing"] = preprocessing
     if generate_train_metrics:
         yaml_i["spec"]["generate_metrics"] = generate_train_metrics
     if model == "autots":
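
With the populate_yaml change above, a caller that passes no preprocessing dict now gets a spec with no preprocessing key at all, so the operator runs with its default preprocessing. A minimal sketch of the two call styles, assuming the helper accepts preprocessing as a keyword argument (the explicit dict shown is illustrative only and not part of this change):

    # No preprocessing argument: the spec carries no "preprocessing" section.
    yaml_i, output_data_path = populate_yaml(
        tmpdirname=tmpdirname,
        model="arima",
        historical_data_path=historical_data_path,
    )

    # Explicit argument: the dict is copied into yaml_i["spec"]["preprocessing"].
    yaml_i, output_data_path = populate_yaml(
        tmpdirname=tmpdirname,
        model="arima",
        historical_data_path=historical_data_path,
        preprocessing={"enabled": True, "steps": {"outlier_treatment": False}},
    )
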
@@ -431,6 +433,49 @@ def test_invalid_dates(operator_setup, model):
     )


+def test_disabling_outlier_treatment(operator_setup):
+    tmpdirname = operator_setup
+    NUM_ROWS = 100
+    hist_data_0 = pd.concat(
+        [
+            HISTORICAL_DATETIME_COL[: NUM_ROWS - HORIZON],
+            TARGET_COL[: NUM_ROWS - HORIZON],
+        ],
+        axis=1,
+    )
+    outliers = [1000, -800]
+    hist_data_0.at[40, 'Sales'] = outliers[0]
+    hist_data_0.at[75, 'Sales'] = outliers[1]
+    historical_data_path, additional_data_path, test_data_path = setup_artificial_data(
+        tmpdirname, hist_data_0
+    )
+
+    yaml_i, output_data_path = populate_yaml(
+        tmpdirname=tmpdirname,
+        model="arima",
+        historical_data_path=historical_data_path
+    )
+    yaml_i["spec"].pop("target_category_columns")
+    yaml_i["spec"].pop("additional_data")
+
+    # running default pipeline where outliers will be treated
+    run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path)
+    forecast_without_outlier = pd.read_csv(f"{tmpdirname}/results/forecast.csv")
+    input_vals_without_outlier = set(forecast_without_outlier['input_value'])
+    assert all(
+        item not in input_vals_without_outlier for item in outliers), "forecast file should not contain any outliers"
+
+    # switching off outlier_treatment
+    preprocessing_steps = {"missing_value_imputation": True, "outlier_treatment": False}
+    preprocessing = {"enabled": True, "steps": preprocessing_steps}
+    yaml_i["spec"]["preprocessing"] = preprocessing
+    run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path)
+    forecast_with_outlier = pd.read_csv(f"{tmpdirname}/results/forecast.csv")
+    input_vals_with_outlier = set(forecast_with_outlier['input_value'])
+    assert all(
+        item in input_vals_with_outlier for item in outliers), "forecast file should contain all the outliers"
+
+
 @pytest.mark.parametrize("model", MODELS)
 def test_2_series(operator_setup, model):
     # Test w and w/o add data
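
As a side note on the assertions in test_disabling_outlier_treatment: both checks compare the injected spikes (1000 and -800) against the input_value column of forecast.csv. The same checks can be written with set operations; the sketch below is illustrative only (names as in the test above, not part of this change):

    # Default run: outlier treatment should have removed both injected spikes.
    assert set(outliers).isdisjoint(input_vals_without_outlier), \
        "forecast file should not contain any outliers"

    # Run with outlier_treatment disabled: both spikes remain in the forecast input.
    assert set(outliers).issubset(input_vals_with_outlier), \
        "forecast file should contain all the outliers"
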
@@ -456,7 +501,7 @@ def split_df(df):
     historical_data_path, additional_data_path, test_data_path = setup_artificial_data(
         tmpdirname, hist_data, add_data, test_data
     )
-    preprocessing_steps = {"missing_value_imputation": True, "outlier_detection": False}
+    preprocessing_steps = {"missing_value_imputation": True, "outlier_treatment": False}
     yaml_i, output_data_path = populate_yaml(
         tmpdirname=tmpdirname,
         model=model,