@@ -259,6 +259,7 @@ def setup_faulty_rossman():
     additional_data_path = f"{data_folder}/rs_2_add_encoded.csv"
     return historical_data_path, additional_data_path

+
 def setup_small_rossman():
     curr_dir = pathlib.Path(__file__).parent.resolve()
     data_folder = f"{curr_dir}/../data/"
@@ -396,7 +397,7 @@ def test_0_series(operator_setup, model):
         historical_data_path=historical_data_path,
         additional_data_path=additional_data_path,
         test_data_path=test_data_path,
-        preprocessing={"enabled": False}
+        preprocessing={"enabled": False},
     )
     with pytest.raises(DataMismatchError):
         run_yaml(
@@ -465,36 +466,36 @@ def test_disabling_outlier_treatment(operator_setup):
         axis=1,
     )
     outliers = [1000, -800]
-    hist_data_0.at[40, 'Sales'] = outliers[0]
-    hist_data_0.at[75, 'Sales'] = outliers[1]
+    hist_data_0.at[40, "Sales"] = outliers[0]
+    hist_data_0.at[75, "Sales"] = outliers[1]
     historical_data_path, additional_data_path, test_data_path = setup_artificial_data(
         tmpdirname, hist_data_0
     )

     yaml_i, output_data_path = populate_yaml(
-        tmpdirname=tmpdirname,
-        model="arima",
-        historical_data_path=historical_data_path
+        tmpdirname=tmpdirname, model="arima", historical_data_path=historical_data_path
     )
     yaml_i["spec"].pop("target_category_columns")
     yaml_i["spec"].pop("additional_data")

     # running default pipeline where outlier will be treated
     run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path)
     forecast_without_outlier = pd.read_csv(f"{tmpdirname}/results/forecast.csv")
-    input_vals_without_outlier = set(forecast_without_outlier['input_value'])
+    input_vals_without_outlier = set(forecast_without_outlier["input_value"])
     assert all(
-        item not in input_vals_without_outlier for item in outliers), "forecast file should not contain any outliers"
+        item not in input_vals_without_outlier for item in outliers
+    ), "forecast file should not contain any outliers"

     # switching off outlier_treatment
     preprocessing_steps = {"missing_value_imputation": True, "outlier_treatment": False}
     preprocessing = {"enabled": True, "steps": preprocessing_steps}
     yaml_i["spec"]["preprocessing"] = preprocessing
     run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path)
     forecast_with_outlier = pd.read_csv(f"{tmpdirname}/results/forecast.csv")
-    input_vals_with_outlier = set(forecast_with_outlier['input_value'])
+    input_vals_with_outlier = set(forecast_with_outlier["input_value"])
     assert all(
-        item in input_vals_with_outlier for item in outliers), "forecast file should contain all the outliers"
+        item in input_vals_with_outlier for item in outliers
+    ), "forecast file should contain all the outliers"


 @pytest.mark.parametrize("model", MODELS)
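
The two runs above differ only in the spec's preprocessing block. A minimal sketch of that toggle, using the key names from the test; everything outside those keys is illustrative:

# Sketch of the spec change that keeps outliers in the forecast input.
# Key names ("enabled", "steps", "missing_value_imputation", "outlier_treatment")
# come from the test above; the first run leaves "preprocessing" unset, so the
# default pipeline treats the injected outliers.
preprocessing = {
    "enabled": True,
    "steps": {"missing_value_imputation": True, "outlier_treatment": False},
}
# yaml_i["spec"]["preprocessing"] = preprocessing  # second run: outliers preserved
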
@@ -529,7 +530,7 @@ def split_df(df):
         historical_data_path=historical_data_path,
         additional_data_path=additional_data_path,
         test_data_path=test_data_path,
-        preprocessing={"enabled": True, "steps": preprocessing_steps}
+        preprocessing={"enabled": True, "steps": preprocessing_steps},
     )
     with pytest.raises(DataMismatchError):
         # 4 columns in historical data, but only 1 cat col specified
@@ -561,8 +562,8 @@ def test_all_series_failure(model):
     )
     preprocessing_steps = {"missing_value_imputation": True, "outlier_treatment": False}
     yaml_i["spec"]["model"] = model
-    yaml_i['spec']['horizon'] = 10
-    yaml_i['spec']['preprocessing'] = preprocessing_steps
+    yaml_i["spec"]["horizon"] = 10
+    yaml_i["spec"]["preprocessing"] = preprocessing_steps
     if yaml_i["spec"].get("additional_data") is not None and model != "autots":
         yaml_i["spec"]["generate_explanations"] = True
     if model == "autots":
@@ -571,14 +572,15 @@ def test_all_series_failure(model):
         yaml_i["spec"]["model_kwargs"] = {"time_budget": 1}

     module_to_patch = {
-        "arima": 'pmdarima.auto_arima',
-        "autots": 'autots.AutoTS',
-        "automlx": 'automlx.Pipeline',
-        "prophet": 'prophet.Prophet',
-        "neuralprophet": 'neuralprophet.NeuralProphet'
+        "arima": "pmdarima.auto_arima",
+        "autots": "autots.AutoTS",
+        "automlx": "automlx.Pipeline",
+        "prophet": "prophet.Prophet",
+        "neuralprophet": "neuralprophet.NeuralProphet",
     }
-    with patch(module_to_patch[model], side_effect=Exception("Custom exception message")):
-
+    with patch(
+        module_to_patch[model], side_effect=Exception("Custom exception message")
+    ):
         run(yaml_i, backend="operator.local", debug=False)

     report_path = f"{output_data_path}/report.html"
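
The hunk above relies on unittest.mock.patch with side_effect to make every model fit raise, so each series fails and lands in errors.json. A minimal, self-contained sketch of the same pattern, patching the stdlib json.loads purely as a stand-in for a model entry point:

from unittest.mock import patch

# side_effect makes every call to the patched target raise the given exception;
# json.loads is only a stand-in for a real model entry point.
with patch("json.loads", side_effect=Exception("Custom exception message")):
    import json
    try:
        json.loads("{}")
    except Exception as exc:
        assert "Custom exception message" in str(exc)
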
@@ -588,17 +590,26 @@ def test_all_series_failure(model):
     assert os.path.exists(error_path), f"Error file not found at {error_path}"

     # Additionally, you can read the content of the error.json and assert its content
-    with open(error_path, 'r') as error_file:
+    with open(error_path, "r") as error_file:
         error_content = json.load(error_file)
-        assert "Custom exception message" in error_content["1"]["error"], "Error message mismatch"
-        assert "Custom exception message" in error_content["13"]["error"], "Error message mismatch"
+        assert (
+            "Custom exception message" in error_content["1"]["error"]
+        ), "Error message mismatch"
+        assert (
+            "Custom exception message" in error_content["13"]["error"]
+        ), "Error message mismatch"

     if yaml_i["spec"]["generate_explanations"]:
         global_fn = f"{tmpdirname}/results/global_explanation.csv"
-        assert os.path.exists(global_fn), f"Global explanation file not found at {report_path}"
+        assert os.path.exists(
+            global_fn
+        ), f"Global explanation file not found at {report_path}"

         local_fn = f"{tmpdirname}/results/local_explanation.csv"
-        assert os.path.exists(local_fn), f"Local explanation file not found at {report_path}"
+        assert os.path.exists(
+            local_fn
+        ), f"Local explanation file not found at {report_path}"
+

 @pytest.mark.parametrize("model", MODELS)
 def test_arima_automlx_errors(operator_setup, model):
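
The assertions just above imply that errors.json maps each failed series ID to an object carrying the captured message. A hedged sketch of that shape; the series keys and message text come from the assertions, while the overall layout is inferred from this test rather than taken from operator documentation:

# Inferred shape of errors.json; "1" and "13" are series identifiers used above.
errors_json_like = {
    "1": {"error": "Custom exception message"},
    "13": {"error": "Custom exception message"},
}
assert all("error" in entry for entry in errors_json_like.values())
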
@@ -611,29 +622,38 @@ def test_arima_automlx_errors(operator_setup, model):
     )

     """
-    Arima was failing for constant trend when there are constant columns and when there are boolean columns.
-    We added label encoding for boolean and are dropping columns with constant value for arima with constant trend.
+    Arima was failing for constant trend when there are constant columns and when there are boolean columns.
+    We added label encoding for boolean and are dropping columns with constant value for arima with constant trend.
     This test checks that report, metrics, explanations are generated for this case.
     """

     """
-    series 13 in this data has missing dates and automlx fails for this with DatetimeIndex error. This test checks that
+    series 13 in this data has missing dates and automlx fails for this with DatetimeIndex error. This test checks that
     outputs get generated and that error is shown in errors.json
     """

     """
-    explanations generation is failing when boolean columns are passed.
-    TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced
+    explanations generation is failing when boolean columns are passed.
+    TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced
     any supported types according to the casting rule ''safe''
     Added label encoding before passing data to explainer
     """
     preprocessing_steps = {"missing_value_imputation": True, "outlier_treatment": False}
-    yaml_i['spec']['horizon'] = 10
-    yaml_i['spec']['preprocessing'] = preprocessing_steps
-    yaml_i['spec']['generate_explanations'] = True
-    yaml_i['spec']['model'] = model
+    yaml_i["spec"]["horizon"] = 10
+    yaml_i["spec"]["preprocessing"] = preprocessing_steps
+    yaml_i["spec"]["generate_explanations"] = True
+    yaml_i["spec"]["model"] = model
+    if model == "autots":
+        yaml_i["spec"]["model_kwargs"] = {"model_list": "superfast"}
+    if model == "automlx":
+        yaml_i["spec"]["model_kwargs"] = {"time_budget": 1}

-    run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path, test_metrics_check=False)
+    run_yaml(
+        tmpdirname=tmpdirname,
+        yaml_i=yaml_i,
+        output_data_path=output_data_path,
+        test_metrics_check=False,
+    )

     report_path = f"{tmpdirname}/results/report.html"
     assert os.path.exists(report_path), f"Report file not found at {report_path}"
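
The docstrings above describe two data fixes: label encoding boolean columns and dropping constant-value columns before fitting ARIMA with a constant trend (and before passing data to the explainer). A minimal pandas sketch of both steps; the helper below is illustrative only, not the operator's actual implementation:

import pandas as pd

def _encode_and_prune(df: pd.DataFrame) -> pd.DataFrame:
    # Illustrative sketch only, not the operator's actual implementation.
    df = df.copy()
    # Label-encode boolean columns so downstream models and explainers see numerics.
    for col in df.select_dtypes(include="bool").columns:
        df[col] = df[col].astype(int)
    # Drop constant-value columns, which break ARIMA when a constant trend is used.
    constant_cols = [c for c in df.columns if df[c].nunique(dropna=False) <= 1]
    return df.drop(columns=constant_cols)
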
@@ -642,23 +662,28 @@ def test_arima_automlx_errors(operator_setup, model):
     assert os.path.exists(forecast_path), f"Forecast file not found at {report_path}"
     assert not pd.read_csv(forecast_path).empty

-
     error_path = f"{tmpdirname}/results/errors.json"
     if model == "arima":
         assert not os.path.exists(error_path), f"Error file not found at {error_path}"
     elif model == "automlx":
         assert os.path.exists(error_path), f"Error file not found at {error_path}"
-        with open(error_path, 'r') as error_file:
+        with open(error_path, "r") as error_file:
             error_content = json.load(error_file)
-            assert "Input data does not have a consistent (in terms of diff) DatetimeIndex." in error_content["13"][
-                "error"], "Error message mismatch"
+            assert (
+                "Input data does not have a consistent (in terms of diff) DatetimeIndex."
+                in error_content["13"]["error"]
+            ), "Error message mismatch"

     if model != "autots":
         global_fn = f"{tmpdirname}/results/global_explanation.csv"
-        assert os.path.exists(global_fn), f"Global explanation file not found at {report_path}"
+        assert os.path.exists(
+            global_fn
+        ), f"Global explanation file not found at {report_path}"

         local_fn = f"{tmpdirname}/results/local_explanation.csv"
-        assert os.path.exists(local_fn), f"Local explanation file not found at {report_path}"
+        assert os.path.exists(
+            local_fn
+        ), f"Local explanation file not found at {report_path}"

     glb_expl = pd.read_csv(global_fn, index_col=0)
     loc_expl = pd.read_csv(local_fn)
@@ -680,13 +705,20 @@ def test_date_format(operator_setup, model):
         historical_data_path=historical_data_path,
         additional_data_path=additional_data_path,
     )
-    yaml_i['spec']['horizon'] = 10
+    yaml_i["spec"]["horizon"] = 10
     yaml_i["spec"]["model"] = model
     if model == "autots":
         yaml_i["spec"]["model_kwargs"] = {"model_list": "superfast"}

-    run_yaml(tmpdirname=tmpdirname, yaml_i=yaml_i, output_data_path=output_data_path, test_metrics_check=False)
-    assert pd.read_csv(additional_data_path)['Date'].equals(pd.read_csv(f"{tmpdirname}/results/forecast.csv")['Date'])
+    run_yaml(
+        tmpdirname=tmpdirname,
+        yaml_i=yaml_i,
+        output_data_path=output_data_path,
+        test_metrics_check=False,
+    )
+    assert pd.read_csv(additional_data_path)["Date"].equals(
+        pd.read_csv(f"{tmpdirname}/results/forecast.csv")["Date"]
+    )


 if __name__ == "__main__":