add tests

Lanz-db · Lanz-db · commit f1a8eb29b820 · 2024-07-18T00:11:16.000-07:00
diff --git a/runtime/tests/automl_runtime/forecast/pmdarima/training_test.py b/runtime/tests/automl_runtime/forecast/pmdarima/training_test.py
@@ -72,6 +72,17 @@ def test_fit_success_with_exogenous(self):
         results_pd = arima_estimator.fit(self.df_with_exogenous)
         self.assertIn("smape", results_pd)
         self.assertIn("pickled_model", results_pd)
+    
+    def test_fit_success_with_split_cutoff(self):
+        """Fit with an explicit split_cutoff; the result must contain 'smape' and 'pickled_model'."""
+        cutoff = pd.Timestamp('2020-07-17 00:00:00')
+        estimator_kwargs = dict(horizon=1, frequency_unit="d", metric="smape",
+                                seasonal_periods=[1, 7], num_folds=2,
+                                split_cutoff=cutoff)
+        fit_output = ArimaEstimator(**estimator_kwargs).fit(self.df)
+        # Checked in the same order as before: metric entry first, then the pickled model.
+        for expected_key in ("smape", "pickled_model"):
+            self.assertIn(expected_key, fit_output)
 
     def test_fit_skip_too_long_seasonality(self):
         arima_estimator = ArimaEstimator(horizon=1,
diff --git a/runtime/tests/automl_runtime/forecast/prophet/forecast_test.py b/runtime/tests/automl_runtime/forecast/prophet/forecast_test.py
@@ -140,6 +140,34 @@ def test_training_with_extra_regressors(self):
         model_json = json.loads(results["model_json"][0])
         self.assertListEqual(model_json["extra_regressors"][0], ["f1", "f2"])
 
+    def test_training_with_split_cutoff(self):
+        """Fit with split_cutoff on self.df, self.df_datetime_date and self.df_string_time."""
+        estimator = ProphetHyperoptEstimator(
+            horizon=1, frequency_unit="d", metric="smape", interval_width=0.8,
+            country_holidays="US", search_space=self.search_space, num_folds=2,
+            trial_timeout=1000, random_state=0, is_parallel=False,
+            regressors=["f1", "f2"],
+            split_cutoff=pd.Timestamp('2020-07-10 00:00:00'))
+        # (result column, expected value, extra assertAlmostEqual kwargs), checked in this order.
+        expectations = [
+            ("mse", 0, {}),
+            ("rmse", 0, {"delta": 1e-6}),
+            ("mae", 0, {"delta": 1e-6}),
+            ("mape", 0, {}),
+            ("mdape", 0, {}),
+            ("smape", 0, {}),
+            ("coverage", 1, {}),
+        ]
+
+        for frame in (self.df, self.df_datetime_date, self.df_string_time):
+            fit_results = estimator.fit(frame)
+            for metric_name, target, tolerance in expectations:
+                self.assertAlmostEqual(fit_results[metric_name][0], target, **tolerance)
+            # check the best result parameter is inside the search space, i.e. within [0.1, 0.5]
+            best_scale = json.loads(fit_results["model_json"][0])["changepoint_prior_scale"]
+            self.assertGreaterEqual(best_scale, 0.1)
+            self.assertLessEqual(best_scale, 0.5)
+
     @patch("databricks.automl_runtime.forecast.prophet.forecast.fmin")
     @patch("databricks.automl_runtime.forecast.prophet.forecast.Trials")
     @patch("databricks.automl_runtime.forecast.prophet.forecast.partial")
diff --git a/runtime/tests/automl_runtime/forecast/utils_test.py b/runtime/tests/automl_runtime/forecast/utils_test.py
@@ -22,7 +22,8 @@
 from databricks.automl_runtime.forecast import DATE_OFFSET_KEYWORD_MAP
 from databricks.automl_runtime.forecast.utils import \
     generate_cutoffs, get_validation_horizon, calculate_period_differences, \
-    is_frequency_consistency, make_future_dataframe, make_single_future_dataframe
+    is_frequency_consistency, make_future_dataframe, make_single_future_dataframe, \
+    generate_custom_cutoffs
 
 
 class TestGetValidationHorizon(unittest.TestCase):
@@ -177,6 +178,71 @@ def test_generate_cutoffs_success_annualy(self):
         self.assertEqual([pd.Timestamp('2018-07-14 00:00:00'), pd.Timestamp('2019-07-14 00:00:00'), pd.Timestamp('2020-07-14 00:00:00')], cutoffs)
 
 
+class TestTestGenerateCustomCutoffs(unittest.TestCase):  # expected-value tests for generate_custom_cutoffs
+    def test_generate_custom_cutoffs_success_hourly(self):  # 168 hourly rows, horizon of 7 hours
+        df = pd.DataFrame(
+            pd.date_range(start="2020-07-01", periods=168, freq='h'), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        expected_cutoffs = [pd.Timestamp('2020-07-07 13:00:00'),
+                            pd.Timestamp('2020-07-07 14:00:00'),
+                            pd.Timestamp('2020-07-07 15:00:00'),
+                            pd.Timestamp('2020-07-07 16:00:00')]
+        cutoffs = generate_custom_cutoffs(df, horizon=7, unit="H", split_cutoff=pd.Timestamp('2020-07-07 13:00:00'))
+        self.assertEqual(expected_cutoffs, cutoffs)
+
+    def test_generate_custom_cutoffs_success_daily(self):  # midnight-aligned daily rows through 2020-08-30
+        df = pd.DataFrame(
+            pd.date_range(start="2020-07-01", end="2020-08-30", freq='d'), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        cutoffs = generate_custom_cutoffs(df, horizon=7, unit="D", split_cutoff=pd.Timestamp('2020-08-21 00:00:00'))
+        # First expected cutoff is split_cutoff itself (midnight), matching the other tests in this class.
+        self.assertEqual([pd.Timestamp('2020-08-21 00:00:00'), pd.Timestamp('2020-08-22 00:00:00'), pd.Timestamp('2020-08-23 00:00:00')], cutoffs)
+    def test_generate_custom_cutoffs_success_weekly(self):  # 52 rows at freq='W'
+        df = pd.DataFrame(
+            pd.date_range(start="2020-07-01", periods=52, freq='W'), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        cutoffs = generate_custom_cutoffs(df, horizon=7, unit="W", split_cutoff=pd.Timestamp('2021-04-25 00:00:00'))
+        self.assertEqual([pd.Timestamp('2021-04-25 00:00:00'), pd.Timestamp('2021-05-02 00:00:00'), pd.Timestamp('2021-05-09 00:00:00')], cutoffs)
+
+    def test_generate_custom_cutoffs_success_monthly(self):  # 24 rows, one month apart, on the 12th
+        df = pd.DataFrame(
+            pd.date_range(start="2020-01-12", periods=24, freq=pd.DateOffset(months=1)), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        cutoffs = generate_custom_cutoffs(df, horizon=7, unit="MS", split_cutoff=pd.Timestamp('2021-03-12 00:00:00'))
+        self.assertEqual([pd.Timestamp('2021-03-12 00:00:00'), pd.Timestamp('2021-04-12 00:00:00'), pd.Timestamp('2021-05-12 00:00:00')], cutoffs)
+
+    def test_generate_custom_cutoffs_success_quaterly(self):  # 9 rows, three months apart; last row is 2022-07-12
+        df = pd.DataFrame(
+            pd.date_range(start="2020-07-12", periods=9, freq=pd.DateOffset(months=3)), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        cutoffs = generate_custom_cutoffs(df, horizon=7, unit="QS", split_cutoff=pd.Timestamp('2021-07-12 00:00:00'))  # NOTE(review): expected 2022-10-12 below is after the last row - confirm
+        self.assertEqual([pd.Timestamp('2021-07-12 00:00:00'), pd.Timestamp('2022-10-12 00:00:00')], cutoffs)
+
+    def test_generate_custom_cutoffs_success_annualy(self):  # 10 rows, one year apart
+        df = pd.DataFrame(
+            pd.date_range(start="2012-07-14", periods=10, freq=pd.DateOffset(years=1)), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        cutoffs = generate_custom_cutoffs(df, horizon=7, unit="YS", split_cutoff=pd.Timestamp('2012-07-14 00:00:00'))
+        self.assertEqual([pd.Timestamp('2012-07-14 00:00:00'), pd.Timestamp('2013-07-14 00:00:00'), pd.Timestamp('2014-07-14 00:00:00')], cutoffs)
+
+    def test_generate_custom_cutoffs_success_with_small_gaps(self):  # rows every 3 days, horizon=7 with unit "D"
+        df = pd.DataFrame(
+            pd.date_range(start="2020-07-01", periods=30, freq='3d'), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        cutoffs = generate_custom_cutoffs(df, horizon=7, unit="D", split_cutoff=pd.Timestamp('2020-09-17 00:00:00'))
+        self.assertEqual([pd.Timestamp('2020-09-17 00:00:00'),
+                          pd.Timestamp('2020-09-18 00:00:00'),
+                          pd.Timestamp('2020-09-19 00:00:00')], cutoffs)
+
+    def test_generate_custom_cutoffs_success_with_large_gaps(self):  # rows every 9 days, horizon=7 with unit "D"
+        df = pd.DataFrame(
+            pd.date_range(start="2020-07-01", periods=30, freq='9d'), columns=["ds"]
+        ).rename_axis("y").reset_index()
+        cutoffs = generate_custom_cutoffs(df, horizon=7, unit="D", split_cutoff=pd.Timestamp('2021-03-10 00:00:00'))
+        self.assertEqual([pd.Timestamp('2021-03-10 00:00:00'),
+                          pd.Timestamp('2021-03-12 00:00:00')], cutoffs)
+
+
 class TestCalculatePeriodsAndFrequency(unittest.TestCase):
     def setUp(self) -> None:
         return super().setUp()