|
22 | 22 | from databricks.automl_runtime.forecast import DATE_OFFSET_KEYWORD_MAP
|
23 | 23 | from databricks.automl_runtime.forecast.utils import \
|
24 | 24 | generate_cutoffs, get_validation_horizon, calculate_period_differences, \
|
25 |
| - is_frequency_consistency, make_future_dataframe, make_single_future_dataframe |
| 25 | + is_frequency_consistency, make_future_dataframe, make_single_future_dataframe, \ |
| 26 | + generate_custom_cutoffs |
26 | 27 |
|
27 | 28 |
|
28 | 29 | class TestGetValidationHorizon(unittest.TestCase):
|
@@ -177,6 +178,71 @@ def test_generate_cutoffs_success_annualy(self):
|
177 | 178 | self.assertEqual([pd.Timestamp('2018-07-14 00:00:00'), pd.Timestamp('2019-07-14 00:00:00'), pd.Timestamp('2020-07-14 00:00:00')], cutoffs)
|
178 | 179 |
|
179 | 180 |
|
class TestTestGenerateCustomCutoffs(unittest.TestCase):
    """Check the cutoff lists returned by ``generate_custom_cutoffs``.

    Each case builds a frame whose ``ds`` column holds a ``pd.date_range``
    and whose ``y`` column is the former row index, calls
    ``generate_custom_cutoffs`` with ``horizon=7``, a frequency unit and a
    ``split_cutoff`` timestamp, and compares the returned value with a
    hard-coded list of timestamps.
    """

    @staticmethod
    def _frame_from_dates(dates):
        """Return a frame with ``ds`` set to *dates* and ``y`` set to the row index."""
        frame = pd.DataFrame(dates, columns=["ds"])
        return frame.rename_axis("y").reset_index()

    def test_generate_custom_cutoffs_success_hourly(self):
        """Hourly data (168 points) with an hourly unit."""
        df = self._frame_from_dates(
            pd.date_range(start="2020-07-01", periods=168, freq='h')
        )
        expected_cutoffs = [
            pd.Timestamp('2020-07-07 13:00:00'),
            pd.Timestamp('2020-07-07 14:00:00'),
            pd.Timestamp('2020-07-07 15:00:00'),
            pd.Timestamp('2020-07-07 16:00:00'),
        ]
        split_cutoff = pd.Timestamp('2020-07-07 13:00:00')
        cutoffs = generate_custom_cutoffs(
            df, horizon=7, unit="H", split_cutoff=split_cutoff
        )
        self.assertEqual(expected_cutoffs, cutoffs)

    def test_generate_custom_cutoffs_success_daily(self):
        """Daily data from 2020-07-01 through 2020-08-30 with a daily unit."""
        df = self._frame_from_dates(
            pd.date_range(start="2020-07-01", end="2020-08-30", freq='d')
        )
        # NOTE(review): the first expected cutoff is at 12:00 while
        # split_cutoff and the remaining expected entries are at midnight;
        # every other case in this class expects the first cutoff to equal
        # split_cutoff -- confirm this value is intended.
        expected_cutoffs = [
            pd.Timestamp('2020-08-21 12:00:00'),
            pd.Timestamp('2020-08-22 00:00:00'),
            pd.Timestamp('2020-08-23 00:00:00'),
        ]
        split_cutoff = pd.Timestamp('2020-08-21 00:00:00')
        cutoffs = generate_custom_cutoffs(
            df, horizon=7, unit="D", split_cutoff=split_cutoff
        )
        self.assertEqual(expected_cutoffs, cutoffs)

    def test_generate_custom_cutoffs_success_weekly(self):
        """Weekly data (52 points) with a weekly unit."""
        df = self._frame_from_dates(
            pd.date_range(start="2020-07-01", periods=52, freq='W')
        )
        expected_cutoffs = [
            pd.Timestamp('2021-04-25 00:00:00'),
            pd.Timestamp('2021-05-02 00:00:00'),
            pd.Timestamp('2021-05-09 00:00:00'),
        ]
        split_cutoff = pd.Timestamp('2021-04-25 00:00:00')
        cutoffs = generate_custom_cutoffs(
            df, horizon=7, unit="W", split_cutoff=split_cutoff
        )
        self.assertEqual(expected_cutoffs, cutoffs)

    def test_generate_custom_cutoffs_success_monthly(self):
        """Data spaced one month apart (24 points) with the "MS" unit."""
        df = self._frame_from_dates(
            pd.date_range(start="2020-01-12", periods=24, freq=pd.DateOffset(months=1))
        )
        expected_cutoffs = [
            pd.Timestamp('2021-03-12 00:00:00'),
            pd.Timestamp('2021-04-12 00:00:00'),
            pd.Timestamp('2021-05-12 00:00:00'),
        ]
        split_cutoff = pd.Timestamp('2021-03-12 00:00:00')
        cutoffs = generate_custom_cutoffs(
            df, horizon=7, unit="MS", split_cutoff=split_cutoff
        )
        self.assertEqual(expected_cutoffs, cutoffs)

    def test_generate_custom_cutoffs_success_quaterly(self):
        """Data spaced three months apart (9 points) with the "QS" unit."""
        df = self._frame_from_dates(
            pd.date_range(start="2020-07-12", periods=9, freq=pd.DateOffset(months=3))
        )
        # NOTE(review): the second expected cutoff (2022-10-12) is later than
        # the last date in the input (2022-07-12, nine three-month steps from
        # 2020-07-12) -- confirm this expectation is intended.
        expected_cutoffs = [
            pd.Timestamp('2021-07-12 00:00:00'),
            pd.Timestamp('2022-10-12 00:00:00'),
        ]
        split_cutoff = pd.Timestamp('2021-07-12 00:00:00')
        cutoffs = generate_custom_cutoffs(
            df, horizon=7, unit="QS", split_cutoff=split_cutoff
        )
        self.assertEqual(expected_cutoffs, cutoffs)

    def test_generate_custom_cutoffs_success_annualy(self):
        """Data spaced one year apart (10 points) with the "YS" unit."""
        df = self._frame_from_dates(
            pd.date_range(start="2012-07-14", periods=10, freq=pd.DateOffset(years=1))
        )
        expected_cutoffs = [
            pd.Timestamp('2012-07-14 00:00:00'),
            pd.Timestamp('2013-07-14 00:00:00'),
            pd.Timestamp('2014-07-14 00:00:00'),
        ]
        split_cutoff = pd.Timestamp('2012-07-14 00:00:00')
        cutoffs = generate_custom_cutoffs(
            df, horizon=7, unit="YS", split_cutoff=split_cutoff
        )
        self.assertEqual(expected_cutoffs, cutoffs)

    def test_generate_custom_cutoffs_success_with_small_gaps(self):
        """Data spaced three days apart (30 points) with a daily unit."""
        df = self._frame_from_dates(
            pd.date_range(start="2020-07-01", periods=30, freq='3d')
        )
        expected_cutoffs = [
            pd.Timestamp('2020-09-17 00:00:00'),
            pd.Timestamp('2020-09-18 00:00:00'),
            pd.Timestamp('2020-09-19 00:00:00'),
        ]
        split_cutoff = pd.Timestamp('2020-09-17 00:00:00')
        cutoffs = generate_custom_cutoffs(
            df, horizon=7, unit="D", split_cutoff=split_cutoff
        )
        self.assertEqual(expected_cutoffs, cutoffs)

    def test_generate_custom_cutoffs_success_with_large_gaps(self):
        """Data spaced nine days apart (30 points) with a daily unit."""
        df = self._frame_from_dates(
            pd.date_range(start="2020-07-01", periods=30, freq='9d')
        )
        expected_cutoffs = [
            pd.Timestamp('2021-03-10 00:00:00'),
            pd.Timestamp('2021-03-12 00:00:00'),
        ]
        split_cutoff = pd.Timestamp('2021-03-10 00:00:00')
        cutoffs = generate_custom_cutoffs(
            df, horizon=7, unit="D", split_cutoff=split_cutoff
        )
        self.assertEqual(expected_cutoffs, cutoffs)
| 244 | + |
| 245 | + |
180 | 246 | class TestCalculatePeriodsAndFrequency(unittest.TestCase):
|
181 | 247 | def setUp(self) -> None:
|
182 | 248 | return super().setUp()
|
|
0 commit comments