-
Notifications
You must be signed in to change notification settings - Fork 17
[ML-49316] Support MonthMid and MonthEnd for DeepAR #160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -143,3 +143,111 @@ def test_single_series_week_day_index(self): | |
|
||
# Assert equality | ||
pd.testing.assert_frame_equal(transformed_df, expected_df) | ||
|
||
def test_single_series_month_start_index(self):
    """Check gap filling for a monthly series stamped on the first of the month.

    Builds 24 month-start rows, removes two of them, and asserts that
    ``set_index_and_fill_missing_time_steps`` called with ``"MS"`` returns
    a frame indexed by monthly periods in which the removed rows are
    present again with NaN targets.
    """
    target_col = "sales"
    time_col = "date"
    num_months = 24

    # Month-start timestamps: 2020-01-01, 2020-02-01, ...
    base_dates = pd.date_range(
        start='2020-01-01',
        periods=num_months,
        freq='MS'
    )

    base_df = pd.DataFrame({
        time_col: base_dates,
        target_col: range(num_months)
    })

    # Simulate missing months by dropping the rows labelled 3 and 4
    # (April and May 2020).
    dropped_df = base_df.drop([3, 4]).reset_index(drop=True)

    # Transform the dataframe
    transformed_df = set_index_and_fill_missing_time_steps(
        dropped_df,
        time_col,
        "MS"  # Monthly frequency
    )

    # Build the expected frame. The target column is created from ``range``
    # and is therefore integer typed; cast it to float explicitly before
    # inserting NaN, because relying on the implicit int -> float upcast
    # during ``.loc`` assignment is deprecated since pandas 2.1.
    expected_df = base_df.astype({target_col: float})
    expected_df.loc[[3, 4], target_col] = float('nan')
    expected_df = expected_df.set_index(time_col).rename_axis(None)
    expected_df = expected_df.to_period("M")

    # Assert equality
    pd.testing.assert_frame_equal(transformed_df, expected_df)
|
||
def test_single_series_month_mid_index(self):
    """Check gap filling for a monthly series stamped on the 15th of the month.

    Builds 24 mid-month rows, removes two of them, and asserts that
    ``set_index_and_fill_missing_time_steps`` returns a frame indexed by
    monthly periods in which the removed rows are present again with NaN
    targets. The frequency passed to the function is still ``"MS"`` even
    though the timestamps do not fall on the first day of the month.
    """
    target_col = "sales"
    time_col = "date"
    num_months = 24

    # Month-start timestamps shifted by 14 days, i.e. the 15th of each
    # month starting from 2020-01-15.
    base_dates = pd.date_range(
        start='2020-01-01',
        periods=num_months,
        freq='MS'
    ) + pd.DateOffset(days=14)

    base_df = pd.DataFrame({
        time_col: base_dates,
        target_col: range(num_months)
    })

    # Simulate missing months by dropping the rows labelled 3 and 4
    # (April and May 2020).
    dropped_df = base_df.drop([3, 4]).reset_index(drop=True)

    # Transform the dataframe
    transformed_df = set_index_and_fill_missing_time_steps(
        dropped_df,
        time_col,
        "MS"
    )

    # Build the expected frame. The target column is created from ``range``
    # and is therefore integer typed; cast it to float explicitly before
    # inserting NaN, because relying on the implicit int -> float upcast
    # during ``.loc`` assignment is deprecated since pandas 2.1.
    expected_df = base_df.astype({target_col: float})
    expected_df.loc[[3, 4], target_col] = float('nan')
    expected_df = expected_df.set_index(time_col).rename_axis(None)
    expected_df = expected_df.to_period("M")

    # Assert equality
    pd.testing.assert_frame_equal(transformed_df, expected_df)
|
||
def test_single_series_month_end_index(self):
    """Check gap filling for a monthly series stamped on the last day of the month.

    Builds 24 month-end rows, removes two of them, and asserts that
    ``set_index_and_fill_missing_time_steps`` returns a frame indexed by
    monthly periods in which the removed rows are present again with NaN
    targets. The frequency passed to the function is still ``"MS"`` even
    though the timestamps fall on the last day of the month.
    """
    target_col = "sales"
    time_col = "date"
    num_months = 24

    # freq='M' anchors every generated timestamp to the month end, so
    # although ``start`` is 2020-01-01 the series begins on 2020-01-31
    # and continues with 2020-02-29, 2020-03-31, ...
    # NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of
    # 'ME'; switch once the minimum supported pandas version allows it.
    base_dates = pd.date_range(
        start='2020-01-01',
        periods=num_months,
        freq='M'
    )

    base_df = pd.DataFrame({
        time_col: base_dates,
        target_col: range(num_months)
    })

    # Simulate missing months by dropping the rows labelled 3 and 4
    # (April and May 2020).
    dropped_df = base_df.drop([3, 4]).reset_index(drop=True)

    # Transform the dataframe
    transformed_df = set_index_and_fill_missing_time_steps(
        dropped_df,
        time_col,
        "MS"  # Monthly frequency
    )

    # Build the expected frame. The target column is created from ``range``
    # and is therefore integer typed; cast it to float explicitly before
    # inserting NaN, because relying on the implicit int -> float upcast
    # during ``.loc`` assignment is deprecated since pandas 2.1.
    expected_df = base_df.astype({target_col: float})
    expected_df.loc[[3, 4], target_col] = float('nan')
    expected_df = expected_df.set_index(time_col).rename_axis(None)
    expected_df = expected_df.to_period("M")

    # Assert equality
    pd.testing.assert_frame_equal(transformed_df, expected_df)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the detailed function description!