Add databricks_automl to the conda env for arima and deepar #151
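The tests in this PR verify that the extra pip dependencies exported by the runtime (`DEEPAR_ADDITIONAL_PIP_DEPS`, `ARIMA_ADDITIONAL_PIP_DEPS`) end up in the logged model's `requirements.txt`. As orientation only, and not the repository's actual implementation, here is a minimal sketch of how extra pip dependencies typically reach a logged pyfunc model's environment in MLflow; the `EchoModel` class and the package name are placeholders:

```python
import mlflow
import mlflow.pyfunc


class EchoModel(mlflow.pyfunc.PythonModel):
    """Placeholder model; stands in for the DeepAR/ARIMA pyfunc wrappers."""

    def predict(self, context, model_input):
        return model_input


# Placeholder for DEEPAR_ADDITIONAL_PIP_DEPS / ARIMA_ADDITIONAL_PIP_DEPS;
# the real lists are defined in databricks.automl_runtime.
ADDITIONAL_PIP_DEPS = ["databricks-automl-runtime"]

with mlflow.start_run() as run:
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=EchoModel(),
        # appended to the automatically inferred requirements, so they show up
        # in runs:/<run_id>/model/requirements.txt and conda.yaml
        extra_pip_requirements=ADDITIONAL_PIP_DEPS,
    )
```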
The first changed file is the DeepAR model test module:

```diff
@@ -25,7 +25,7 @@
 from gluonts.torch.model.predictor import PyTorchPredictor

 from databricks.automl_runtime.forecast.deepar.model import (
-    DeepARModel, mlflow_deepar_log_model,
+    DeepARModel, mlflow_deepar_log_model, DEEPAR_ADDITIONAL_PIP_DEPS
 )

@@ -104,8 +104,12 @@ def test_model_save_and_load_single_series(self):
             mlflow_deepar_log_model(deepar_model, sample_input)

         run_id = run.info.run_id
-        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
+
+        # check if all additional dependencies are logged
+        self._check_requirements(run_id)
+
+        # load the model and predict
+        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
         pred_df = loaded_model.predict(sample_input)

         assert pred_df.columns.tolist() == [time_col, "yhat"]

@@ -145,10 +149,23 @@ def test_model_save_and_load_multi_series(self):
             mlflow_deepar_log_model(deepar_model, sample_input)

         run_id = run.info.run_id
-        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
+
+        # check if all additional dependencies are logged
+        self._check_requirements(run_id)
+
+        # load the model and predict
+        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
         pred_df = loaded_model.predict(sample_input)

         assert pred_df.columns.tolist() == [time_col, "yhat", id_col]
         assert len(pred_df) == self.prediction_length * 2
         assert pred_df[time_col].min() > sample_input[time_col].max()
+
+    def _check_requirements(self, run_id: str):
+        # read requirements.txt from the run
+        requirements_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
+        with open(requirements_path, "r") as f:
+            requirements = f.read()
+        # check if all additional dependencies are logged
+        for dependency in DEEPAR_ADDITIONAL_PIP_DEPS:
+            self.assertIn(dependency, requirements, f"requirements.txt should contain {dependency} but got {requirements}")
```

Review comment on `def _check_requirements(self, run_id: str)`: nit: if a method is being tested in a TestClass, make it a public method (i.e. without prefix `_`).

Author reply: It's not a unit test itself, it's a helper method called only within other unit test cases, should I still remove prefix?
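A possible way to settle the naming discussion above, sketched here rather than taken from the PR: keep the check as a plain module-level helper (the name `check_additional_deps_logged` is hypothetical), so it is neither collected as a test nor a private method of the TestCase:

```python
from typing import Iterable

import mlflow


def check_additional_deps_logged(run_id: str, additional_pip_deps: Iterable[str]) -> None:
    """Assert that every expected extra dependency appears in the run's logged requirements.txt."""
    requirements_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
    with open(requirements_path, "r") as f:
        requirements = f.read()
    missing = [dep for dep in additional_pip_deps if dep not in requirements]
    assert not missing, f"requirements.txt should contain {missing} but got {requirements}"
```

Either way the assertion is the same; only where the helper lives changes.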
The second changed file is the ARIMA (pmdarima) model test module:

```diff
@@ -25,8 +25,10 @@
 from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE
 from pmdarima.arima import ARIMA

-from databricks.automl_runtime.forecast.pmdarima.model import ArimaModel, MultiSeriesArimaModel, AbstractArimaModel, \
-    mlflow_arima_log_model
+from databricks.automl_runtime.forecast.pmdarima.model import (
+    ArimaModel, MultiSeriesArimaModel, AbstractArimaModel, \
+    mlflow_arima_log_model, ARIMA_ADDITIONAL_PIP_DEPS
+)


 class TestArimaModel(unittest.TestCase):
```

Review comment on the new import block: please break imports into multiple lines:
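The reviewer's attached snippet is not part of the captured page; a sketch of the one-name-per-line style presumably being requested (it also drops the backslash continuation, which is redundant inside parentheses):

```python
from databricks.automl_runtime.forecast.pmdarima.model import (
    ArimaModel,
    MultiSeriesArimaModel,
    AbstractArimaModel,
    mlflow_arima_log_model,
    ARIMA_ADDITIONAL_PIP_DEPS,
)
```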
The remaining hunks mirror the DeepAR changes: each log-model test now calls the requirements check, and the same `_check_requirements` helper is added, this time against `ARIMA_ADDITIONAL_PIP_DEPS`:

```diff
@@ -438,6 +440,11 @@ def test_mlflow_arima_log_model(self):

         # Load the saved model from mlflow
         run_id = run.info.run_id
+
+        # Check additional requirements logged correctly
+        self._check_requirements(run_id)
+
+        # Load the model
         loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

         # Make sure can make forecasts with the saved model

@@ -460,6 +467,11 @@ def test_mlflow_arima_log_model_multiseries(self):

         # Load the saved model from mlflow
         run_id = run.info.run_id
+
+        # Check additional requirements logged correctly
+        self._check_requirements(run_id)
+
+        # Load the model
         loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

         # Make sure can make forecasts with the saved model

@@ -473,3 +485,12 @@ def test_mlflow_arima_log_model_multiseries(self):

         # Make sure can make forecasts for one-row dataframe
         loaded_model.predict(test_df[0:1])
+
+    def _check_requirements(self, run_id: str):
+        # read requirements.txt from the run
+        requirements_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
+        with open(requirements_path, "r") as f:
+            requirements = f.read()
+        # check if all additional dependencies are logged
+        for dependency in ARIMA_ADDITIONAL_PIP_DEPS:
+            self.assertIn(dependency, requirements, f"requirements.txt should contain {dependency} but got {requirements}")
```
Review comment: Please add a JIRA ticket number to the TODO, e.g. `# TODO(ML-XXXXX): we should not be logging...`