Skip to content

Add databricks_automl to the conda env for arima and deepar #151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions runtime/databricks/automl_runtime/forecast/deepar/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@
from mlflow.utils.environment import _mlflow_conda_env

from databricks.automl_runtime.forecast.model import ForecastModel, mlflow_forecast_log_model
from databricks.automl_runtime import version


# Pip dependencies pinned to the currently-installed versions; logged with the
# DeepAR model so the serving environment matches training (includes
# databricks-automl-runtime itself so the model wrapper can be imported).
DEEPAR_ADDITIONAL_PIP_DEPS = [
    f"gluonts[torch]=={gluonts.__version__}",
    f"pandas=={pd.__version__}",
    f"databricks-automl-runtime=={version.__version__}"
]

DEEPAR_CONDA_ENV = _mlflow_conda_env(
additional_pip_deps=[
f"gluonts[torch]=={gluonts.__version__}",
f"pandas=={pd.__version__}",
]
additional_pip_deps=DEEPAR_ADDITIONAL_PIP_DEPS
)


Expand Down
10 changes: 7 additions & 3 deletions runtime/databricks/automl_runtime/forecast/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,14 @@ def mlflow_forecast_log_model(forecast_model: ForecastModel,
:param forecast_model: Forecast model wrapper
:param sample_input: sample input Dataframes for model inference
"""
# log the model without a signature if infer_signature fails.
# TODO: we should not be logging without a signature since it cannot be registered to UC then

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a JIRA ticket number to TODO:

e.g. # TODO(ML-XXXXX): we should not be logging...

try:
signature = forecast_model.infer_signature(sample_input)
except Exception: # noqa
signature = None
mlflow.pyfunc.log_model("model", conda_env=forecast_model.model_env,
python_model=forecast_model, signature=signature)
mlflow.pyfunc.log_model(
artifact_path="model",
conda_env=forecast_model.model_env,
python_model=forecast_model,
signature=signature
)
12 changes: 8 additions & 4 deletions runtime/databricks/automl_runtime/forecast/pmdarima/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,17 @@
from databricks.automl_runtime.forecast.model import ForecastModel, mlflow_forecast_log_model
from databricks.automl_runtime.forecast.utils import calculate_period_differences, is_frequency_consistency, \
make_future_dataframe, make_single_future_dataframe
from databricks.automl_runtime import version


# Pip dependencies pinned to the currently-installed versions; logged with the
# ARIMA model so the serving environment matches training (includes
# databricks-automl-runtime itself so the model wrapper can be imported).
ARIMA_ADDITIONAL_PIP_DEPS = [
    f"pmdarima=={pmdarima.__version__}",
    f"pandas=={pd.__version__}",
    f"databricks-automl-runtime=={version.__version__}"
]

ARIMA_CONDA_ENV = _mlflow_conda_env(
additional_pip_deps=[
f"pmdarima=={pmdarima.__version__}",
f"pandas=={pd.__version__}",
]
additional_pip_deps=ARIMA_ADDITIONAL_PIP_DEPS
)


Expand Down
6 changes: 4 additions & 2 deletions runtime/databricks/automl_runtime/forecast/prophet/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@
from databricks.automl_runtime.forecast.utils import is_quaterly_alias, make_future_dataframe


PROPHET_CONDA_ENV = _mlflow_conda_env(
additional_pip_deps=[
# Pip dependencies pinned to the currently-installed versions; logged with the
# Prophet model so the serving environment matches training (includes
# databricks-automl-runtime itself so the model wrapper can be imported).
PROPHET_ADDITIONAL_PIP_DEPS = [
    f"prophet=={prophet.__version__}",
    f"cloudpickle=={cloudpickle.__version__}",
    f"databricks-automl-runtime=={version.__version__}",
]

PROPHET_CONDA_ENV = _mlflow_conda_env(
additional_pip_deps=PROPHET_ADDITIONAL_PIP_DEPS
)


Expand Down
23 changes: 20 additions & 3 deletions runtime/tests/automl_runtime/forecast/deepar/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from gluonts.torch.model.predictor import PyTorchPredictor

from databricks.automl_runtime.forecast.deepar.model import (
DeepARModel, mlflow_deepar_log_model,
DeepARModel, mlflow_deepar_log_model, DEEPAR_ADDITIONAL_PIP_DEPS
)


Expand Down Expand Up @@ -104,8 +104,12 @@ def test_model_save_and_load_single_series(self):
mlflow_deepar_log_model(deepar_model, sample_input)

run_id = run.info.run_id
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# check if all additional dependencies are logged
self._check_requirements(run_id)

# load the model and predict
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
pred_df = loaded_model.predict(sample_input)

assert pred_df.columns.tolist() == [time_col, "yhat"]
Expand Down Expand Up @@ -145,10 +149,23 @@ def test_model_save_and_load_multi_series(self):
mlflow_deepar_log_model(deepar_model, sample_input)

run_id = run.info.run_id
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# check if all additional dependencies are logged
self._check_requirements(run_id)

# load the model and predict
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
pred_df = loaded_model.predict(sample_input)

assert pred_df.columns.tolist() == [time_col, "yhat", id_col]
assert len(pred_df) == self.prediction_length * 2
assert pred_df[time_col].min() > sample_input[time_col].max()

def _check_requirements(self, run_id: str):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: if a method is being tested in a TestClass, make it a public method (i.e. without prefix _)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not a unit test itself, it's a helper method called only within other unit test cases, should I still remove prefix?

# read requirements.txt from the run
requirements_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
with open(requirements_path, "r") as f:
requirements = f.read()
# check if all additional dependencies are logged
for dependency in DEEPAR_ADDITIONAL_PIP_DEPS:
self.assertIn(dependency, requirements, f"requirements.txt should contain {dependency} but got {requirements}")
25 changes: 23 additions & 2 deletions runtime/tests/automl_runtime/forecast/pmdarima/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE
from pmdarima.arima import ARIMA

from databricks.automl_runtime.forecast.pmdarima.model import ArimaModel, MultiSeriesArimaModel, AbstractArimaModel, \
mlflow_arima_log_model
from databricks.automl_runtime.forecast.pmdarima.model import (

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please break imports into multiple lines:

See https://peps.python.org/pep-0008/#imports

ArimaModel, MultiSeriesArimaModel, AbstractArimaModel, \
mlflow_arima_log_model, ARIMA_ADDITIONAL_PIP_DEPS
)


class TestArimaModel(unittest.TestCase):
Expand Down Expand Up @@ -438,6 +440,11 @@ def test_mlflow_arima_log_model(self):

# Load the saved model from mlflow
run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the model
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Make sure can make forecasts with the saved model
Expand All @@ -460,6 +467,11 @@ def test_mlflow_arima_log_model_multiseries(self):

# Load the saved model from mlflow
run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the model
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Make sure can make forecasts with the saved model
Expand All @@ -473,3 +485,12 @@ def test_mlflow_arima_log_model_multiseries(self):

# Make sure can make forecasts for one-row dataframe
loaded_model.predict(test_df[0:1])

def _check_requirements(self, run_id: str):
    """Assert that every ARIMA pip dependency appears in the run's logged requirements.txt.

    :param run_id: MLflow run id whose "model" artifact is inspected.
    """
    # Fetch the requirements file that MLflow logged alongside the model.
    local_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
    with open(local_path, "r") as req_file:
        logged = req_file.read()
    # Every additional pip dependency must have been written out verbatim.
    for dep in ARIMA_ADDITIONAL_PIP_DEPS:
        self.assertIn(dep, logged, f"requirements.txt should contain {dep} but got {logged}")
36 changes: 30 additions & 6 deletions runtime/tests/automl_runtime/forecast/prophet/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,22 @@
ProphetModel,
OFFSET_ALIAS_MAP,
DATE_OFFSET_KEYWORD_MAP,
PROPHET_ADDITIONAL_PIP_DEPS
)

PROPHET_MODEL_JSON = '{"growth": "linear", "n_changepoints": 6, "specified_changepoints": false, "changepoint_range": 0.8, "yearly_seasonality": "auto", "weekly_seasonality": "auto", "daily_seasonality": "auto", "seasonality_mode": "additive", "seasonality_prior_scale": 10.0, "changepoint_prior_scale": 0.05, "holidays_prior_scale": 10.0, "mcmc_samples": 0, "interval_width": 0.8, "uncertainty_samples": 1000, "y_scale": 8.0, "logistic_floor": false, "country_holidays": null, "component_modes": {"additive": ["weekly", "additive_terms", "extra_regressors_additive", "holidays"], "multiplicative": ["multiplicative_terms", "extra_regressors_multiplicative"]}, "changepoints": "{\\"name\\":\\"ds\\",\\"index\\":[1,2,3,4,5,6],\\"data\\":[\\"2020-10-04T00:00:00.000\\",\\"2020-10-07T00:00:00.000\\",\\"2020-10-10T00:00:00.000\\",\\"2020-10-13T00:00:00.000\\",\\"2020-10-16T00:00:00.000\\",\\"2020-10-19T00:00:00.000\\"]}", "history_dates": "{\\"name\\":\\"ds\\",\\"index\\":[0,1,2,3,4,5,6,7,8],\\"data\\":[\\"2020-10-01T00:00:00.000\\",\\"2020-10-04T00:00:00.000\\",\\"2020-10-07T00:00:00.000\\",\\"2020-10-10T00:00:00.000\\",\\"2020-10-13T00:00:00.000\\",\\"2020-10-16T00:00:00.000\\",\\"2020-10-19T00:00:00.000\\",\\"2020-10-22T00:00:00.000\\",\\"2020-10-25T00:00:00.000\\"]}", "train_holiday_names": null, "start": 1601510400.0, "t_scale": 2073600.0, "holidays": null, "history": 
"{\\"schema\\":{\\"fields\\":[{\\"name\\":\\"ds\\",\\"type\\":\\"datetime\\"},{\\"name\\":\\"y\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"floor\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"t\\",\\"type\\":\\"number\\"},{\\"name\\":\\"y_scaled\\",\\"type\\":\\"number\\"}],\\"pandas_version\\":\\"1.4.0\\"},\\"data\\":[{\\"ds\\":\\"2020-10-01T00:00:00.000\\",\\"y\\":0,\\"floor\\":0,\\"t\\":0.0,\\"y_scaled\\":0.0},{\\"ds\\":\\"2020-10-04T00:00:00.000\\",\\"y\\":1,\\"floor\\":0,\\"t\\":0.125,\\"y_scaled\\":0.125},{\\"ds\\":\\"2020-10-07T00:00:00.000\\",\\"y\\":2,\\"floor\\":0,\\"t\\":0.25,\\"y_scaled\\":0.25},{\\"ds\\":\\"2020-10-10T00:00:00.000\\",\\"y\\":3,\\"floor\\":0,\\"t\\":0.375,\\"y_scaled\\":0.375},{\\"ds\\":\\"2020-10-13T00:00:00.000\\",\\"y\\":4,\\"floor\\":0,\\"t\\":0.5,\\"y_scaled\\":0.5},{\\"ds\\":\\"2020-10-16T00:00:00.000\\",\\"y\\":5,\\"floor\\":0,\\"t\\":0.625,\\"y_scaled\\":0.625},{\\"ds\\":\\"2020-10-19T00:00:00.000\\",\\"y\\":6,\\"floor\\":0,\\"t\\":0.75,\\"y_scaled\\":0.75},{\\"ds\\":\\"2020-10-22T00:00:00.000\\",\\"y\\":7,\\"floor\\":0,\\"t\\":0.875,\\"y_scaled\\":0.875},{\\"ds\\":\\"2020-10-25T00:00:00.000\\",\\"y\\":8,\\"floor\\":0,\\"t\\":1.0,\\"y_scaled\\":1.0}]}", "train_component_cols": "{\\"schema\\":{\\"fields\\":[{\\"name\\":\\"additive_terms\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"weekly\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"multiplicative_terms\\",\\"type\\":\\"integer\\"}],\\"pandas_version\\":\\"1.4.0\\"},\\"data\\":[{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0}]}", "changepoints_t": [0.125, 0.25, 0.375, 0.5, 0.625, 0.75], "seasonalities": [["weekly"], {"weekly": 
{"period": 7, "fourier_order": 3, "prior_scale": 10.0, "mode": "additive", "condition_name": null}}], "extra_regressors": [[], {}], "fit_kwargs": {}, "params": {"lp__": [[202.053]], "k": [[1.19777]], "m": [[0.0565623]], "delta": [[-0.86152, 0.409957, -0.103241, 0.528979, 0.535181, -0.509356]], "sigma_obs": [[2.53056e-13]], "beta": [[-0.00630566, 0.016248, 0.0318587, -0.068705, 0.0029986, -0.00410522]], "trend": [[0.0565623, 0.206283, 0.248314, 0.341589, 0.421959, 0.568452, 0.781842, 0.931562, 1.08128]]}, "__prophet_version": "1.1.1"}'


class TestProphetModel(unittest.TestCase):
class BaseProphetModelTest(unittest.TestCase):
    """Shared assertions for Prophet model-logging tests."""

    def _check_requirements(self, run_id: str):
        """Assert that every Prophet pip dependency appears in the run's logged requirements.txt.

        :param run_id: MLflow run id whose "model" artifact is inspected.
        """
        # Fetch the requirements file that MLflow logged alongside the model.
        local_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
        with open(local_path, "r") as req_file:
            logged = req_file.read()
        # Every additional pip dependency must have been written out verbatim.
        for dep in PROPHET_ADDITIONAL_PIP_DEPS:
            self.assertIn(dep, logged, f"requirements.txt should contain {dep} but got {logged}")

class TestProphetModel(BaseProphetModelTest):
@classmethod
def setUpClass(cls) -> None:
num_rows = 9
Expand Down Expand Up @@ -74,8 +84,13 @@ def test_model_save_and_load(self):

with mlflow.start_run() as run:
mlflow_prophet_log_model(prophet_model)
# Load the saved model from mlflow

run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the saved model from mlflow
prophet_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Check the prediction with the saved model
Expand Down Expand Up @@ -144,7 +159,7 @@ def test_validate_predict_cols(self):
assert e.value.error_code == ErrorCode.Name(INTERNAL_ERROR)


class TestMultiSeriesProphetModel(unittest.TestCase):
class TestMultiSeriesProphetModel(BaseProphetModelTest):
@classmethod
def setUpClass(cls) -> None:
cls.model_json = PROPHET_MODEL_JSON
Expand Down Expand Up @@ -178,8 +193,13 @@ def test_model_save_and_load(self):
with mlflow.start_run() as run:
mlflow_prophet_log_model(self.prophet_model, sample_input=test_df)

# Load the saved model from mlflow

run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the saved model from mlflow
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Check the prediction with the saved model
Expand Down Expand Up @@ -239,9 +259,13 @@ def test_model_save_and_load_multi_ids(self):
)
with mlflow.start_run() as run:
mlflow_prophet_log_model(prophet_model, sample_input=test_df)

run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the saved model from mlflow
run_id = run.info.run_id
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Check the prediction with the saved model
Expand Down
Loading