diff --git a/runtime/databricks/automl_runtime/forecast/deepar/model.py b/runtime/databricks/automl_runtime/forecast/deepar/model.py
index feaaa8c9..46ae1e26 100644
--- a/runtime/databricks/automl_runtime/forecast/deepar/model.py
+++ b/runtime/databricks/automl_runtime/forecast/deepar/model.py
@@ -23,12 +23,17 @@
 from mlflow.utils.environment import _mlflow_conda_env
 
 from databricks.automl_runtime.forecast.model import ForecastModel, mlflow_forecast_log_model
+from databricks.automl_runtime import version
+
+
+DEEPAR_ADDITIONAL_PIP_DEPS = [
+    f"gluonts[torch]=={gluonts.__version__}",
+    f"pandas=={pd.__version__}",
+    f"databricks-automl-runtime=={version.__version__}"
+]
 
 
 DEEPAR_CONDA_ENV = _mlflow_conda_env(
-    additional_pip_deps=[
-        f"gluonts[torch]=={gluonts.__version__}",
-        f"pandas=={pd.__version__}",
-    ]
+    additional_pip_deps=DEEPAR_ADDITIONAL_PIP_DEPS
 )
 
diff --git a/runtime/databricks/automl_runtime/forecast/model.py b/runtime/databricks/automl_runtime/forecast/model.py
index 2ba5fe66..4b1aa25c 100644
--- a/runtime/databricks/automl_runtime/forecast/model.py
+++ b/runtime/databricks/automl_runtime/forecast/model.py
@@ -57,10 +57,14 @@ def mlflow_forecast_log_model(forecast_model: ForecastModel,
     :param forecast_model: Forecast model wrapper
     :param sample_input: sample input Dataframes for model inference
     """
-    # log the model without signature if infer_signature is failed.
+    # TODO: [ML-46185] we should not log the model without a signature, since it cannot then be registered to UC
    try:
         signature = forecast_model.infer_signature(sample_input)
     except Exception:  # noqa
         signature = None
-    mlflow.pyfunc.log_model("model", conda_env=forecast_model.model_env,
-                            python_model=forecast_model, signature=signature)
+    mlflow.pyfunc.log_model(
+        artifact_path="model",
+        conda_env=forecast_model.model_env,
+        python_model=forecast_model,
+        signature=signature
+    )
diff --git a/runtime/databricks/automl_runtime/forecast/pmdarima/model.py b/runtime/databricks/automl_runtime/forecast/pmdarima/model.py
index c99d5d01..64e5377d 100644
--- a/runtime/databricks/automl_runtime/forecast/pmdarima/model.py
+++ b/runtime/databricks/automl_runtime/forecast/pmdarima/model.py
@@ -28,13 +28,17 @@
 from databricks.automl_runtime.forecast.model import ForecastModel, mlflow_forecast_log_model
 from databricks.automl_runtime.forecast.utils import calculate_period_differences, is_frequency_consistency, \
     make_future_dataframe, make_single_future_dataframe
+from databricks.automl_runtime import version
+
+ARIMA_ADDITIONAL_PIP_DEPS = [
+    f"pmdarima=={pmdarima.__version__}",
+    f"pandas=={pd.__version__}",
+    f"databricks-automl-runtime=={version.__version__}"
+]
+
 
 ARIMA_CONDA_ENV = _mlflow_conda_env(
-    additional_pip_deps=[
-        f"pmdarima=={pmdarima.__version__}",
-        f"pandas=={pd.__version__}",
-    ]
+    additional_pip_deps=ARIMA_ADDITIONAL_PIP_DEPS
 )
 
diff --git a/runtime/databricks/automl_runtime/forecast/prophet/model.py b/runtime/databricks/automl_runtime/forecast/prophet/model.py
index ed155a3f..f678ce6b 100644
--- a/runtime/databricks/automl_runtime/forecast/prophet/model.py
+++ b/runtime/databricks/automl_runtime/forecast/prophet/model.py
@@ -29,12 +29,14 @@
 from databricks.automl_runtime.forecast.utils import is_quaterly_alias, make_future_dataframe
 
 
-PROPHET_CONDA_ENV = _mlflow_conda_env(
-    additional_pip_deps=[
+PROPHET_ADDITIONAL_PIP_DEPS = [
     f"prophet=={prophet.__version__}",
     f"cloudpickle=={cloudpickle.__version__}",
     f"databricks-automl-runtime=={version.__version__}",
 ]
+
+PROPHET_CONDA_ENV = _mlflow_conda_env(
+    additional_pip_deps=PROPHET_ADDITIONAL_PIP_DEPS
 )
 
diff --git a/runtime/tests/automl_runtime/forecast/deepar/model_test.py b/runtime/tests/automl_runtime/forecast/deepar/model_test.py
index f896688a..bf9ac93a 100644
--- a/runtime/tests/automl_runtime/forecast/deepar/model_test.py
+++ b/runtime/tests/automl_runtime/forecast/deepar/model_test.py
@@ -25,7 +25,9 @@
 from gluonts.torch.model.predictor import PyTorchPredictor
 
 from databricks.automl_runtime.forecast.deepar.model import (
-    DeepARModel, mlflow_deepar_log_model,
+    DeepARModel,
+    mlflow_deepar_log_model,
+    DEEPAR_ADDITIONAL_PIP_DEPS
 )
@@ -104,8 +106,12 @@ def test_model_save_and_load_single_series(self):
             mlflow_deepar_log_model(deepar_model, sample_input)
 
         run_id = run.info.run_id
-        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
+        # check if all additional dependencies are logged
+        self._check_requirements(run_id)
+
+        # load the model and predict
+        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
 
         pred_df = loaded_model.predict(sample_input)
 
         assert pred_df.columns.tolist() == [time_col, "yhat"]
@@ -145,10 +151,23 @@ def test_model_save_and_load_multi_series(self):
             mlflow_deepar_log_model(deepar_model, sample_input)
 
         run_id = run.info.run_id
-        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
+        # check if all additional dependencies are logged
+        self._check_requirements(run_id)
+
+        # load the model and predict
+        loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
 
         pred_df = loaded_model.predict(sample_input)
 
         assert pred_df.columns.tolist() == [time_col, "yhat", id_col]
         assert len(pred_df) == self.prediction_length * 2
         assert pred_df[time_col].min() > sample_input[time_col].max()
+
+    def _check_requirements(self, run_id: str):
+        # read requirements.txt from the run
+        requirements_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
+        with open(requirements_path, "r") as f:
+            requirements = f.read()
+        # check if all additional dependencies are logged
+        for dependency in DEEPAR_ADDITIONAL_PIP_DEPS:
+            self.assertIn(dependency, requirements, f"requirements.txt should contain {dependency} but got {requirements}")
diff --git a/runtime/tests/automl_runtime/forecast/pmdarima/model_test.py b/runtime/tests/automl_runtime/forecast/pmdarima/model_test.py
index f7616787..3c6d0d40 100644
--- a/runtime/tests/automl_runtime/forecast/pmdarima/model_test.py
+++ b/runtime/tests/automl_runtime/forecast/pmdarima/model_test.py
@@ -25,8 +25,13 @@
 from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE
 from pmdarima.arima import ARIMA
 
-from databricks.automl_runtime.forecast.pmdarima.model import ArimaModel, MultiSeriesArimaModel, AbstractArimaModel, \
-    mlflow_arima_log_model
+from databricks.automl_runtime.forecast.pmdarima.model import (
+    ArimaModel,
+    MultiSeriesArimaModel,
+    AbstractArimaModel,
+    mlflow_arima_log_model,
+    ARIMA_ADDITIONAL_PIP_DEPS,
+)
 
 
 class TestArimaModel(unittest.TestCase):
@@ -438,6 +443,11 @@ def test_mlflow_arima_log_model(self):
 
         # Load the saved model from mlflow
         run_id = run.info.run_id
+
+        # Check additional requirements are logged correctly
+        self._check_requirements(run_id)
+
+        # Load the model
         loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
 
         # Make sure can make forecasts with the saved model
@@ -460,6 +470,11 @@ def test_mlflow_arima_log_model_multiseries(self):
 
         # Load the saved model from mlflow
         run_id = run.info.run_id
+
+        # Check additional requirements are logged correctly
+        self._check_requirements(run_id)
+
+        # Load the model
         loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
 
         # Make sure can make forecasts with the saved model
@@ -473,3 +488,12 @@
 
         # Make sure can make forecasts for one-row dataframe
         loaded_model.predict(test_df[0:1])
+
+    def _check_requirements(self, run_id: str):
+        # read requirements.txt from the run
+        requirements_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
+        with open(requirements_path, "r") as f:
+            requirements = f.read()
+        # check if all additional dependencies are logged
+        for dependency in ARIMA_ADDITIONAL_PIP_DEPS:
+            self.assertIn(dependency, requirements, f"requirements.txt should contain {dependency} but got {requirements}")
diff --git a/runtime/tests/automl_runtime/forecast/prophet/model_test.py b/runtime/tests/automl_runtime/forecast/prophet/model_test.py
index a4b321a8..4ce65dae 100644
--- a/runtime/tests/automl_runtime/forecast/prophet/model_test.py
+++ b/runtime/tests/automl_runtime/forecast/prophet/model_test.py
@@ -32,12 +32,22 @@
     ProphetModel,
     OFFSET_ALIAS_MAP,
     DATE_OFFSET_KEYWORD_MAP,
+    PROPHET_ADDITIONAL_PIP_DEPS
 )
 
 PROPHET_MODEL_JSON = '{"growth": "linear", "n_changepoints": 6, "specified_changepoints": false, "changepoint_range": 0.8, "yearly_seasonality": "auto", "weekly_seasonality": "auto", "daily_seasonality": "auto", "seasonality_mode": "additive", "seasonality_prior_scale": 10.0, "changepoint_prior_scale": 0.05, "holidays_prior_scale": 10.0, "mcmc_samples": 0, "interval_width": 0.8, "uncertainty_samples": 1000, "y_scale": 8.0, "logistic_floor": false, "country_holidays": null, "component_modes": {"additive": ["weekly", "additive_terms", "extra_regressors_additive", "holidays"], "multiplicative": ["multiplicative_terms", "extra_regressors_multiplicative"]}, "changepoints": "{\\"name\\":\\"ds\\",\\"index\\":[1,2,3,4,5,6],\\"data\\":[\\"2020-10-04T00:00:00.000\\",\\"2020-10-07T00:00:00.000\\",\\"2020-10-10T00:00:00.000\\",\\"2020-10-13T00:00:00.000\\",\\"2020-10-16T00:00:00.000\\",\\"2020-10-19T00:00:00.000\\"]}", "history_dates": "{\\"name\\":\\"ds\\",\\"index\\":[0,1,2,3,4,5,6,7,8],\\"data\\":[\\"2020-10-01T00:00:00.000\\",\\"2020-10-04T00:00:00.000\\",\\"2020-10-07T00:00:00.000\\",\\"2020-10-10T00:00:00.000\\",\\"2020-10-13T00:00:00.000\\",\\"2020-10-16T00:00:00.000\\",\\"2020-10-19T00:00:00.000\\",\\"2020-10-22T00:00:00.000\\",\\"2020-10-25T00:00:00.000\\"]}", "train_holiday_names": null, "start": 1601510400.0, "t_scale": 2073600.0, "holidays": null, "history": "{\\"schema\\":{\\"fields\\":[{\\"name\\":\\"ds\\",\\"type\\":\\"datetime\\"},{\\"name\\":\\"y\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"floor\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"t\\",\\"type\\":\\"number\\"},{\\"name\\":\\"y_scaled\\",\\"type\\":\\"number\\"}],\\"pandas_version\\":\\"1.4.0\\"},\\"data\\":[{\\"ds\\":\\"2020-10-01T00:00:00.000\\",\\"y\\":0,\\"floor\\":0,\\"t\\":0.0,\\"y_scaled\\":0.0},{\\"ds\\":\\"2020-10-04T00:00:00.000\\",\\"y\\":1,\\"floor\\":0,\\"t\\":0.125,\\"y_scaled\\":0.125},{\\"ds\\":\\"2020-10-07T00:00:00.000\\",\\"y\\":2,\\"floor\\":0,\\"t\\":0.25,\\"y_scaled\\":0.25},{\\"ds\\":\\"2020-10-10T00:00:00.000\\",\\"y\\":3,\\"floor\\":0,\\"t\\":0.375,\\"y_scaled\\":0.375},{\\"ds\\":\\"2020-10-13T00:00:00.000\\",\\"y\\":4,\\"floor\\":0,\\"t\\":0.5,\\"y_scaled\\":0.5},{\\"ds\\":\\"2020-10-16T00:00:00.000\\",\\"y\\":5,\\"floor\\":0,\\"t\\":0.625,\\"y_scaled\\":0.625},{\\"ds\\":\\"2020-10-19T00:00:00.000\\",\\"y\\":6,\\"floor\\":0,\\"t\\":0.75,\\"y_scaled\\":0.75},{\\"ds\\":\\"2020-10-22T00:00:00.000\\",\\"y\\":7,\\"floor\\":0,\\"t\\":0.875,\\"y_scaled\\":0.875},{\\"ds\\":\\"2020-10-25T00:00:00.000\\",\\"y\\":8,\\"floor\\":0,\\"t\\":1.0,\\"y_scaled\\":1.0}]}", "train_component_cols": "{\\"schema\\":{\\"fields\\":[{\\"name\\":\\"additive_terms\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"weekly\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"multiplicative_terms\\",\\"type\\":\\"integer\\"}],\\"pandas_version\\":\\"1.4.0\\"},\\"data\\":[{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0}]}", "changepoints_t": [0.125, 0.25, 0.375, 0.5, 0.625, 0.75], "seasonalities": [["weekly"], {"weekly": {"period": 7, "fourier_order": 3, "prior_scale": 10.0, "mode": "additive", "condition_name": null}}], "extra_regressors": [[], {}], "fit_kwargs": {}, "params": {"lp__": [[202.053]], "k": [[1.19777]], "m": [[0.0565623]], "delta": [[-0.86152, 0.409957, -0.103241, 0.528979, 0.535181, -0.509356]], "sigma_obs": [[2.53056e-13]], "beta": [[-0.00630566, 0.016248, 0.0318587, -0.068705, 0.0029986, -0.00410522]], "trend": [[0.0565623, 0.206283, 0.248314, 0.341589, 0.421959, 0.568452, 0.781842, 0.931562, 1.08128]]}, "__prophet_version": "1.1.1"}'
 
-
-class TestProphetModel(unittest.TestCase):
+class BaseProphetModelTest(unittest.TestCase):
+    def _check_requirements(self, run_id: str):
+        # read requirements.txt from the run
+        requirements_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
+        with open(requirements_path, "r") as f:
+            requirements = f.read()
+        # check if all additional dependencies are logged
+        for dependency in PROPHET_ADDITIONAL_PIP_DEPS:
+            self.assertIn(dependency, requirements, f"requirements.txt should contain {dependency} but got {requirements}")
+
+class TestProphetModel(BaseProphetModelTest):
     @classmethod
     def setUpClass(cls) -> None:
         num_rows = 9
@@ -74,8 +84,13 @@ def test_model_save_and_load(self):
         with mlflow.start_run() as run:
             mlflow_prophet_log_model(prophet_model)
 
-        # Load the saved model from mlflow
+
         run_id = run.info.run_id
+
+        # Check additional requirements are logged correctly
+        self._check_requirements(run_id)
+
+        # Load the saved model from mlflow
         prophet_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
 
         # Check the prediction with the saved model
@@ -144,7 +159,7 @@ def test_validate_predict_cols(self):
         assert e.value.error_code == ErrorCode.Name(INTERNAL_ERROR)
 
 
-class TestMultiSeriesProphetModel(unittest.TestCase):
+class TestMultiSeriesProphetModel(BaseProphetModelTest):
     @classmethod
     def setUpClass(cls) -> None:
         cls.model_json = PROPHET_MODEL_JSON
@@ -178,8 +193,13 @@ def test_model_save_and_load(self):
         with mlflow.start_run() as run:
             mlflow_prophet_log_model(self.prophet_model, sample_input=test_df)
 
-        # Load the saved model from mlflow
+
         run_id = run.info.run_id
+
+        # Check additional requirements are logged correctly
+        self._check_requirements(run_id)
+
+        # Load the saved model from mlflow
         loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
 
         # Check the prediction with the saved model
@@ -239,9 +259,13 @@ def test_model_save_and_load_multi_ids(self):
         )
         with mlflow.start_run() as run:
             mlflow_prophet_log_model(prophet_model, sample_input=test_df)
+
+        run_id = run.info.run_id
+
+        # Check additional requirements are logged correctly
+        self._check_requirements(run_id)
 
         # Load the saved model from mlflow
-        run_id = run.info.run_id
         loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
 
         # Check the prediction with the saved model
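
Reviewer note, not part of the patch: the behavior the new *_ADDITIONAL_PIP_DEPS constants rely on (extra pip pins passed to _mlflow_conda_env surface verbatim in the logged model's requirements.txt, which is what the new _check_requirements helpers assert) can be reproduced standalone with the minimal Python sketch below. DummyModel and the single pandas pin are illustrative assumptions, not code from this PR.

import mlflow
import pandas as pd
from mlflow.utils.environment import _mlflow_conda_env

# Pinned deps, built the same way as the *_ADDITIONAL_PIP_DEPS constants above.
ADDITIONAL_PIP_DEPS = [f"pandas=={pd.__version__}"]
CONDA_ENV = _mlflow_conda_env(additional_pip_deps=ADDITIONAL_PIP_DEPS)


class DummyModel(mlflow.pyfunc.PythonModel):
    """Stand-in for a forecast model wrapper; returns its input unchanged."""
    def predict(self, context, model_input):
        return model_input


with mlflow.start_run() as run:
    mlflow.pyfunc.log_model(
        artifact_path="model",
        conda_env=CONDA_ENV,
        python_model=DummyModel(),
    )

# The pins should appear verbatim in the auto-generated requirements.txt.
path = mlflow.artifacts.download_artifacts(f"runs:/{run.info.run_id}/model/requirements.txt")
with open(path) as f:
    requirements = f.read()
assert all(dep in requirements for dep in ADDITIONAL_PIP_DEPS)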