Skip to content

Add databricks_automl to the conda env for arima and deepar #151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions runtime/databricks/automl_runtime/forecast/deepar/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@
from mlflow.utils.environment import _mlflow_conda_env

from databricks.automl_runtime.forecast.model import ForecastModel, mlflow_forecast_log_model
from databricks.automl_runtime import version


# Pip dependencies pinned to the currently-installed versions; logged with the
# DeepAR model so the serving environment matches training (includes
# databricks-automl-runtime itself so the model wrapper can be imported).
DEEPAR_ADDITIONAL_PIP_DEPS = [
    f"gluonts[torch]=={gluonts.__version__}",
    f"pandas=={pd.__version__}",
    f"databricks-automl-runtime=={version.__version__}"
]

DEEPAR_CONDA_ENV = _mlflow_conda_env(
additional_pip_deps=[
f"gluonts[torch]=={gluonts.__version__}",
f"pandas=={pd.__version__}",
]
additional_pip_deps=DEEPAR_ADDITIONAL_PIP_DEPS
)


Expand Down
10 changes: 7 additions & 3 deletions runtime/databricks/automl_runtime/forecast/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,14 @@ def mlflow_forecast_log_model(forecast_model: ForecastModel,
:param forecast_model: Forecast model wrapper
:param sample_input: sample input Dataframes for model inference
"""
# log the model without a signature if infer_signature fails.
# TODO: we should not be logging without a signature since it cannot be registered to UC then

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a JIRA ticket number to TODO:

e.g. # TODO(ML-XXXXX): we should not be logging...

try:
signature = forecast_model.infer_signature(sample_input)
except Exception: # noqa
signature = None
mlflow.pyfunc.log_model("model", conda_env=forecast_model.model_env,
python_model=forecast_model, signature=signature)
mlflow.pyfunc.log_model(
artifact_path="model",
conda_env=forecast_model.model_env,
python_model=forecast_model,
signature=signature
)
12 changes: 8 additions & 4 deletions runtime/databricks/automl_runtime/forecast/pmdarima/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,17 @@
from databricks.automl_runtime.forecast.model import ForecastModel, mlflow_forecast_log_model
from databricks.automl_runtime.forecast.utils import calculate_period_differences, is_frequency_consistency, \
make_future_dataframe, make_single_future_dataframe
from databricks.automl_runtime import version


# Pip dependencies pinned to the currently-installed versions; logged with the
# ARIMA model so the serving environment matches training (includes
# databricks-automl-runtime itself so the model wrapper can be imported).
ARIMA_ADDITIONAL_PIP_DEPS = [
    f"pmdarima=={pmdarima.__version__}",
    f"pandas=={pd.__version__}",
    f"databricks-automl-runtime=={version.__version__}"
]

ARIMA_CONDA_ENV = _mlflow_conda_env(
additional_pip_deps=[
f"pmdarima=={pmdarima.__version__}",
f"pandas=={pd.__version__}",
]
additional_pip_deps=ARIMA_ADDITIONAL_PIP_DEPS
)


Expand Down
6 changes: 4 additions & 2 deletions runtime/databricks/automl_runtime/forecast/prophet/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@
from databricks.automl_runtime.forecast.utils import is_quaterly_alias, make_future_dataframe


PROPHET_CONDA_ENV = _mlflow_conda_env(
additional_pip_deps=[
# Pip dependencies pinned to the currently-installed versions; logged with the
# Prophet model so the serving environment matches training (includes
# databricks-automl-runtime itself so the model wrapper can be imported).
PROPHET_ADDITIONAL_PIP_DEPS = [
    f"prophet=={prophet.__version__}",
    f"cloudpickle=={cloudpickle.__version__}",
    f"databricks-automl-runtime=={version.__version__}",
]

PROPHET_CONDA_ENV = _mlflow_conda_env(
additional_pip_deps=PROPHET_ADDITIONAL_PIP_DEPS
)


Expand Down
23 changes: 20 additions & 3 deletions runtime/tests/automl_runtime/forecast/deepar/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from gluonts.torch.model.predictor import PyTorchPredictor

from databricks.automl_runtime.forecast.deepar.model import (
DeepARModel, mlflow_deepar_log_model,
DeepARModel, mlflow_deepar_log_model, DEEPAR_ADDITIONAL_PIP_DEPS
)


Expand Down Expand Up @@ -104,8 +104,12 @@ def test_model_save_and_load_single_series(self):
mlflow_deepar_log_model(deepar_model, sample_input)

run_id = run.info.run_id
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# check if all additional dependencies are logged
self._check_requirements(run_id)

# load the model and predict
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
pred_df = loaded_model.predict(sample_input)

assert pred_df.columns.tolist() == [time_col, "yhat"]
Expand Down Expand Up @@ -145,10 +149,23 @@ def test_model_save_and_load_multi_series(self):
mlflow_deepar_log_model(deepar_model, sample_input)

run_id = run.info.run_id
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# check if all additional dependencies are logged
self._check_requirements(run_id)

# load the model and predict
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
pred_df = loaded_model.predict(sample_input)

assert pred_df.columns.tolist() == [time_col, "yhat", id_col]
assert len(pred_df) == self.prediction_length * 2
assert pred_df[time_col].min() > sample_input[time_col].max()

def _check_requirements(self, run_id: str):

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: if a method is being tested in a TestClass, make it a public method (i.e. without prefix _)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not a unit test itself, it's a helper method called only within other unit test cases, should I still remove prefix?

# read requirements.txt from the run
requirements_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
with open(requirements_path, "r") as f:
requirements = f.read()
# check if all additional dependencies are logged
for dependency in DEEPAR_ADDITIONAL_PIP_DEPS:
self.assertIn(dependency, requirements, f"requirements.txt should contain {dependency} but got {requirements}")
25 changes: 23 additions & 2 deletions runtime/tests/automl_runtime/forecast/pmdarima/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
from mlflow.protos.databricks_pb2 import ErrorCode, INVALID_PARAMETER_VALUE
from pmdarima.arima import ARIMA

from databricks.automl_runtime.forecast.pmdarima.model import ArimaModel, MultiSeriesArimaModel, AbstractArimaModel, \
mlflow_arima_log_model
from databricks.automl_runtime.forecast.pmdarima.model import (

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please break imports into multiple lines:

See https://peps.python.org/pep-0008/#imports

ArimaModel, MultiSeriesArimaModel, AbstractArimaModel, \
mlflow_arima_log_model, ARIMA_ADDITIONAL_PIP_DEPS
)


class TestArimaModel(unittest.TestCase):
Expand Down Expand Up @@ -438,6 +440,11 @@ def test_mlflow_arima_log_model(self):

# Load the saved model from mlflow
run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the model
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Make sure can make forecasts with the saved model
Expand All @@ -460,6 +467,11 @@ def test_mlflow_arima_log_model_multiseries(self):

# Load the saved model from mlflow
run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the model
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Make sure can make forecasts with the saved model
Expand All @@ -473,3 +485,12 @@ def test_mlflow_arima_log_model_multiseries(self):

# Make sure can make forecasts for one-row dataframe
loaded_model.predict(test_df[0:1])

def _check_requirements(self, run_id: str):
    """Assert that every ARIMA pip dependency appears in the run's logged requirements.txt.

    :param run_id: MLflow run id whose "model" artifact is inspected.
    """
    # Fetch the requirements file that MLflow logged alongside the model.
    local_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
    with open(local_path, "r") as req_file:
        logged = req_file.read()
    # Every additional pip dependency must have been written out verbatim.
    for dep in ARIMA_ADDITIONAL_PIP_DEPS:
        self.assertIn(dep, logged, f"requirements.txt should contain {dep} but got {logged}")
36 changes: 30 additions & 6 deletions runtime/tests/automl_runtime/forecast/prophet/model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,22 @@
ProphetModel,
OFFSET_ALIAS_MAP,
DATE_OFFSET_KEYWORD_MAP,
PROPHET_ADDITIONAL_PIP_DEPS
)

PROPHET_MODEL_JSON = '{"growth": "linear", "n_changepoints": 6, "specified_changepoints": false, "changepoint_range": 0.8, "yearly_seasonality": "auto", "weekly_seasonality": "auto", "daily_seasonality": "auto", "seasonality_mode": "additive", "seasonality_prior_scale": 10.0, "changepoint_prior_scale": 0.05, "holidays_prior_scale": 10.0, "mcmc_samples": 0, "interval_width": 0.8, "uncertainty_samples": 1000, "y_scale": 8.0, "logistic_floor": false, "country_holidays": null, "component_modes": {"additive": ["weekly", "additive_terms", "extra_regressors_additive", "holidays"], "multiplicative": ["multiplicative_terms", "extra_regressors_multiplicative"]}, "changepoints": "{\\"name\\":\\"ds\\",\\"index\\":[1,2,3,4,5,6],\\"data\\":[\\"2020-10-04T00:00:00.000\\",\\"2020-10-07T00:00:00.000\\",\\"2020-10-10T00:00:00.000\\",\\"2020-10-13T00:00:00.000\\",\\"2020-10-16T00:00:00.000\\",\\"2020-10-19T00:00:00.000\\"]}", "history_dates": "{\\"name\\":\\"ds\\",\\"index\\":[0,1,2,3,4,5,6,7,8],\\"data\\":[\\"2020-10-01T00:00:00.000\\",\\"2020-10-04T00:00:00.000\\",\\"2020-10-07T00:00:00.000\\",\\"2020-10-10T00:00:00.000\\",\\"2020-10-13T00:00:00.000\\",\\"2020-10-16T00:00:00.000\\",\\"2020-10-19T00:00:00.000\\",\\"2020-10-22T00:00:00.000\\",\\"2020-10-25T00:00:00.000\\"]}", "train_holiday_names": null, "start": 1601510400.0, "t_scale": 2073600.0, "holidays": null, "history": 
"{\\"schema\\":{\\"fields\\":[{\\"name\\":\\"ds\\",\\"type\\":\\"datetime\\"},{\\"name\\":\\"y\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"floor\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"t\\",\\"type\\":\\"number\\"},{\\"name\\":\\"y_scaled\\",\\"type\\":\\"number\\"}],\\"pandas_version\\":\\"1.4.0\\"},\\"data\\":[{\\"ds\\":\\"2020-10-01T00:00:00.000\\",\\"y\\":0,\\"floor\\":0,\\"t\\":0.0,\\"y_scaled\\":0.0},{\\"ds\\":\\"2020-10-04T00:00:00.000\\",\\"y\\":1,\\"floor\\":0,\\"t\\":0.125,\\"y_scaled\\":0.125},{\\"ds\\":\\"2020-10-07T00:00:00.000\\",\\"y\\":2,\\"floor\\":0,\\"t\\":0.25,\\"y_scaled\\":0.25},{\\"ds\\":\\"2020-10-10T00:00:00.000\\",\\"y\\":3,\\"floor\\":0,\\"t\\":0.375,\\"y_scaled\\":0.375},{\\"ds\\":\\"2020-10-13T00:00:00.000\\",\\"y\\":4,\\"floor\\":0,\\"t\\":0.5,\\"y_scaled\\":0.5},{\\"ds\\":\\"2020-10-16T00:00:00.000\\",\\"y\\":5,\\"floor\\":0,\\"t\\":0.625,\\"y_scaled\\":0.625},{\\"ds\\":\\"2020-10-19T00:00:00.000\\",\\"y\\":6,\\"floor\\":0,\\"t\\":0.75,\\"y_scaled\\":0.75},{\\"ds\\":\\"2020-10-22T00:00:00.000\\",\\"y\\":7,\\"floor\\":0,\\"t\\":0.875,\\"y_scaled\\":0.875},{\\"ds\\":\\"2020-10-25T00:00:00.000\\",\\"y\\":8,\\"floor\\":0,\\"t\\":1.0,\\"y_scaled\\":1.0}]}", "train_component_cols": "{\\"schema\\":{\\"fields\\":[{\\"name\\":\\"additive_terms\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"weekly\\",\\"type\\":\\"integer\\"},{\\"name\\":\\"multiplicative_terms\\",\\"type\\":\\"integer\\"}],\\"pandas_version\\":\\"1.4.0\\"},\\"data\\":[{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0},{\\"additive_terms\\":1,\\"weekly\\":1,\\"multiplicative_terms\\":0}]}", "changepoints_t": [0.125, 0.25, 0.375, 0.5, 0.625, 0.75], "seasonalities": [["weekly"], {"weekly": 
{"period": 7, "fourier_order": 3, "prior_scale": 10.0, "mode": "additive", "condition_name": null}}], "extra_regressors": [[], {}], "fit_kwargs": {}, "params": {"lp__": [[202.053]], "k": [[1.19777]], "m": [[0.0565623]], "delta": [[-0.86152, 0.409957, -0.103241, 0.528979, 0.535181, -0.509356]], "sigma_obs": [[2.53056e-13]], "beta": [[-0.00630566, 0.016248, 0.0318587, -0.068705, 0.0029986, -0.00410522]], "trend": [[0.0565623, 0.206283, 0.248314, 0.341589, 0.421959, 0.568452, 0.781842, 0.931562, 1.08128]]}, "__prophet_version": "1.1.1"}'


class TestProphetModel(unittest.TestCase):
class BaseProphetModelTest(unittest.TestCase):
    """Shared assertions for Prophet model-logging tests."""

    def _check_requirements(self, run_id: str):
        """Assert that every Prophet pip dependency appears in the run's logged requirements.txt.

        :param run_id: MLflow run id whose "model" artifact is inspected.
        """
        # Fetch the requirements file that MLflow logged alongside the model.
        local_path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/requirements.txt")
        with open(local_path, "r") as req_file:
            logged = req_file.read()
        # Every additional pip dependency must have been written out verbatim.
        for dep in PROPHET_ADDITIONAL_PIP_DEPS:
            self.assertIn(dep, logged, f"requirements.txt should contain {dep} but got {logged}")

class TestProphetModel(BaseProphetModelTest):
@classmethod
def setUpClass(cls) -> None:
num_rows = 9
Expand Down Expand Up @@ -74,8 +84,13 @@ def test_model_save_and_load(self):

with mlflow.start_run() as run:
mlflow_prophet_log_model(prophet_model)
# Load the saved model from mlflow

run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the saved model from mlflow
prophet_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Check the prediction with the saved model
Expand Down Expand Up @@ -144,7 +159,7 @@ def test_validate_predict_cols(self):
assert e.value.error_code == ErrorCode.Name(INTERNAL_ERROR)


class TestMultiSeriesProphetModel(unittest.TestCase):
class TestMultiSeriesProphetModel(BaseProphetModelTest):
@classmethod
def setUpClass(cls) -> None:
cls.model_json = PROPHET_MODEL_JSON
Expand Down Expand Up @@ -178,8 +193,13 @@ def test_model_save_and_load(self):
with mlflow.start_run() as run:
mlflow_prophet_log_model(self.prophet_model, sample_input=test_df)

# Load the saved model from mlflow

run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the saved model from mlflow
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Check the prediction with the saved model
Expand Down Expand Up @@ -239,9 +259,13 @@ def test_model_save_and_load_multi_ids(self):
)
with mlflow.start_run() as run:
mlflow_prophet_log_model(prophet_model, sample_input=test_df)

run_id = run.info.run_id

# Check that additional requirements are logged correctly
self._check_requirements(run_id)

# Load the saved model from mlflow
run_id = run.info.run_id
loaded_model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")

# Check the prediction with the saved model
Expand Down
Loading