Skip to content

Commit 07476cf

Browse files
authored
Merge branch 'develop' into jobs_yaml
2 parents 5a8c311 + 230cde6 commit 07476cf

File tree

7 files changed

+180
-39
lines changed

7 files changed

+180
-39
lines changed

.github/workflows/publish-to-pypi.yml

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
name: "[DO NOT TRIGGER] Publish to PyPI"
2+
3+
# To run this workflow manually from the Actions tab
4+
on: workflow_dispatch
5+
6+
jobs:
7+
build-n-publish:
8+
name: Build and publish Python 🐍 distribution 📦 to PyPI
9+
runs-on: ubuntu-latest
10+
11+
steps:
12+
- uses: actions/checkout@v3
13+
- name: Set up Python
14+
uses: actions/setup-python@v4
15+
with:
16+
python-version: "3.x"
17+
- name: Build distribution 📦
18+
run: |
19+
pip install wheel
20+
make dist
21+
- name: Validate
22+
run: |
23+
pip install dist/*.whl
24+
python -c "import ads;"
25+
## To run publish to test PyPI secret with token needs to be added,
26+
## this one GH_ADS_TESTPYPI_TOKEN - removed after initial test.
27+
## Project name also needed to be updated in setup.py - setup(name="test_oracle_ads", ...),
28+
## regular name is occupied by former developer and can't be used for testing
29+
# - name: Publish distribution 📦 to Test PyPI
30+
# env:
31+
# TWINE_USERNAME: __token__
32+
# TWINE_PASSWORD: ${{ secrets.GH_ADS_TESTPYPI_TOKEN }}
33+
# run: |
34+
# pip install twine
35+
# twine upload -r testpypi dist/* -u $TWINE_USERNAME -p $TWINE_PASSWORD
36+
- name: Publish distribution 📦 to PyPI
37+
env:
38+
TWINE_USERNAME: __token__
39+
TWINE_PASSWORD: ${{ secrets.GH_ADS_PYPI_TOKEN }}
40+
run: |
41+
pip install twine
42+
twine upload dist/* -u $TWINE_USERNAME -p $TWINE_PASSWORD

ads/model/generic_model.py

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -789,6 +789,7 @@ def prepare(
789789
ignore_pending_changes: bool = True,
790790
max_col_num: int = DATA_SCHEMA_MAX_COL_NUM,
791791
ignore_conda_error: bool = False,
792+
score_py_uri: str = None,
792793
**kwargs: Dict,
793794
) -> "GenericModel":
794795
"""Prepare and save the score.py, serialized model and runtime.yaml file.
@@ -841,6 +842,10 @@ def prepare(
841842
number of features(columns).
842843
ignore_conda_error: (bool, optional). Defaults to False.
843844
Parameter to ignore error when collecting conda information.
845+
score_py_uri: (str, optional). Defaults to None.
846+
The uri of the customized score.py, which can be local path or OCI object storage URI.
847+
When provided with this attribute, the `score.py` will not be auto generated, and the
848+
provided `score.py` will be added into artifact_dir.
844849
kwargs:
845850
impute_values: (dict, optional).
846851
The dictionary where the key is the column index(or names is accepted
@@ -1001,13 +1006,22 @@ def prepare(
10011006
jinja_template_filename = (
10021007
"score-pkl" if self._serialize else "score_generic"
10031008
)
1004-
self.model_artifact.prepare_score_py(
1005-
jinja_template_filename=jinja_template_filename,
1006-
model_file_name=self.model_file_name,
1007-
data_deserializer=self.model_input_serializer.name,
1008-
model_serializer=self.model_save_serializer.name,
1009-
**{**kwargs, **self._score_args},
1010-
)
1009+
1010+
if score_py_uri:
1011+
utils.copy_file(
1012+
uri_src=score_py_uri,
1013+
uri_dst=os.path.join(self.artifact_dir, "score.py"),
1014+
force_overwrite=force_overwrite,
1015+
auth=self.auth
1016+
)
1017+
else:
1018+
self.model_artifact.prepare_score_py(
1019+
jinja_template_filename=jinja_template_filename,
1020+
model_file_name=self.model_file_name,
1021+
data_deserializer=self.model_input_serializer.name,
1022+
model_serializer=self.model_save_serializer.name,
1023+
**{**kwargs, **self._score_args},
1024+
)
10111025

10121026
self._summary_status.update_status(
10131027
detail="Generated score.py", status=ModelState.DONE.value
@@ -2483,6 +2497,7 @@ def predict(
24832497
self,
24842498
data: Any = None,
24852499
auto_serialize_data: bool = False,
2500+
local: bool = False,
24862501
**kwargs,
24872502
) -> Dict[str, Any]:
24882503
"""Returns prediction of input data run against the model deployment endpoint.
@@ -2507,6 +2522,8 @@ def predict(
25072522
Whether to auto serialize input data. Defaults to `False` for GenericModel, and `True` for other frameworks.
25082523
`data` required to be json serializable if `auto_serialize_data=False`.
25092524
If `auto_serialize_data` set to True, data will be serialized before sending to model deployment endpoint.
2525+
local: bool.
2526+
Whether to invoke the prediction locally. Defaults to False.
25102527
kwargs:
25112528
content_type: str, used to indicate the media type of the resource.
25122529
image: PIL.Image Object or uri for the image.
@@ -2525,10 +2542,21 @@ def predict(
25252542
NotActiveDeploymentError
25262543
If model deployment process was not started or not finished yet.
25272544
ValueError
2528-
If `data` is empty or not JSON serializable.
2545+
If model is not deployed yet or the endpoint information is not available.
25292546
"""
2530-
if not self.model_deployment:
2531-
raise ValueError("Use `deploy()` method to start model deployment.")
2547+
if local:
2548+
return self.verify(
2549+
data=data, auto_serialize_data=auto_serialize_data, **kwargs
2550+
)
2551+
2552+
if not (self.model_deployment and self.model_deployment.url):
2553+
raise ValueError(
2554+
"Error invoking the remote endpoint as the model is not "
2555+
"deployed yet or the endpoint information is not available. "
2556+
"Use `deploy()` method to start model deployment. "
2557+
"If you intend to invoke inference using locally available "
2558+
"model artifact, set parameter `local=True`"
2559+
)
25322560

25332561
current_state = self.model_deployment.state.name.upper()
25342562
if current_state != ModelDeploymentState.ACTIVE.name:

dev-requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ xlrd>=1.2.0
1313
lxml
1414
fastparquet
1515
imbalanced-learn
16-
pyarrow
16+
pyarrow
17+
mysql-connector-python

docs/source/user_guide/model_registration/model_artifact.rst

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Auto generation of ``score.py`` with framework specific code for loading models
3030

3131
To accommodate other frameworks that are unknown to ADS, a template code for ``score.py`` is generated in the provided artifact directory location.
3232

33+
3334
Prepare the Model Artifact
3435
--------------------------
3536

@@ -98,8 +99,25 @@ ADS automatically captures:
9899
* ``UseCaseType`` in ``metadata_taxonomy`` cannot be automatically populated. One way to populate the use case is to pass ``use_case_type`` to the ``prepare`` method.
99100
* Model introspection is automatically triggered.
100101

101-
.. include:: _template/score.rst
102+
Prepare with custom ``score.py``
103+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
104+
105+
.. versionadded:: 2.8.4
102106

107+
You could provide the location of your own ``score.py`` by ``score_py_uri`` in :py:meth:`~ads.model.GenericModel.prepare`.
108+
The provided ``score.py`` will be added into the model artifact.
109+
110+
.. code-block:: python3
111+
112+
tf_model.prepare(
113+
inference_conda_env="generalml_p38_cpu_v1",
114+
use_case_type=UseCaseType.MULTINOMIAL_CLASSIFICATION,
115+
X_sample=trainx,
116+
y_sample=trainy,
117+
score_py_uri="/path/to/score.py"
118+
)
119+
120+
.. include:: _template/score.rst
103121

104122
Model Introspection
105123
-------------------

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@
6969
"nbformat",
7070
"inflection",
7171
],
72-
"mysql": ["mysql-connector-python"],
7372
"bds": ["ibis-framework[impala]", "hdfs[kerberos]", "sqlalchemy"],
7473
"spark": ["pyspark>=3.0.0"],
7574
"huggingface": ["transformers"],
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# THIS IS A CUSTOM SCORE.PY
2+
3+
model_name = "model.pkl"
4+
5+
6+
def load_model(model_file_name=model_name):
7+
return model_file_name
8+
9+
10+
def predict(data, model=load_model()):
11+
return {"prediction": "This is a custom score.py."}

tests/unitary/with_extras/model/test_generic_model.py

Lines changed: 68 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -168,14 +168,30 @@
168168
"training_script": None,
169169
}
170170

171+
INFERENCE_CONDA_ENV = "oci://bucket@namespace/<path_to_service_pack>"
172+
TRAINING_CONDA_ENV = "oci://bucket@namespace/<path_to_service_pack>"
173+
DEFAULT_PYTHON_VERSION = "3.8"
174+
MODEL_FILE_NAME = "fake_model_name"
175+
FAKE_MD_URL = "http://<model-deployment-url>"
176+
177+
178+
def _prepare(model):
179+
model.prepare(
180+
inference_conda_env=INFERENCE_CONDA_ENV,
181+
inference_python_version=DEFAULT_PYTHON_VERSION,
182+
training_conda_env=TRAINING_CONDA_ENV,
183+
training_python_version=DEFAULT_PYTHON_VERSION,
184+
model_file_name=MODEL_FILE_NAME,
185+
force_overwrite=True,
186+
)
187+
171188

172189
class TestEstimator:
173190
def predict(self, x):
174191
return x**2
175192

176193

177194
class TestGenericModel:
178-
179195
iris = load_iris()
180196
X, y = iris.data, iris.target
181197
X_train, X_test, y_train, y_test = train_test_split(X, y)
@@ -298,16 +314,22 @@ def test_prepare_both_conda_env(self, mock_signer):
298314
)
299315

300316
@patch("ads.common.auth.default_signer")
301-
def test_verify_without_reload(self, mock_signer):
302-
"""Test verify input data without reload artifacts."""
317+
def test_prepare_with_custom_scorepy(self, mock_signer):
318+
"""Test prepare a trained model with custom score.py."""
303319
self.generic_model.prepare(
304-
inference_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1",
305-
inference_python_version="3.6",
306-
training_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/Oracle_Database_for_CPU_Python_3.7/1.0/database_p37_cpu_v1",
307-
training_python_version="3.7",
320+
INFERENCE_CONDA_ENV,
308321
model_file_name="fake_model_name",
309-
force_overwrite=True,
322+
score_py_uri=f"{os.path.dirname(os.path.abspath(__file__))}/test_files/custom_score.py",
310323
)
324+
assert os.path.exists(os.path.join("fake_folder", "score.py"))
325+
326+
prediction = self.generic_model.verify(data="test")["prediction"]
327+
assert prediction == "This is a custom score.py."
328+
329+
@patch("ads.common.auth.default_signer")
330+
def test_verify_without_reload(self, mock_signer):
331+
"""Test verify input data without reload artifacts."""
332+
_prepare(self.generic_model)
311333
self.generic_model.verify(self.X_test.tolist())
312334

313335
with patch("ads.model.artifact.ModelArtifact.reload") as mock_reload:
@@ -317,20 +339,10 @@ def test_verify_without_reload(self, mock_signer):
317339
@patch("ads.common.auth.default_signer")
318340
def test_verify(self, mock_signer):
319341
"""Test verify input data"""
320-
self.generic_model.prepare(
321-
inference_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/General_Machine_Learning_for_CPUs/1.0/mlcpuv1",
322-
inference_python_version="3.6",
323-
training_conda_env="oci://service-conda-packs@ociodscdev/service_pack/cpu/Oracle_Database_for_CPU_Python_3.7/1.0/database_p37_cpu_v1",
324-
training_python_version="3.7",
325-
model_file_name="fake_model_name",
326-
force_overwrite=True,
327-
)
342+
_prepare(self.generic_model)
328343
prediction_1 = self.generic_model.verify(self.X_test.tolist())
329344
assert isinstance(prediction_1, dict), "Failed to verify json payload."
330345

331-
prediction_2 = self.generic_model.verify(self.X_test.tolist())
332-
assert isinstance(prediction_2, dict), "Failed to verify input data."
333-
334346
def test_reload(self):
335347
"""test the reload."""
336348
pass
@@ -622,11 +634,31 @@ def test_deploy_with_default_display_name(self, mock_deploy):
622634
== random_name[:-9]
623635
)
624636

637+
@pytest.mark.parametrize("input_data", [(X_test.tolist())])
638+
@patch("ads.common.auth.default_signer")
639+
def test_predict_locally(self, mock_signer, input_data):
640+
_prepare(self.generic_model)
641+
test_result = self.generic_model.predict(data=input_data, local=True)
642+
expected_result = self.generic_model.estimator.predict(input_data).tolist()
643+
assert (
644+
test_result["prediction"] == expected_result
645+
), "Failed to verify input data."
646+
647+
with patch("ads.model.artifact.ModelArtifact.reload") as mock_reload:
648+
self.generic_model.predict(
649+
data=input_data, local=True, reload_artifacts=False
650+
)
651+
mock_reload.assert_not_called()
652+
625653
@patch.object(ModelDeployment, "predict")
626654
@patch("ads.common.auth.default_signer")
627655
@patch("ads.common.oci_client.OCIClientFactory")
656+
@patch(
657+
"ads.model.deployment.model_deployment.ModelDeployment.url",
658+
return_value=FAKE_MD_URL,
659+
)
628660
def test_predict_with_not_active_deployment_fail(
629-
self, mock_client, mock_signer, mock_predict
661+
self, mock_url, mock_client, mock_signer, mock_predict
630662
):
631663
"""Ensures predict model fails in case of model deployment is not in an active state."""
632664
with pytest.raises(NotActiveDeploymentError):
@@ -646,7 +678,11 @@ def test_predict_with_not_active_deployment_fail(
646678

647679
@patch("ads.common.auth.default_signer")
648680
@patch("ads.common.oci_client.OCIClientFactory")
649-
def test_predict_bytes_success(self, mock_client, mock_signer):
681+
@patch(
682+
"ads.model.deployment.model_deployment.ModelDeployment.url",
683+
return_value=FAKE_MD_URL,
684+
)
685+
def test_predict_bytes_success(self, mock_url, mock_client, mock_signer):
650686
"""Ensures predict model passes with bytes input."""
651687
with patch.object(
652688
ModelDeployment, "state", new_callable=PropertyMock
@@ -655,7 +691,7 @@ def test_predict_bytes_success(self, mock_client, mock_signer):
655691
with patch.object(ModelDeployment, "predict") as mock_predict:
656692
mock_predict.return_value = {"result": "result"}
657693
self.generic_model.model_deployment = ModelDeployment(
658-
model_deployment_id="test"
694+
model_deployment_id="test",
659695
)
660696
# self.generic_model.model_deployment.current_state = ModelDeploymentState.ACTIVE
661697
self.generic_model._as_onnx = False
@@ -668,7 +704,11 @@ def test_predict_bytes_success(self, mock_client, mock_signer):
668704

669705
@patch("ads.common.auth.default_signer")
670706
@patch("ads.common.oci_client.OCIClientFactory")
671-
def test_predict_success(self, mock_client, mock_signer):
707+
@patch(
708+
"ads.model.deployment.model_deployment.ModelDeployment.url",
709+
return_value=FAKE_MD_URL,
710+
)
711+
def test_predict_success(self, mock_url, mock_client, mock_signer):
672712
"""Ensures predict model passes with valid input parameters."""
673713
with patch.object(
674714
ModelDeployment, "state", new_callable=PropertyMock
@@ -785,7 +825,11 @@ def test_from_model_artifact(
785825

786826
@patch("ads.common.auth.default_signer")
787827
@patch("ads.common.oci_client.OCIClientFactory")
788-
def test_predict_success__serialize_input(self, mock_client, mock_signer):
828+
@patch(
829+
"ads.model.deployment.model_deployment.ModelDeployment.url",
830+
return_value=FAKE_MD_URL,
831+
)
832+
def test_predict_success__serialize_input(self, mock_url, mock_client, mock_signer):
789833
"""Ensures predict model passes with valid input parameters."""
790834

791835
df = pd.DataFrame([1, 2, 3])
@@ -795,7 +839,6 @@ def test_predict_success__serialize_input(self, mock_client, mock_signer):
795839
with patch.object(
796840
GenericModel, "get_data_serializer"
797841
) as mock_get_data_serializer:
798-
799842
mock_get_data_serializer.return_value.data = df.to_json()
800843
mock_state.return_value = ModelDeploymentState.ACTIVE
801844
with patch.object(ModelDeployment, "predict") as mock_predict:
@@ -1782,7 +1825,6 @@ def test_upload_artifact_fail(self):
17821825
def test_upload_artifact_success(self):
17831826
"""Tests uploading model artifacts to the provided `uri`."""
17841827
with tempfile.TemporaryDirectory() as tmp_dir:
1785-
17861828
# copy test artifacts to the temp folder
17871829
shutil.copytree(
17881830
os.path.join(self.curr_dir, "test_files/valid_model_artifacts"),

0 commit comments

Comments
 (0)