Skip to content

Commit ca61db6

Browse files
authored
Temporary fix to resolve circular import issue (#252)
2 parents 0b262c0 + eaa6e7f commit ca61db6

File tree

6 files changed

+76
-43
lines changed

6 files changed

+76
-43
lines changed

ads/feature_store/common/spark_session_singleton.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99

1010
from ads.common.decorator.runtime_dependency import OptionalDependency
1111
import os
12-
1312
from ads.common.oci_client import OCIClientFactory
14-
from ads.feature_store.common.utils.utility import get_env_bool
1513

1614
try:
1715
from delta import configure_spark_with_delta_pip
@@ -33,6 +31,31 @@
3331
raise
3432

3533

34+
def get_env_bool(env_var: str, default: bool = False) -> bool:
35+
"""
36+
:param env_var: Environment variable name
37+
:param default: Default environment variable value
38+
:return: Value of the boolean env variable
39+
"""
40+
env_val = os.getenv(env_var)
41+
if env_val is None:
42+
env_val = default
43+
else:
44+
env_val = env_val.lower()
45+
if env_val == "true":
46+
env_val = True
47+
elif env_val == "false":
48+
env_val = False
49+
else:
50+
raise ValueError(
51+
"For environment variable: {0} only string values T/true or F/false are allowed but: \
52+
{1} was provided.".format(
53+
env_var, env_val
54+
)
55+
)
56+
return env_val
57+
58+
3659
def developer_enabled():
3760
return get_env_bool("DEVELOPER_MODE", False)
3861

ads/feature_store/common/utils/utility.py

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# -*- coding: utf-8; -*-
33
import copy
44
import os
5+
56
# Copyright (c) 2023 Oracle and/or its affiliates.
67
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
78

@@ -63,25 +64,6 @@ def get_execution_engine_type(
6364
else ExecutionEngine.SPARK
6465
)
6566

66-
def get_env_bool(env_var: str, default: bool = False) -> bool:
67-
"""
68-
:param env_var: Environment variable name
69-
:param default: Default environment variable value
70-
:return: Value of the boolean env variable
71-
"""
72-
env_val = os.getenv(env_var)
73-
if env_val is None:
74-
env_val = default
75-
else:
76-
env_val = env_val.lower()
77-
if env_val == "true":
78-
env_val = True
79-
elif env_val == "false":
80-
env_val = False
81-
else:
82-
raise ValueError("For environment variable: {0} only string values T/true or F/false are allowed but: \
83-
{1} was provided.".format(env_var, env_val))
84-
return env_val
8567

8668
def get_metastore_id(feature_store_id: str):
8769
"""
@@ -177,6 +159,7 @@ def show_ingestion_summary(
177159

178160
def show_validation_summary(ingestion_status: str, validation_output, expectation_type):
179161
from tabulate import tabulate
162+
180163
statistics = validation_output["statistics"]
181164

182165
table_headers = (
@@ -201,8 +184,12 @@ def show_validation_summary(ingestion_status: str, validation_output, expectatio
201184
rule_table_values = [
202185
[
203186
rule_output["expectation_config"].get("expectation_type"),
204-
{key: value for key, value in rule_output["expectation_config"]["kwargs"].items() if key != "batch_id"},
205-
rule_output.get("success")
187+
{
188+
key: value
189+
for key, value in rule_output["expectation_config"]["kwargs"].items()
190+
if key != "batch_id"
191+
},
192+
rule_output.get("success"),
206193
]
207194
for rule_output in validation_output["results"]
208195
]

ads/feature_store/docs/source/release_notes.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Release Notes
1818
- `https://objectstorage.us-ashburn-1.oraclecloud.com/n/bigdatadatasciencelarge/b/service-conda-packs-fs/o/service_pack/cpu/PySpark_3.2_and_Feature_Store/1.0/fspyspark32_p38_cpu_v1#conda`
1919
-
2020
* - SERVICE_VERSION
21-
- 0.1.212.master
21+
- 0.1.218.master
2222
-
2323
* - Terraform Stack
2424
- `link <https://objectstorage.us-ashburn-1.oraclecloud.com/p/vZogtXWwHqbkGLeqyKiqBmVxdbR4MK4nyOBqDsJNVE4sHGUY5KFi4T3mOFGA3FOy/n/idogsu2ylimg/b/oci-feature-store/o/beta/terraform/feature-store-terraform.zip>`__
@@ -28,7 +28,9 @@ Release Notes
2828
Release notes: July 5, 2023
2929

3030
* [FEATURE] Supporting Offline Feature Type COMPLEX
31-
* [FEATURE] Added k8s default version as v1.26.2
31+
* [FEATURE] Added k8s default version as v1.25.4
32+
* [FEATURE] Improved logging during materialisation of feature groups and datasets, and added display of validation results during materialisation
33+
* [FIX] Fixed creation of singleton spark session without metastore id
3234
* [DOCS] Data Type update for Offline Feature Type COMPLEX
3335
* [DOCS] Updated terraform default version as 1.1.x
3436

ads/feature_store/docs/source/terraform.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,10 @@ A complete listing of the Terraform variables used in this stack are referenced
165165
- Value
166166
- Description
167167
* - `service_version`
168-
- `0.1-master.26`
168+
- `0.1.218.master`
169169
- The version of API to be deployed in customer tenancy.
170170
* - `spec_version`
171-
- `0.1-master.26`
171+
- `0.1.218.master`
172172
- The version of API specs to be deployed in customer tenancy.
173173
* - `deployment_name`
174174
- `DEFAULT_NAME`

ads/feature_store/execution_strategy/spark/spark_execution.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def _save_offline_dataframe(
292292

293293
output_details = {
294294
"error_details": error_details,
295-
"validation_output": str(validation_output),
295+
"validation_output": str(validation_output) if validation_output else None,
296296
"commit_id": "commit_id",
297297
"feature_statistics": feature_statistics,
298298
}
@@ -427,7 +427,7 @@ def _save_dataset_input(self, dataset, dataset_job: DatasetJob):
427427

428428
output_details = {
429429
"error_details": error_details,
430-
"validation_output": str(validation_output),
430+
"validation_output": str(validation_output) if validation_output else None,
431431
"commit_id": "commit_id",
432432
"feature_statistics": feature_statistics,
433433
}

tests/integration/feature_store/test_datatype_pandas_mixed.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,28 @@
77

88

99
class TestDataTypePandasMixed(FeatureStoreTestCase):
10-
data_mixed = {'MixedColumn': ['John', 25, 'Emma', 30, 'Michael', 35]}
11-
data_mixed_nan = {'MixedColumn': ['John', float('nan'), 'Emma', float('nan'), 'Michael', float('nan')]}
10+
data_mixed = {"MixedColumn": ["John", 25, "Emma", 30, "Michael", 35]}
11+
data_mixed_nan = {
12+
"MixedColumn": [
13+
"John",
14+
float("nan"),
15+
"Emma",
16+
float("nan"),
17+
"Michael",
18+
float("nan"),
19+
]
20+
}
1221
pandas_mixed_df = pd.DataFrame(data_mixed)
1322
pandas_mixed_df_nan = pd.DataFrame(data_mixed_nan)
1423

1524
input_feature_details_mixed = [
16-
FeatureDetail("MixedColumn").with_feature_type(FeatureType.STRING).with_order_number(1)]
25+
FeatureDetail("MixedColumn")
26+
.with_feature_type(FeatureType.STRING)
27+
.with_order_number(1)
28+
]
1729

1830
def define_feature_group_resource_with_pandas_mixed_infer_schema(
19-
self, entity_id, feature_store_id
31+
self, entity_id, feature_store_id
2032
):
2133
feature_group_pandas_mixed = (
2234
FeatureGroup()
@@ -33,7 +45,7 @@ def define_feature_group_resource_with_pandas_mixed_infer_schema(
3345
return feature_group_pandas_mixed
3446

3547
def define_feature_group_resource_with_pandas_mixed_infer_schema_nan(
36-
self, entity_id, feature_store_id
48+
self, entity_id, feature_store_id
3749
):
3850
feature_group_pandas_mixed_1 = (
3951
FeatureGroup()
@@ -50,7 +62,7 @@ def define_feature_group_resource_with_pandas_mixed_infer_schema_nan(
5062
return feature_group_pandas_mixed_1
5163

5264
def define_feature_group_resource_with_pandas_mixed_with_schema(
53-
self, entity_id, feature_store_id
65+
self, entity_id, feature_store_id
5466
) -> "FeatureGroup":
5567
feature_group_pandas_mixed_schema = (
5668
FeatureGroup()
@@ -73,12 +85,17 @@ def test_feature_group_pandas_mixed_infer_schema(self):
7385
entity = self.create_entity_resource(fs)
7486
assert entity.oci_fs_entity.id
7587
try:
76-
feature_group = self.define_feature_group_resource_with_pandas_mixed_infer_schema(
77-
entity.oci_fs_entity.id, fs.oci_fs.id
88+
feature_group = (
89+
self.define_feature_group_resource_with_pandas_mixed_infer_schema(
90+
entity.oci_fs_entity.id, fs.oci_fs.id
91+
)
7892
)
7993
except TypeError as e:
80-
assert e.__str__() == "field MixedColumn: Can not merge type <class 'pyspark.sql.types.StringType'> " \
81-
"and <class 'pyspark.sql.types.LongType'>"
94+
assert (
95+
e.__str__()
96+
== "field MixedColumn: Can not merge type <class 'pyspark.sql.types.StringType'> "
97+
"and <class 'pyspark.sql.types.LongType'>"
98+
)
8299
self.clean_up_entity(entity)
83100
self.clean_up_feature_store(fs)
84101

@@ -89,8 +106,10 @@ def test_feature_group_pandas_mixed_infer_schema_nan(self):
89106

90107
entity = self.create_entity_resource(fs)
91108
assert entity.oci_fs_entity.id
92-
feature_group = self.define_feature_group_resource_with_pandas_mixed_infer_schema_nan(
93-
entity.oci_fs_entity.id, fs.oci_fs.id
109+
feature_group = (
110+
self.define_feature_group_resource_with_pandas_mixed_infer_schema_nan(
111+
entity.oci_fs_entity.id, fs.oci_fs.id
112+
)
94113
)
95114
feature_group.create()
96115
feature_group.materialise(self.pandas_mixed_df_nan)
@@ -109,8 +128,10 @@ def test_feature_group_pandas_mixed_with_schema(self):
109128
entity = self.create_entity_resource(fs)
110129
assert entity.oci_fs_entity.id
111130

112-
feature_group = self.define_feature_group_resource_with_pandas_mixed_with_schema(
113-
entity.oci_fs_entity.id, fs.oci_fs.id
131+
feature_group = (
132+
self.define_feature_group_resource_with_pandas_mixed_with_schema(
133+
entity.oci_fs_entity.id, fs.oci_fs.id
134+
)
114135
)
115136

116137
feature_group.create()

0 commit comments

Comments
 (0)