Skip to content

Commit ca61db6

Browse files
authored
Temporary fix to resolve circular import issue (#252)
2 parents 0b262c0 + eaa6e7f commit ca61db6

File tree

6 files changed

+76
-43
lines changed

6 files changed

+76
-43
lines changed

ads/feature_store/common/spark_session_singleton.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@
99

1010
from ads.common.decorator.runtime_dependency import OptionalDependency
1111
import os
12-
1312
from ads.common.oci_client import OCIClientFactory
14-
from ads.feature_store.common.utils.utility import get_env_bool
1513

1614
try:
1715
from delta import configure_spark_with_delta_pip
@@ -33,6 +31,31 @@
3331
raise
3432

3533

34+
def get_env_bool(env_var: str, default: bool = False) -> bool:
35+
"""
36+
:param env_var: Environment variable name
37+
:param default: Default environment variable value
38+
:return: Value of the boolean env variable
39+
"""
40+
env_val = os.getenv(env_var)
41+
if env_val is None:
42+
env_val = default
43+
else:
44+
env_val = env_val.lower()
45+
if env_val == "true":
46+
env_val = True
47+
elif env_val == "false":
48+
env_val = False
49+
else:
50+
raise ValueError(
51+
"For environment variable: {0} only string values T/true or F/false are allowed but: \
52+
{1} was provided.".format(
53+
env_var, env_val
54+
)
55+
)
56+
return env_val
57+
58+
3659
def developer_enabled():
3760
return get_env_bool("DEVELOPER_MODE", False)
3861

ads/feature_store/common/utils/utility.py

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# -*- coding: utf-8; -*-
33
import copy
44
import os
5+
56
# Copyright (c) 2023 Oracle and/or its affiliates.
67
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
78

@@ -63,25 +64,6 @@ def get_execution_engine_type(
6364
else ExecutionEngine.SPARK
6465
)
6566

66-
def get_env_bool(env_var: str, default: bool = False) -> bool:
67-
"""
68-
:param env_var: Environment variable name
69-
:param default: Default environment variable value
70-
:return: Value of the boolean env variable
71-
"""
72-
env_val = os.getenv(env_var)
73-
if env_val is None:
74-
env_val = default
75-
else:
76-
env_val = env_val.lower()
77-
if env_val == "true":
78-
env_val = True
79-
elif env_val == "false":
80-
env_val = False
81-
else:
82-
raise ValueError("For environment variable: {0} only string values T/true or F/false are allowed but: \
83-
{1} was provided.".format(env_var, env_val))
84-
return env_val
8567

8668
def get_metastore_id(feature_store_id: str):
8769
"""
@@ -177,6 +159,7 @@ def show_ingestion_summary(
177159

178160
def show_validation_summary(ingestion_status: str, validation_output, expectation_type):
179161
from tabulate import tabulate
162+
180163
statistics = validation_output["statistics"]
181164

182165
table_headers = (
@@ -201,8 +184,12 @@ def show_validation_summary(ingestion_status: str, validation_output, expectatio
201184
rule_table_values = [
202185
[
203186
rule_output["expectation_config"].get("expectation_type"),
204-
{key: value for key, value in rule_output["expectation_config"]["kwargs"].items() if key != "batch_id"},
205-
rule_output.get("success")
187+
{
188+
key: value
189+
for key, value in rule_output["expectation_config"]["kwargs"].items()
190+
if key != "batch_id"
191+
},
192+
rule_output.get("success"),
206193
]
207194
for rule_output in validation_output["results"]
208195
]

ads/feature_store/docs/source/release_notes.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ Release Notes
1818
- `https://objectstorage.us-ashburn-1.oraclecloud.com/n/bigdatadatasciencelarge/b/service-conda-packs-fs/o/service_pack/cpu/PySpark_3.2_and_Feature_Store/1.0/fspyspark32_p38_cpu_v1#conda`
1919
-
2020
* - SERVICE_VERSION
21-
- 0.1.212.master
21+
- 0.1.218.master
2222
-
2323
* - Terraform Stack
2424
- `link <https://objectstorage.us-ashburn-1.oraclecloud.com/p/vZogtXWwHqbkGLeqyKiqBmVxdbR4MK4nyOBqDsJNVE4sHGUY5KFi4T3mOFGA3FOy/n/idogsu2ylimg/b/oci-feature-store/o/beta/terraform/feature-store-terraform.zip>`__
@@ -28,7 +28,9 @@ Release Notes
2828
Release notes: July 5, 2023
2929

3030
* [FEATURE] Supporting Offline Feature Type COMPLEX
31-
* [FEATURE] Added k8s default version as v1.26.2
31+
* [FEATURE] Added k8s default version as v1.25.4
32+
* [FEATURE] Improved logging during materialisation of feature groups and datasets, and added display of validation results during materialisation
33+
* [FIX] Fixed creation of singleton spark session without metastore id
3234
* [DOCS] Data Type update for Offline Feature Type COMPLEX
3335
* [DOCS] Updated terraform default version as 1.1.x
3436

ads/feature_store/docs/source/terraform.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,10 +165,10 @@ A complete listing of the Terraform variables used in this stack are referenced
165165
- Value
166166
- Description
167167
* - `service_version`
168-
- `0.1-master.26`
168+
- `0.1.218.master`
169169
- The version of API to be deployed in customer tenancy.
170170
* - `spec_version`
171-
- `0.1-master.26`
171+
- `0.1.218.master`
172172
- The version of API specs to be deployed in customer tenancy.
173173
* - `deployment_name`
174174
- `DEFAULT_NAME`

ads/feature_store/execution_strategy/spark/spark_execution.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def _save_offline_dataframe(
292292

293293
output_details = {
294294
"error_details": error_details,
295-
"validation_output": str(validation_output),
295+
"validation_output": str(validation_output) if validation_output else None,
296296
"commit_id": "commit_id",
297297
"feature_statistics": feature_statistics,
298298
}
@@ -427,7 +427,7 @@ def _save_dataset_input(self, dataset, dataset_job: DatasetJob):
427427

428428
output_details = {
429429
"error_details": error_details,
430-
"validation_output": str(validation_output),
430+
"validation_output": str(validation_output) if validation_output else None,
431431
"commit_id": "commit_id",
432432
"feature_statistics": feature_statistics,
433433
}

tests/integration/feature_store/test_datatype_pandas_mixed.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,28 @@
77

88

99
class TestDataTypePandasMixed(FeatureStoreTestCase):
10-
data_mixed = {'MixedColumn': ['John', 25, 'Emma', 30, 'Michael', 35]}
11-
data_mixed_nan = {'MixedColumn': ['John', float('nan'), 'Emma', float('nan'), 'Michael', float('nan')]}
10+
data_mixed = {"MixedColumn": ["John", 25, "Emma", 30, "Michael", 35]}
11+
data_mixed_nan = {
12+
"MixedColumn": [
13+
"John",
14+
float("nan"),
15+
"Emma",
16+
float("nan"),
17+
"Michael",
18+
float("nan"),
19+
]
20+
}
1221
pandas_mixed_df = pd.DataFrame(data_mixed)
1322
pandas_mixed_df_nan = pd.DataFrame(data_mixed_nan)
1423

1524
input_feature_details_mixed = [
16-
FeatureDetail("MixedColumn").with_feature_type(FeatureType.STRING).with_order_number(1)]
25+
FeatureDetail("MixedColumn")
26+
.with_feature_type(FeatureType.STRING)
27+
.with_order_number(1)
28+
]
1729

1830
def define_feature_group_resource_with_pandas_mixed_infer_schema(
19-
self, entity_id, feature_store_id
31+
self, entity_id, feature_store_id
2032
):
2133
feature_group_pandas_mixed = (
2234
FeatureGroup()
@@ -33,7 +45,7 @@ def define_feature_group_resource_with_pandas_mixed_infer_schema(
3345
return feature_group_pandas_mixed
3446

3547
def define_feature_group_resource_with_pandas_mixed_infer_schema_nan(
36-
self, entity_id, feature_store_id
48+
self, entity_id, feature_store_id
3749
):
3850
feature_group_pandas_mixed_1 = (
3951
FeatureGroup()
@@ -50,7 +62,7 @@ def define_feature_group_resource_with_pandas_mixed_infer_schema_nan(
5062
return feature_group_pandas_mixed_1
5163

5264
def define_feature_group_resource_with_pandas_mixed_with_schema(
53-
self, entity_id, feature_store_id
65+
self, entity_id, feature_store_id
5466
) -> "FeatureGroup":
5567
feature_group_pandas_mixed_schema = (
5668
FeatureGroup()
@@ -73,12 +85,17 @@ def test_feature_group_pandas_mixed_infer_schema(self):
7385
entity = self.create_entity_resource(fs)
7486
assert entity.oci_fs_entity.id
7587
try:
76-
feature_group = self.define_feature_group_resource_with_pandas_mixed_infer_schema(
77-
entity.oci_fs_entity.id, fs.oci_fs.id
88+
feature_group = (
89+
self.define_feature_group_resource_with_pandas_mixed_infer_schema(
90+
entity.oci_fs_entity.id, fs.oci_fs.id
91+
)
7892
)
7993
except TypeError as e:
80-
assert e.__str__() == "field MixedColumn: Can not merge type <class 'pyspark.sql.types.StringType'> " \
81-
"and <class 'pyspark.sql.types.LongType'>"
94+
assert (
95+
e.__str__()
96+
== "field MixedColumn: Can not merge type <class 'pyspark.sql.types.StringType'> "
97+
"and <class 'pyspark.sql.types.LongType'>"
98+
)
8299
self.clean_up_entity(entity)
83100
self.clean_up_feature_store(fs)
84101

@@ -89,8 +106,10 @@ def test_feature_group_pandas_mixed_infer_schema_nan(self):
89106

90107
entity = self.create_entity_resource(fs)
91108
assert entity.oci_fs_entity.id
92-
feature_group = self.define_feature_group_resource_with_pandas_mixed_infer_schema_nan(
93-
entity.oci_fs_entity.id, fs.oci_fs.id
109+
feature_group = (
110+
self.define_feature_group_resource_with_pandas_mixed_infer_schema_nan(
111+
entity.oci_fs_entity.id, fs.oci_fs.id
112+
)
94113
)
95114
feature_group.create()
96115
feature_group.materialise(self.pandas_mixed_df_nan)
@@ -109,8 +128,10 @@ def test_feature_group_pandas_mixed_with_schema(self):
109128
entity = self.create_entity_resource(fs)
110129
assert entity.oci_fs_entity.id
111130

112-
feature_group = self.define_feature_group_resource_with_pandas_mixed_with_schema(
113-
entity.oci_fs_entity.id, fs.oci_fs.id
131+
feature_group = (
132+
self.define_feature_group_resource_with_pandas_mixed_with_schema(
133+
entity.oci_fs_entity.id, fs.oci_fs.id
134+
)
114135
)
115136

116137
feature_group.create()

0 commit comments

Comments
 (0)