Skip to content

Commit 4333af9

Browse files
committed
added integration test cases
1 parent 806d500 commit 4333af9

File tree

3 files changed

+267
-1
lines changed

3 files changed

+267
-1
lines changed

ads/feature_store/feature_group.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def with_transformation_kwargs(
353 353
"""
354 354
return self.set_spec(
355 355
self.CONST_TRANSFORMATION_KWARGS,
356-
Base64EncoderDecoder.encode(json.dumps(transformation_kwargs)),
356+
Base64EncoderDecoder.encode(json.dumps(transformation_kwargs or {})),
357 357
)
358 358

359 359
@property
@@ -1085,6 +1085,7 @@ def check_resource_materialization(self):
1085 1085
"""Checks whether the target Delta table for this resource has been materialized in Spark.
1086 1086
If the target Delta table doesn't exist, raises a NotMaterializedError with the type and name of this resource.
1087 1087
"""
1088+
print(self.target_delta_table())
1088 1089
if not self.spark_engine.is_delta_table_exists(self.target_delta_table()):
1089 1090
raise NotMaterializedError(self.type, self.name)
1090 1091

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
import ast
2+
import json
3+
4+
from ads.feature_store.dataset import Dataset
5+
from ads.feature_store.statistics_config import StatisticsConfig
6+
from tests.integration.feature_store.test_base import FeatureStoreTestCase
7+
from ads.feature_store.feature_group import FeatureGroup
8+
9+
10+
class TestPartitioningForFeatureGroupAndDataset(FeatureStoreTestCase):
11+
"""Contains integration tests for partitioning of feature groups and datasets"""
12+
13+
def define_feature_group_resource_with_partitioning(
14+
self, entity_id, feature_store_id, partitioning_keys
15+
) -> "FeatureGroup":
16+
feature_group_resource = (
17+
FeatureGroup()
18+
.with_description("feature group with statistics disabled")
19+
.with_compartment_id(self.COMPARTMENT_ID)
20+
.with_name(self.get_name("petals2"))
21+
.with_entity_id(entity_id)
22+
.with_feature_store_id(feature_store_id)
23+
.with_primary_keys([])
24+
.with_partition_keys(partitioning_keys)
25+
.with_input_feature_details(self.INPUT_FEATURE_DETAILS)
26+
.with_statistics_config(False)
27+
)
28+
return feature_group_resource
29+
30+
def define_dataset_resource_with_partitioning(
31+
self, entity_id, feature_store_id, feature_group_name, partitioning_keys
32+
) -> "Dataset":
33+
name = self.get_name("petals_ds")
34+
dataset_resource = (
35+
Dataset()
36+
.with_description("dataset description")
37+
.with_compartment_id(self.COMPARTMENT_ID)
38+
.with_name(name)
39+
.with_entity_id(entity_id)
40+
.with_feature_store_id(feature_store_id)
41+
.with_query(f"SELECT * FROM `{entity_id}`.{feature_group_name}")
42+
.with_statistics_config(
43+
StatisticsConfig(True, columns=["sepal_length", "petal_width"])
44+
)
45+
.with_partition_keys(partitioning_keys)
46+
)
47+
return dataset_resource
48+
49+
def test_feature_group_materialization_with_partitioning_keys(self):
50+
fs = self.define_feature_store_resource().create()
51+
assert fs.oci_fs.id
52+
53+
entity = self.create_entity_resource(fs)
54+
assert entity.oci_fs_entity.id
55+
56+
fg = self.define_feature_group_resource_with_partitioning(
57+
entity.oci_fs_entity.id, fs.oci_fs.id, ['class']
58+
).create()
59+
assert fg.oci_feature_group.id
60+
61+
fg.materialise(self.data)
62+
63+
history_df = fg.history()
64+
history_df_dict = json.loads(history_df.toJSON().collect()[0])
65+
materialized_partition_keys = ast.literal_eval(history_df_dict.get("operationParameters").get("partitionBy"))
66+
67+
assert len(materialized_partition_keys) == 1
68+
assert materialized_partition_keys[0] == "class"
69+
70+
self.clean_up_feature_group(fg)
71+
self.clean_up_entity(entity)
72+
self.clean_up_feature_store(fs)
73+
74+
def test_feature_group_materialization_without_partitioning_keys(self):
75+
fs = self.define_feature_store_resource().create()
76+
assert fs.oci_fs.id
77+
78+
entity = self.create_entity_resource(fs)
79+
assert entity.oci_fs_entity.id
80+
81+
fg = self.define_feature_group_resource_with_partitioning(
82+
entity.oci_fs_entity.id, fs.oci_fs.id, None
83+
).create()
84+
assert fg.oci_feature_group.id
85+
86+
fg.materialise(self.data)
87+
88+
history_df = fg.history()
89+
history_df_dict = json.loads(history_df.toJSON().collect()[0])
90+
materialized_partition_keys = ast.literal_eval(history_df_dict.get("operationParameters").get("partitionBy"))
91+
92+
assert len(materialized_partition_keys) == 0
93+
94+
self.clean_up_feature_group(fg)
95+
self.clean_up_entity(entity)
96+
self.clean_up_feature_store(fs)
97+
98+
def test_dataset_materialization_with_partitioning_keys(self):
99+
fs = self.define_feature_store_resource().create()
100+
assert fs.oci_fs.id
101+
102+
entity = self.create_entity_resource(fs)
103+
assert entity.oci_fs_entity.id
104+
105+
fg = self.define_feature_group_resource(
106+
entity.oci_fs_entity.id, fs.oci_fs.id
107+
).create()
108+
109+
assert fg.oci_feature_group.id
110+
fg.materialise(self.data)
111+
112+
dataset = self.define_dataset_resource_with_partitioning(
113+
entity.oci_fs_entity.id, fs.oci_fs.id, fg.oci_feature_group.name, ['class']
114+
).create()
115+
assert dataset.oci_dataset.id
116+
117+
dataset.materialise()
118+
history_df = dataset.history()
119+
history_df_dict = json.loads(history_df.toJSON().collect()[0])
120+
materialized_partition_keys = ast.literal_eval(history_df_dict.get("operationParameters").get("partitionBy"))
121+
122+
assert len(materialized_partition_keys) == 1
123+
assert materialized_partition_keys[0] == "class"
124+
125+
self.clean_up_dataset(dataset)
126+
self.clean_up_feature_group(fg)
127+
self.clean_up_entity(entity)
128+
self.clean_up_feature_store(fs)
129+
130+
def test_dataset_materialization_without_partitioning_keys(self):
131+
fs = self.define_feature_store_resource().create()
132+
assert fs.oci_fs.id
133+
134+
entity = self.create_entity_resource(fs)
135+
assert entity.oci_fs_entity.id
136+
137+
fg = self.define_feature_group_resource(
138+
entity.oci_fs_entity.id, fs.oci_fs.id
139+
).create()
140+
141+
assert fg.oci_feature_group.id
142+
fg.materialise(self.data)
143+
144+
dataset = self.define_dataset_resource_with_partitioning(
145+
entity.oci_fs_entity.id, fs.oci_fs.id, fg.oci_feature_group.name, None
146+
).create()
147+
assert dataset.oci_dataset.id
148+
149+
dataset.materialise()
150+
history_df = dataset.history()
151+
history_df_dict = json.loads(history_df.toJSON().collect()[0])
152+
materialized_partition_keys = ast.literal_eval(history_df_dict.get("operationParameters").get("partitionBy"))
153+
154+
assert len(materialized_partition_keys) == 0
155+
156+
self.clean_up_dataset(dataset)
157+
self.clean_up_feature_group(fg)
158+
self.clean_up_entity(entity)
159+
self.clean_up_feature_store(fs)
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from ads.feature_store.feature_group import FeatureGroup
2+
from tests.integration.feature_store.test_base import FeatureStoreTestCase
3+
4+
class TestFeatureGroupWithKwargsTransformation(FeatureStoreTestCase):
5+
"""Contains integration tests for Feature Group Kwargs supported transformation."""
6+
7+
def define_feature_group_resource_with_transformation(
8+
self, entity_id, feature_store_id, transformation_id, transformation_kwargs
9+
) -> "FeatureGroup":
10+
feature_group_resource = (
11+
FeatureGroup()
12+
.with_description("feature group with statistics disabled")
13+
.with_compartment_id(self.COMPARTMENT_ID)
14+
.with_name(self.get_name("petals2"))
15+
.with_entity_id(entity_id)
16+
.with_feature_store_id(feature_store_id)
17+
.with_primary_keys([])
18+
.with_partition_keys([])
19+
.with_input_feature_details(self.INPUT_FEATURE_DETAILS)
20+
.with_statistics_config(False)
21+
.with_transformation_id(transformation_id)
22+
.with_transformation_kwargs(transformation_kwargs)
23+
)
24+
return feature_group_resource
25+
26+
27+
def test_feature_group_materialization_with_kwargs_supported_transformation(self):
28+
fs = self.define_feature_store_resource().create()
29+
assert fs.oci_fs.id
30+
31+
entity = self.create_entity_resource(fs)
32+
assert entity.oci_fs_entity.id
33+
34+
transformation = self.create_transformation_resource(fs)
35+
transformation_kwargs = {
36+
"is_area_enabled": True
37+
}
38+
39+
fg = self.define_feature_group_resource_with_transformation(
40+
entity.oci_fs_entity.id, fs.oci_fs.id, transformation.oci_fs_transformation.id, transformation_kwargs
41+
).create()
42+
assert fg.oci_feature_group.id
43+
44+
fg.materialise(self.data)
45+
46+
df = fg.preview(row_count=1)
47+
48+
assert "petal_area" in df.columns
49+
assert "sepal_area" in df.columns
50+
51+
self.clean_up_feature_group(fg)
52+
self.clean_up_transformation(transformation)
53+
self.clean_up_entity(entity)
54+
self.clean_up_feature_store(fs)
55+
56+
def test_feature_group_materialization_with_kwargs_supported_transformation_with_passing_kwargs_as_empty_dict(self):
57+
fs = self.define_feature_store_resource().create()
58+
assert fs.oci_fs.id
59+
60+
entity = self.create_entity_resource(fs)
61+
assert entity.oci_fs_entity.id
62+
63+
transformation = self.create_transformation_resource(fs)
64+
65+
fg = self.define_feature_group_resource_with_transformation(
66+
entity.oci_fs_entity.id, fs.oci_fs.id, transformation.oci_fs_transformation.id, {}
67+
).create()
68+
assert fg.oci_feature_group.id
69+
70+
fg.materialise(self.data)
71+
72+
df = fg.preview(row_count=1)
73+
74+
assert "petal_area" not in df.columns
75+
assert "sepal_area" not in df.columns
76+
77+
self.clean_up_feature_group(fg)
78+
self.clean_up_transformation(transformation)
79+
self.clean_up_entity(entity)
80+
self.clean_up_feature_store(fs)
81+
82+
def test_feature_group_materialization_with_kwargs_supported_transformation_with_passing_kwargs_as_None(self):
83+
fs = self.define_feature_store_resource().create()
84+
assert fs.oci_fs.id
85+
86+
entity = self.create_entity_resource(fs)
87+
assert entity.oci_fs_entity.id
88+
89+
transformation = self.create_transformation_resource(fs)
90+
91+
fg = self.define_feature_group_resource_with_transformation(
92+
entity.oci_fs_entity.id, fs.oci_fs.id, transformation.oci_fs_transformation.id, None
93+
).create()
94+
assert fg.oci_feature_group.id
95+
96+
fg.materialise(self.data)
97+
98+
df = fg.preview(row_count=1)
99+
100+
assert "petal_area" not in df.columns
101+
assert "sepal_area" not in df.columns
102+
103+
self.clean_up_feature_group(fg)
104+
self.clean_up_transformation(transformation)
105+
self.clean_up_entity(entity)
106+
self.clean_up_feature_store(fs)

0 commit comments

Comments
 (0)