Commit 09c31ec

added terraform version in the command (#249)
Merge: 2 parents e9e76b0 + d2646a8

18 files changed: +130 −375 lines

ads/feature_store/common/spark_session_singleton.py

Lines changed: 3 additions & 2 deletions
@@ -75,8 +75,9 @@ def __init__(self, metastore_id: str = None):
             "spark.hadoop.oracle.dcat.metastore.id", metastore_id
         ).config(
             "spark.sql.warehouse.dir", metastore.default_managed_table_location
-        )\
-        .config("spark.driver.memory", "16G")
+        ).config(
+            "spark.driver.memory", "16G"
+        )

         if developer_enabled():
             # Configure spark session with delta jars only in developer mode. In other cases,
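The change drops a backslash line continuation in favor of uniform .config() chaining on the SparkSession builder. A minimal standalone sketch of the resulting pattern (the app name and both config values below are illustrative placeholders, not the module's actual metastore lookup):

    from pyspark.sql import SparkSession

    # Each option is attached with .config(...), so the chain needs no
    # backslash continuations between calls.
    spark = (
        SparkSession.builder.appName("feature-store-demo")  # placeholder name
        .config("spark.hadoop.oracle.dcat.metastore.id", "<metastore-ocid>")  # placeholder
        .config("spark.sql.warehouse.dir", "oci://<bucket>@<namespace>/warehouse")  # placeholder
        .config("spark.driver.memory", "16G")
        .getOrCreate()
    )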

ads/feature_store/common/utils/feature_schema_mapper.py

Lines changed: 0 additions & 1 deletion
@@ -241,7 +241,6 @@ def map_feature_type_to_pandas(feature_type):
     raise TypeError(f"Feature Type {feature_type} is not supported for pandas")


-
 def map_spark_type_to_stats_data_type(spark_type):
     """Maps the spark data types to MLM library data types
     args:

ads/feature_store/common/utils/transformation_query_validator.py

Lines changed: 0 additions & 96 deletions
This file was deleted.

ads/feature_store/common/utils/utility.py

Lines changed: 19 additions & 11 deletions
@@ -154,8 +154,10 @@ def get_features(
     return features


-def get_schema_from_pandas_df(df: pd.DataFrame):
-    spark = SparkSessionSingleton().get_spark_session()
+def get_schema_from_pandas_df(df: pd.DataFrame, feature_store_id: str):
+    spark = SparkSessionSingleton(
+        get_metastore_id(feature_store_id)
+    ).get_spark_session()
     converted_df = spark.createDataFrame(df)
     return get_schema_from_spark_df(converted_df)

@@ -174,27 +176,29 @@ def get_schema_from_spark_df(df: DataFrame):
     return schema_details


-def get_schema_from_df(data_frame: Union[DataFrame, pd.DataFrame]) -> List[dict]:
+def get_schema_from_df(
+    data_frame: Union[DataFrame, pd.DataFrame], feature_store_id: str
+) -> List[dict]:
     """
     Given a DataFrame, returns a list of dictionaries that describe its schema.
     If the DataFrame is a pandas DataFrame, it uses pandas methods to get the schema.
     If it's a PySpark DataFrame, it uses PySpark methods to get the schema.
     """
     if isinstance(data_frame, pd.DataFrame):
-        return get_schema_from_pandas_df(data_frame)
+        return get_schema_from_pandas_df(data_frame, feature_store_id)
     else:
         return get_schema_from_spark_df(data_frame)


 def get_input_features_from_df(
-    data_frame: Union[DataFrame, pd.DataFrame]
+    data_frame: Union[DataFrame, pd.DataFrame], feature_store_id: str
 ) -> List[FeatureDetail]:
     """
     Given a DataFrame, returns a list of FeatureDetail objects that represent its input features.
     Each FeatureDetail object contains information about a single input feature, such as its name, data type, and
     whether it's categorical or numerical.
     """
-    schema_details = get_schema_from_df(data_frame)
+    schema_details = get_schema_from_df(data_frame, feature_store_id)
     feature_details = []

     for schema_detail in schema_details:

@@ -263,7 +267,7 @@ def largest_matching_subset_of_primary_keys(left_feature_group, right_feature_gr


 def convert_pandas_datatype_with_schema(
-        raw_feature_details: List[dict], input_df: pd.DataFrame
+    raw_feature_details: List[dict], input_df: pd.DataFrame
 ) -> pd.DataFrame:
     feature_detail_map = {}
     columns_to_remove = []

@@ -280,21 +284,25 @@ def convert_pandas_datatype_with_schema(
                 .where(pd.notnull(input_df[column]), None)
             )
         else:
-            logger.warning("column" + column + "doesn't exist in the input feature details")
+            logger.warning(
+                "column" + column + "doesn't exist in the input feature details"
+            )
             columns_to_remove.append(column)
-    return input_df.drop(columns = columns_to_remove)
+    return input_df.drop(columns=columns_to_remove)


 def convert_spark_dataframe_with_schema(
-        raw_feature_details: List[dict], input_df: DataFrame
+    raw_feature_details: List[dict], input_df: DataFrame
 ) -> DataFrame:
     feature_detail_map = {}
     columns_to_remove = []
     for feature_details in raw_feature_details:
         feature_detail_map[feature_details.get("name")] = feature_details
     for column in input_df.columns:
         if column not in feature_detail_map.keys():
-            logger.warning("column" + column + "doesn't exist in the input feature details")
+            logger.warning(
+                "column" + column + "doesn't exist in the input feature details"
+            )
             columns_to_remove.append(column)

     return input_df.drop(*columns_to_remove)
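These schema helpers now thread feature_store_id through to SparkSessionSingleton, so pandas inputs are converted on a Spark session bound to the feature store's metastore rather than a bare default session. A hedged usage sketch (assumes an environment with ads feature store support installed; the OCID is a placeholder):

    import pandas as pd

    from ads.feature_store.common.utils.utility import get_input_features_from_df

    # Placeholder feature store OCID; the helper resolves it to a metastore id
    # internally before creating the singleton Spark session.
    fs_ocid = "ocid1.featurestore.oc1..<unique_id>"

    df = pd.DataFrame({"user_id": [1, 2], "score": [0.7, 0.9]})

    # Returns one FeatureDetail per column, inferred via Spark's schema.
    for feature in get_input_features_from_df(df, fs_ocid):
        print(feature)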

ads/feature_store/docs/source/quickstart.rst

Lines changed: 10 additions & 5 deletions
@@ -1,11 +1,19 @@
 Quick start
 ************
+1. Create a `OCI notebook session <https://docs.oracle.com/en-us/iaas/data-science/using/create-notebook-sessions.htm>`__ to access jupyterlab interface.
 
-- Install the ``fspyspark32_p38_cpu_v1`` plugin
+2. Open the terminal in the notebook session and install the ``fspyspark32_p38_cpu_v1`` plugin
 
 .. code-block:: shell
 
-    odsc conda install --uri https://objectstorage.us-ashburn-1.oraclecloud.com/p/b-LDcmxLYuCi3gIgvLV8KPAIih8CD2vgtBwgjEtY_CnE3HZyBHb00JGsS7OV5ls5/n/ociodscdev/b/ads_conda_pack_builds/o/PySpark_3/teamcity_20230511_061348_38940786/f227145b7ee5fc1c73a69ebaa671b81e/PySpark_3.2_and_Feature_Store.tar.gz
+    odsc conda install --uri https://objectstorage.us-ashburn-1.oraclecloud.com/n/bigdatadatasciencelarge/b/service-conda-packs-fs/o/service_pack/cpu/PySpark_3.2_and_Feature_Store/1.0/fspyspark32_p38_cpu_v1#conda
+
+3. Download the notebooks from the example notebook section.
+
+.. seealso::
+    Refer :ref:`Notebook Examples` to check out more example for using feature store
+
+4. Upload the notebook in the notebook session and run the notebook after replacing the required variables.
 
 
 Background reading to understand the concepts of Feature Store and OCI Data Science:

@@ -26,9 +34,6 @@ Background reading to understand the concepts of Feature Store and OCI Data Scie
 
 Refer `Terraform section <https://objectstorage.us-ashburn-1.oraclecloud.com/p/hh2NOgFJbVSg4amcLM3G3hkTuHyBD-8aE_iCsuZKEvIav1Wlld-3zfCawG4ycQGN/n/ociodscdev/b/oci-feature-store/o/beta/index.html#document-terraform>`__ for setting up feature store server.
 
-.. seealso::
-    Refer :ref:`Notebook Examples` to check out more example for using feature store
-
 .. warning::
 
     1. Initial implementation will not allow parallel execution of similar logical constructs. Creation will be sequential.
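After the odsc conda install step in the quickstart above completes, the pack surfaces as a Jupyter kernel; it can also be activated from the same terminal. A sketch, assuming the notebook session's usual conda install location of /home/datascience/conda (an assumption, not stated in this diff):

    # Activate the freshly installed feature store conda pack in the terminal.
    conda activate /home/datascience/conda/fspyspark32_p38_cpu_v1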

ads/feature_store/docs/source/release_notes.rst

Lines changed: 3 additions & 1 deletion
@@ -28,7 +28,9 @@ Release Notes
 Release notes: July 5, 2023
 
 * [FEATURE] Supporting Offline Feature Type COMPLEX
-* [[DOCS] Data Type update for Offline Feature Type COMPLEX
+* [FEATURE] Added k8 default version as v1.26.2
+* [DOCS] Data Type update for Offline Feature Type COMPLEX
+* [DOCS] Updated terraform default version as 1.1.x
 
 1.0
 ---

ads/feature_store/docs/source/terraform.rst

Lines changed: 58 additions & 28 deletions
@@ -28,8 +28,8 @@ Feature Store users need to provide the following access permissions in order to
 
 .. code-block:: shell
 
-    define tenancy <feature store service tenancy> as <feature store service tenancy ocid>
-    endorse group <feature store user group> to read repos in tenancy <feature store service tenancy>
+    define tenancy service_tenancy as ocid1.tenancy.oc1..aaaaaaaahzy3x4boh7ipxyft2rowu2xeglvanlfewudbnueugsieyuojkldq
+    endorse group <feature store user group> to read repos in tenancy service_tenancy
     allow group <feature store user group> to manage orm-stacks in compartment <compartmentName>
     allow group <feature store user group> to manage orm-jobs in compartment <compartmentName>
     allow group <feature store user group> to manage object-family in compartment <compartmentName>

@@ -82,46 +82,76 @@ Prerequisites
 
 1. Install `oci-cli <https://docs.oracle.com/en-us/iaas/Content/API/Concepts/cliconcepts.htm>`__ if not installed
 
-2. Download the stack from ``Terraform Stack`` column in :ref:`Release Notes`.
-
-3. (Optional: Skip if default deployment is required) To use this file just copy the example ``terraform.tfvars.example`` and save it in the outermost directory.
-
-4. (Optional: Skip if default deployment is required) Next, rename the file to ``terraform.tfvars``. You can override the example values set in this file.
-
-
 Steps
 #####
 
 .. note::
 
     Refer :ref:`Release Notes` for getting the latest conda pack and ``SERVICE_VERSION``. Remember to replace the values within angle brackets ("<>" symbols) in the command above with the relevant values for your environment. Also, Refer :ref:`User Policies` to create feature store stack for non admin users. No policies are explicitly required for admin user.
 
-
-1. Download the stack from ``Terraform Stack`` column in :ref:`Release Notes`.
-
-2. Run the shell command.
+1. Run the shell command.
 .. code-block:: shell
 
-    oci resource-manager stack create \
-        --compartment-id <compartment-id> \
-        --config-source <path-to-downloaded-zip-file> \
-        --variables '{
-            "service_version": "<SERVICE_VERSION>",
-            "tenancy_ocid": "<TENANCY_OCID>",
-            "compartment_ocid": "<COMPARTMENT_OCID>",
-            "region": "<REGION>",
-            "user_ocid": "<USER_OCID>"
-        }' \
-        --display-name "Feature Store Stack" \
-        --working-directory "feature-store-terraform" \
-    | tee stack_output.json \
-    && stack_id=$(jq -r '.data."id"' stack_output.json) \
-    && oci resource-manager job create-apply-job \
+    rm -f feature-store-terraform.zip \
+    && wget https://objectstorage.us-ashburn-1.oraclecloud.com/p/vZogtXWwHqbkGLeqyKiqBmVxdbR4MK4nyOBqDsJNVE4sHGUY5KFi4T3mOFGA3FOy/n/idogsu2ylimg/b/oci-feature-store/o/beta/terraform/feature-store-terraform.zip \
+    && oci resource-manager stack create \
+        --compartment-id <compartment-id> \
+        --config-source <path-to-downloaded-zip-file> \
+        --terraform-version 1.1.x \
+        --variables '{
+            "service_version": "<SERVICE_VERSION>",
+            "tenancy_ocid": "<TENANCY_OCID>",
+            "compartment_ocid": "<COMPARTMENT_OCID>",
+            "region": "<REGION>",
+            "user_ocid": "<USER_OCID>"
+        }' \
+        --display-name "Feature Store Stack" \
+        --working-directory "feature-store-terraform" \
+    | tee stack_output.json \
+    && stack_id=$(jq -r '.data."id"' stack_output.json) \
+    && oci resource-manager job create-apply-job \
         --execution-plan-strategy AUTO_APPROVED \
         --stack-id $stack_id \
         --wait-for-state SUCCEEDED \
         --wait-for-state FAILED
 
+Update Feature Store Stack with the Latest using OCI CLI
+==============================
+
+Prerequisites
+#############
+
+1. Install `oci-cli <https://docs.oracle.com/en-us/iaas/Content/API/Concepts/cliconcepts.htm>`__ if not installed
+2. In order to update the stack, get the <STACK_ID> from console by navigating to `OCI Resource Manager <https://www.oracle.com/devops/resource-manager/>`__.
+
+.. figure:: figures/resource_manager.png
+
+.. figure:: figures/resource_manager_home.png
+
+.. figure:: figures/resource_manager_feature_store_stack.png
+
+
+Steps
+#####
+
+.. note::
+
+    Refer :ref:`Release Notes` for getting the latest conda pack and ``SERVICE_VERSION``. Remember to replace the values within angle brackets ("<>" symbols) in the command above with the relevant values for your environment. Also, Refer :ref:`User Policies` to create feature store stack for non admin users. No policies are explicitly required for admin user.
+
+1. Run the shell command.
+.. code-block:: shell
+
+    rm -f feature-store-terraform.zip \
+    && wget https://objectstorage.us-ashburn-1.oraclecloud.com/p/vZogtXWwHqbkGLeqyKiqBmVxdbR4MK4nyOBqDsJNVE4sHGUY5KFi4T3mOFGA3FOy/n/idogsu2ylimg/b/oci-feature-store/o/beta/terraform/feature-store-terraform.zip \
+    && oci resource-manager stack update \
+        --stack-id <STACK_ID> \
+        --variables '{"service_version": "<SERVICE_VERSION>", "tenancy_ocid": "<TENANCY_OCID>", "compartment_ocid": "<COMPARTMENT_OCID>", "region": "<REGION>", "user_ocid": "<USER_OCID>"}' \
+        --config-source "feature-store-terraform.zip" \
+        --working-directory "feature-store-terraform" --force \
+    && oci resource-manager job create-apply-job \
+        --execution-plan-strategy AUTO_APPROVED \
+        --stack-id <STACK_ID> \
+        --wait-for-state SUCCEEDED --wait-for-state FAILED
 
 Terraform Variables (Advanced)
 ===============================
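The headline change of this commit is pinning --terraform-version 1.1.x in both the create and update commands above. To check what an existing stack is pinned to, one option is the stack get call below (a sketch reusing the jq style from the commands above; the kebab-case "terraform-version" key is assumed from the CLI's standard JSON output):

    # Print the Terraform version pinned on a stack; <STACK_ID> as above.
    oci resource-manager stack get --stack-id <STACK_ID> \
    | jq -r '.data."terraform-version"'   # expected output, e.g.: 1.1.x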
