Commit 7198747

committed
added docs
1 parent 489a254 commit 7198747

File tree

2 files changed: +149 −0 lines


ads/feature_store/docs/source/release_notes.rst

Lines changed: 37 additions & 0 deletions
@@ -4,6 +4,43 @@
Release Notes
=============

1.0.4
-----

.. note::

  .. list-table::
    :header-rows: 1

    * - Package Name
      - Latest Version
      - Notes
    * - Conda pack
      - `fspyspark32_p38_cpu_v2`
      -
    * - SERVICE_VERSION
      - 0.1.256.master
      -
    * - ADS_VERSION
      - oracle-ads==2.9.0rc0
      - `https://github.com/oracle/accelerated-data-science/releases/tag/v2.9.0rc0`
    * - Terraform Stack
      - `link <https://objectstorage.us-ashburn-1.oraclecloud.com/p/vZogtXWwHqbkGLeqyKiqBmVxdbR4MK4nyOBqDsJNVE4sHGUY5KFi4T3mOFGA3FOy/n/idogsu2ylimg/b/oci-feature-store/o/beta/terraform/feature-store-terraform.zip>`__
      -

Release notes: November 15, 2023

* [FEATURE] Streaming data-frame support in the ``FeatureGroup`` and ``Dataset`` constructs
* [FEATURE] Support for custom Spark transformations using the ``Transformation`` construct
* [MAINTENANCE] Decouple the feature store client from the OCI client
* [MAINTENANCE] Upgrade the ``mlm`` version to 1.0.2
* [MAINTENANCE] Upgrade the ``great-expectations`` version to 0.17.19
* [UI] Addition of a dataset jobs UI tab
* [UI] Addition of a feature group jobs UI tab
* [UI] Addition of a feature store landing page
* [DOCS] Addition of class documentation for the feature store
* [CONDA] Release of the feature store v2 conda pack ``fspyspark32_p38_cpu_v2``

1.0.3
-----

.. note::

ads/feature_store/docs/source/transformation.rst

Lines changed: 112 additions & 0 deletions
@@ -3,6 +3,118 @@ Transformation

Transformations in a feature store refer to the operations and processes applied to raw data to create, modify, or derive new features that can be used as inputs to ML models. These transformations are crucial for improving the quality, relevance, and usefulness of features, which in turn can enhance the performance of ML models. A transformation is an object that represents an operation applied on a feature group; it can be a pandas, Spark SQL, or Spark transformation.

* ``TransformationMode.PANDAS``: allows users to perform the transformation using native pandas functionality.
* ``TransformationMode.SQL``: Spark SQL brings native support for SQL to Spark; users express the transformation they wish to perform as a Spark SQL query.
* ``TransformationMode.SPARK``: allows users to perform the transformation using native Spark functionality.

.. tabs::

  .. code-tab:: Python3
    :caption: TransformationMode.SQL

    from ads.feature_store.transformation import Transformation, TransformationMode

    def transactions_df(transactions_batch):
        sql_query = f"select id, cc_num, amount from {transactions_batch}"
        return sql_query

    transformation = (
        Transformation()
        .with_description("Feature store description")
        .with_compartment_id(os.environ["PROJECT_COMPARTMENT_OCID"])
        .with_display_name("FeatureStore")
        .with_feature_store_id(feature_store.id)
        .with_transformation_mode(TransformationMode.SQL)
        .with_source_code_function(transactions_df)
    )
    transformation.create()

  .. code-tab:: Python3
    :caption: TransformationMode.PANDAS

    def chained_transformation(patient_result_df, **transformation_args):
        def label_encoder_transformation(patient_result_df, **transformation_args):
            from sklearn.preprocessing import LabelEncoder
            # Create an instance of LabelEncoder
            labelencoder = LabelEncoder()
            result_df = patient_result_df.copy()
            column_labels = transformation_args.get("label_encode_column")
            if isinstance(column_labels, list):
                for col in column_labels:
                    result_df[col] = labelencoder.fit_transform(result_df[col])
            elif isinstance(column_labels, str):
                result_df[column_labels] = labelencoder.fit_transform(result_df[column_labels])
            else:
                return None
            return result_df

        def min_max_scaler(patient_result_df, **transformation_args):
            from sklearn.preprocessing import MinMaxScaler
            final_result_df = patient_result_df.copy()
            scaler = MinMaxScaler(feature_range=(0, 1))
            column_labels = transformation_args.get("scaling_column_labels")
            final_result_df[column_labels] = scaler.fit_transform(final_result_df[column_labels])
            return final_result_df

        def feature_removal(input_df, **transformation_args):
            output_df = input_df.copy()
            output_df.drop(transformation_args.get("redundant_feature_label"), axis=1, inplace=True)
            return output_df

        out1 = label_encoder_transformation(patient_result_df, **transformation_args)
        out2 = min_max_scaler(out1, **transformation_args)
        return feature_removal(out2, **transformation_args)

    transformation_args = {
        "label_encode_column": ["SOURCE"],
        "scaling_column_labels": [],
        "redundant_feature_label": ["MCH", "MCHC", "MCV"]
    }

    from ads.feature_store.transformation import Transformation, TransformationMode

    transformation = (
        Transformation()
        .with_display_name("chained_transformation")
        .with_feature_store_id(feature_store.id)
        .with_source_code_function(chained_transformation)
        .with_transformation_mode(TransformationMode.PANDAS)
        .with_description("transformation to perform feature engineering")
        .with_compartment_id(compartment_id)
    )

    transformation.create()

  .. code-tab:: Python3
    :caption: TransformationMode.SPARK

    def credit_score_transformation(credit_score):
        import pyspark.sql.functions as F

        # Create a new Spark DataFrame that contains the transformed credit score.
        transformed_credit_score = credit_score.select(
            "user_id",
            "date",
            F.when(F.col("credit_score").cast("int") > 500, 1).otherwise(0).alias("credit_score")
        )

        # Return the new Spark DataFrame.
        return transformed_credit_score

    from ads.feature_store.transformation import Transformation, TransformationMode

    transformation = (
        Transformation()
        .with_display_name("spark_transformation")
        .with_feature_store_id(feature_store.id)
        .with_source_code_function(credit_score_transformation)
        .with_transformation_mode(TransformationMode.SPARK)
        .with_description("transformation to perform feature engineering")
        .with_compartment_id(compartment_id)
    )

    transformation.create()

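Before registering a pandas-mode transformation, its logic can be checked locally on a small DataFrame. The following is a minimal sketch of the label-encode / min-max-scale / drop-columns chain shown in the ``TransformationMode.PANDAS`` example, using plain pandas only: the sample data is invented, and pandas ``category`` codes stand in for scikit-learn's ``LabelEncoder`` (both produce the same sorted 0..n-1 integer mapping).

```python
import pandas as pd

def chained_transformation(df, **args):
    out = df.copy()
    # Integer label encoding via pandas category codes (stand-in for
    # sklearn's LabelEncoder; values are mapped to 0..n-1 in sorted order).
    for col in args.get("label_encode_column", []):
        out[col] = out[col].astype("category").cat.codes
    # Min-max scaling to [0, 1], as MinMaxScaler(feature_range=(0, 1)) would do.
    for col in args.get("scaling_column_labels", []):
        lo, hi = out[col].min(), out[col].max()
        out[col] = (out[col] - lo) / (hi - lo)
    # Drop redundant feature columns.
    return out.drop(columns=args.get("redundant_feature_label", []))

# Illustrative sample data; column names follow the docs example.
df = pd.DataFrame({
    "SOURCE": ["in", "out", "in"],
    "MCH": [27.1, 29.3, 30.0],
    "AGE": [30, 40, 50],
})
result = chained_transformation(
    df,
    label_encode_column=["SOURCE"],
    scaling_column_labels=["AGE"],
    redundant_feature_label=["MCH"],
)
print(result)
```

Running the function locally like this makes it easy to verify the encoding and scaling behavior before handing the same callable to ``with_source_code_function``.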
Define
======
