added release notes

KshitizLohia · KshitizLohia · commit dde22e0c5a6f · 2023-07-17T20:16:28.000+05:30
diff --git a/ads/feature_store/docs/source/conf.py b/ads/feature_store/docs/source/conf.py
@@ -9,7 +9,7 @@
 
 sys.path.insert(0, os.path.abspath("../../"))
 
-version = "1.0"
+version = "1.2"
 release = version
 
 
@@ -54,7 +54,7 @@
 
 # Get version
 
-version = "1.0"
+version = "1.2"
 release = version
 
 # Unless we want to expose real buckets and namespaces
diff --git a/ads/feature_store/docs/source/feature_validation.rst b/ads/feature_store/docs/source/feature_validation.rst
@@ -1,7 +1,7 @@
 .. _Feature Validation:
 
 Feature Validation
-*************
+******************
 
 Feature validation is the process of checking the quality and accuracy of the features used in a machine learning model. This is important because features that are not accurate or reliable can lead to poor model performance.
 Feature store allows you to define expectations on the data that is being materialized into feature groups and datasets. This is achieved using the open source library Great Expectations.
@@ -43,7 +43,7 @@ An Expectation is a verifiable assertion about your data. You can define expecta
     )
 
 Expectations Suite
-============
+===================
 
 An Expectation Suite is a collection of verifiable assertions, i.e. expectations, about your data. You can define an expectation suite as below:
 
diff --git a/ads/feature_store/docs/source/release_notes.rst b/ads/feature_store/docs/source/release_notes.rst
@@ -3,6 +3,35 @@
 =============
 Release Notes
 =============
+1.2
+---
+.. note::
+
+    .. list-table::
+      :header-rows: 1
+
+      * - Package Name
+        - Latest Version
+        - Notes
+      * - Conda pack
+        - ``https://objectstorage.us-ashburn-1.oraclecloud.com/n/bigdatadatasciencelarge/b/service-conda-packs-fs/o/service_pack/cpu/PySpark_3.2_and_Feature_Store/1.0/fspyspark32_p38_cpu_v1#conda``
+        -
+      * - SERVICE_VERSION
+        - 0.1.218.master
+        -
+      * - Terraform Stack
+        - `link <https://objectstorage.us-ashburn-1.oraclecloud.com/p/vZogtXWwHqbkGLeqyKiqBmVxdbR4MK4nyOBqDsJNVE4sHGUY5KFi4T3mOFGA3FOy/n/idogsu2ylimg/b/oci-feature-store/o/beta/terraform/feature-store-terraform.zip>`__
+        - PAR link expires Jan 5, 2026
+
+Release notes: July 21, 2023
+
+* [FEATURE] Support for deployment in the ``us-ashburn`` and ``uk-london`` regions.
+* [FEATURE] Added the ``to_summary()`` method to ``ValidationOutput`` instances for validation summary details.
+* [FEATURE] Added the ``to_pandas()`` method to ``ValidationOutput`` instances for a detailed validation report.
+* [FIX] Fixed unit test integration to support the merging of ADS into the main branch.
+* [DOCS] Documented the ``to_summary()`` method of ``ValidationOutput`` instances for validation summary details.
+* [DOCS] Documented the ``to_pandas()`` method of ``ValidationOutput`` instances for a detailed validation report.
+
 1.1
 ---
 
diff --git a/ads/feature_store/docs/source/terraform.rst b/ads/feature_store/docs/source/terraform.rst
@@ -90,6 +90,7 @@ Steps
   Refer to :ref:`Release Notes` for the latest conda pack and ``SERVICE_VERSION``. Remember to replace the values within angle brackets ("<>" symbols) in the command above with the relevant values for your environment. Also, refer to :ref:`User Policies` to create a feature store stack for non-admin users. No policies are explicitly required for admin users.
 
 1. Run the shell command.
+
   ..  code-block:: shell
 
     rm -f feature-store-terraform.zip \
@@ -116,7 +117,7 @@ Steps
         --wait-for-state FAILED
 
 Update Feature Store Stack with the Latest using OCI CLI
-==============================
+========================================================
 
 Prerequisites
 #############
@@ -139,6 +140,7 @@ Steps
   Refer to :ref:`Release Notes` for the latest conda pack and ``SERVICE_VERSION``. Remember to replace the values within angle brackets ("<>" symbols) in the command above with the relevant values for your environment. Also, refer to :ref:`User Policies` to create a feature store stack for non-admin users. No policies are explicitly required for admin users.
 
 1. Run the shell command.
+
   ..  code-block:: shell
 
     rm -f feature-store-terraform.zip \
diff --git a/ads/feature_store/test.ipynb b/ads/feature_store/test.ipynb
@@ -0,0 +1,294 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2023-07-07T06:46:31.889241Z",
+     "start_time": "2023-07-07T06:46:23.997958Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   name  age address.street address.city address.state  \\\n",
+      "0  John   30    123 Main St     New York            NY   \n",
+      "\n",
+      "                                    address.contacts       street      city  \\\n",
+      "0  [{'type': 'email', 'value': 'john@example.com'...  123 Main St  New York   \n",
+      "\n",
+      "  state                                           contacts  \n",
+      "0    NY  [{'type': 'email', 'value': 'john@example.com'...  \n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import json\n",
+    "\n",
+    "data = {\n",
+    "    \"name\": \"John\",\n",
+    "    \"age\": 30,\n",
+    "    \"address\": {\n",
+    "        \"street\": \"123 Main St\",\n",
+    "        \"city\": \"New York\",\n",
+    "        \"state\": \"NY\",\n",
+    "        \"contacts\": [\n",
+    "            {\n",
+    "                \"type\": \"email\",\n",
+    "                \"value\": \"john@example.com\"\n",
+    "            },\n",
+    "            {\n",
+    "                \"type\": \"phone\",\n",
+    "                \"value\": \"123-456-7890\"\n",
+    "            }\n",
+    "        ]\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "# Flatten the outer level\n",
+    "df_outer = pd.json_normalize(data)\n",
+    "\n",
+    "# Flatten the \"address\" subtable\n",
+    "df_address = pd.json_normalize(data['address'], max_level=1)\n",
+    "\n",
+    "# Combine the outer and subtable dataframes\n",
+    "df_combined = pd.concat([df_outer, df_address], axis=1)\n",
+    "\n",
+    "print(df_combined)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "   name  age address.street address.city address.state  \\\n0  John   30    123 Main St     New York            NY   \n\n                                    address.contacts       street      city  \\\n0  [{'type': 'email', 'value': 'john@example.com'...  123 Main St  New York   \n\n  state                                           contacts  \n0    NY  [{'type': 'email', 'value': 'john@example.com'...  ",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>name</th>\n      <th>age</th>\n      <th>address.street</th>\n      <th>address.city</th>\n      <th>address.state</th>\n      <th>address.contacts</th>\n      <th>street</th>\n      <th>city</th>\n      <th>state</th>\n      <th>contacts</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>John</td>\n      <td>30</td>\n      <td>123 Main St</td>\n      <td>New York</td>\n      <td>NY</td>\n      <td>[{'type': 'email', 'value': 'john@example.com'...</td>\n      <td>123 Main St</td>\n      <td>New York</td>\n      <td>NY</td>\n      <td>[{'type': 'email', 'value': 'john@example.com'...</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_combined"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-07-07T06:46:38.652495Z",
+     "start_time": "2023-07-07T06:46:38.619414Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   name  age address.street address.city address.state  \\\n",
+      "0  John   30    123 Main St     New York            NY   \n",
+      "\n",
+      "                                    address.contacts  \n",
+      "0  [{\"type\": \"email\", \"value\": \"john@example.com\"...  \n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import json\n",
+    "\n",
+    "data = {\n",
+    "    \"name\": \"John\",\n",
+    "    \"age\": 30,\n",
+    "    \"address\": {\n",
+    "        \"street\": \"123 Main St\",\n",
+    "        \"city\": \"New York\",\n",
+    "        \"state\": \"NY\",\n",
+    "        \"contacts\": [\n",
+    "            {\n",
+    "                \"type\": \"email\",\n",
+    "                \"value\": \"john@example.com\"\n",
+    "            },\n",
+    "            {\n",
+    "                \"type\": \"phone\",\n",
+    "                \"value\": \"123-456-7890\"\n",
+    "            }\n",
+    "        ]\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "df = pd.DataFrame()\n",
+    "for key, value in data.items():\n",
+    "    if isinstance(value, dict):\n",
+    "        for subkey, subvalue in value.items():\n",
+    "            if isinstance(subvalue, list):\n",
+    "                df[f'{key}.{subkey}'] = [json.dumps(subvalue)]\n",
+    "            else:\n",
+    "                df[f'{key}.{subkey}'] = [subvalue]\n",
+    "    else:\n",
+    "        df[key] = [value]\n",
+    "\n",
+    "print(df)\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-07-07T06:48:36.982219Z",
+     "start_time": "2023-07-07T06:48:36.965884Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "   name  age address.street address.city address.state  \\\n0  John   30    123 Main St     New York            NY   \n\n                                    address.contacts  \n0  [{\"type\": \"email\", \"value\": \"john@example.com\"...  ",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>name</th>\n      <th>age</th>\n      <th>address.street</th>\n      <th>address.city</th>\n      <th>address.state</th>\n      <th>address.contacts</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>John</td>\n      <td>30</td>\n      <td>123 Main St</td>\n      <td>New York</td>\n      <td>NY</td>\n      <td>[{\"type\": \"email\", \"value\": \"john@example.com\"...</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-07-07T06:48:40.293858Z",
+     "start_time": "2023-07-07T06:48:40.278669Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "outputs": [
+    {
+     "ename": "ValueError",
+     "evalue": "DataFrame constructor not properly called!",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
+      "\u001B[0;31mValueError\u001B[0m                                Traceback (most recent call last)",
+      "\u001B[0;32m/var/folders/pg/zfq6crsd7kdd3qw8txc6c1h00000gn/T/ipykernel_47057/3621665856.py\u001B[0m in \u001B[0;36m?\u001B[0;34m()\u001B[0m\n\u001B[1;32m     23\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     24\u001B[0m \u001B[0mdf_outer\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mpd\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mDataFrame\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdata\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mindex\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     25\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     26\u001B[0m \u001B[0;31m# Extract the subtable as a separate dataframe\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 27\u001B[0;31m \u001B[0mdf_subtable\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mpd\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mDataFrame\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mdf_outer\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;34m'address'\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mvalues\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0;36m0\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m     28\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     29\u001B[0m \u001B[0;31m# Remove the subtable column from the main dataframe\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m     30\u001B[0m \u001B[0mdf_outer\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdrop\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'address'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0maxis\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;36m1\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0minplace\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;32mTrue\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;32m~/IdeaProjects/oracle/feature-store/advanced-ds/venv/lib/python3.10/site-packages/pandas/core/frame.py\u001B[0m in \u001B[0;36m?\u001B[0;34m(self, data, index, columns, dtype, copy)\u001B[0m\n\u001B[1;32m    777\u001B[0m                 )\n\u001B[1;32m    778\u001B[0m         \u001B[0;31m# For data is scalar\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    779\u001B[0m         \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    780\u001B[0m             \u001B[0;32mif\u001B[0m \u001B[0mindex\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mNone\u001B[0m \u001B[0;32mor\u001B[0m \u001B[0mcolumns\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 781\u001B[0;31m                 \u001B[0;32mraise\u001B[0m \u001B[0mValueError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"DataFrame constructor not properly called!\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m    782\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    783\u001B[0m             \u001B[0mindex\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mensure_index\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mindex\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m    784\u001B[0m             \u001B[0mcolumns\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mensure_index\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mcolumns\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
+      "\u001B[0;31mValueError\u001B[0m: DataFrame constructor not properly called!"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import json\n",
+    "\n",
+    "data = {\n",
+    "    \"name\": \"John\",\n",
+    "    \"age\": 30,\n",
+    "    \"address\": {\n",
+    "        \"street\": \"123 Main St\",\n",
+    "        \"city\": \"New York\",\n",
+    "        \"state\": \"NY\",\n",
+    "        \"contacts\": [\n",
+    "            {\n",
+    "                \"type\": \"email\",\n",
+    "                \"value\": \"john@example.com\"\n",
+    "            },\n",
+    "            {\n",
+    "                \"type\": \"phone\",\n",
+    "                \"value\": \"123-456-7890\"\n",
+    "            }\n",
+    "        ]\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "df_outer = pd.DataFrame(data, index=[0])\n",
+    "\n",
+    "# Extract the subtable as a separate dataframe\n",
+    "df_subtable = pd.DataFrame(df_outer['address'].values[0])\n",
+    "\n",
+    "# Remove the subtable column from the main dataframe\n",
+    "df_outer.drop('address', axis=1, inplace=True)\n",
+    "\n",
+    "# Combine the main dataframe with the subtable dataframe\n",
+    "df_combined = pd.concat([df_outer, df_subtable], axis=1)\n",
+    "\n",
+    "print(df_combined)\n"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-07-07T06:49:19.205319Z",
+     "start_time": "2023-07-07T06:49:18.929206Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "   name  age address.street address.city address.state  \\\n0  John   30    123 Main St     New York            NY   \n\n                                    address.contacts       street      city  \\\n0  [{'type': 'email', 'value': 'john@example.com'...  123 Main St  New York   \n\n  state                                           contacts  \n0    NY  [{'type': 'email', 'value': 'john@example.com'...  ",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>name</th>\n      <th>age</th>\n      <th>address.street</th>\n      <th>address.city</th>\n      <th>address.state</th>\n      <th>address.contacts</th>\n      <th>street</th>\n      <th>city</th>\n      <th>state</th>\n      <th>contacts</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>John</td>\n      <td>30</td>\n      <td>123 Main St</td>\n      <td>New York</td>\n      <td>NY</td>\n      <td>[{'type': 'email', 'value': 'john@example.com'...</td>\n      <td>123 Main St</td>\n      <td>New York</td>\n      <td>NY</td>\n      <td>[{'type': 'email', 'value': 'john@example.com'...</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_combined"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "ExecuteTime": {
+     "end_time": "2023-07-07T06:49:23.854478Z",
+     "start_time": "2023-07-07T06:49:23.842264Z"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   }
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}