|
594 | 594 | "pipe.predict(X)\n"
|
595 | 595 | ]
|
596 | 596 | },
|
| 597 | + { |
| 598 | + "cell_type": "markdown", |
| 599 | + "metadata": {}, |
| 600 | + "source": [ |
| 601 | + "### FunctionTransformer: Build Robust Preprocessing Pipelines with Custom Transformations" |
| 602 | + ] |
| 603 | + }, |
| 604 | + { |
| 605 | + "cell_type": "markdown", |
| 606 | + "metadata": {}, |
| 607 | + "source": [ |
| 608 | + "If you want to construct a transformer from an arbitrary callable, use `FunctionTransformer` in scikit-learn." |
| 609 | + ] |
| 610 | + }, |
| 611 | + { |
| 612 | + "cell_type": "code", |
| 613 | + "execution_count": 3, |
| 614 | + "metadata": {}, |
| 615 | + "outputs": [ |
| 616 | + { |
| 617 | + "data": { |
| 618 | + "text/plain": [ |
| 619 | + "array([[0. , 0.69314718],\n", |
| 620 | + " [1.09861229, 1.38629436]])" |
| 621 | + ] |
| 622 | + }, |
| 623 | + "execution_count": 3, |
| 624 | + "metadata": {}, |
| 625 | + "output_type": "execute_result" |
| 626 | + } |
| 627 | + ], |
| 628 | + "source": [ |
| 629 | + "import numpy as np\n", |
| 630 | + "from sklearn.preprocessing import FunctionTransformer\n", |
| 631 | + "\n", |
| 632 | + "transformer = FunctionTransformer(np.log1p)\n", |
| 633 | + "X = np.array([[0, 1], [2, 3]])\n", |
| 634 | + "transformer.transform(X)" |
| 635 | + ] |
| 636 | + }, |
| 637 | + { |
| 638 | + "cell_type": "markdown", |
| 639 | + "metadata": {}, |
| 640 | + "source": [ |
| 641 | + "The `FunctionTransformer` enables integrating your custom function seamlessly into scikit-learn's pipeline framework, making it easier to build complex preprocessing workflows and ensure consistent application of transformations across different datasets." |
| 642 | + ] |
| 643 | + }, |
| 644 | + { |
| 645 | + "cell_type": "code", |
| 646 | + "execution_count": 1, |
| 647 | + "metadata": {}, |
| 648 | + "outputs": [ |
| 649 | + { |
| 650 | + "name": "stdout", |
| 651 | + "output_type": "stream", |
| 652 | + "text": [ |
| 653 | + "Predictions: [1 1]\n" |
| 654 | + ] |
| 655 | + } |
| 656 | + ], |
| 657 | + "source": [ |
| 658 | + "import pandas as pd\n", |
| 659 | + "from sklearn.pipeline import Pipeline\n", |
| 660 | + "from sklearn.linear_model import LogisticRegression\n", |
| 661 | + "from sklearn.preprocessing import FunctionTransformer\n", |
| 662 | + "import numpy as np\n", |
| 663 | + "\n", |
| 664 | + "# Create a simple pandas DataFrame\n", |
| 665 | + "data = {\n", |
| 666 | + " \"feature1\": [1, 2, 3, 4, 5],\n", |
| 667 | + " \"feature2\": [6, 7, 8, 9, 10],\n", |
| 668 | + " \"target\": [0, 0, 1, 1, 1],\n", |
| 669 | + "}\n", |
| 670 | + "df = pd.DataFrame(data)\n", |
| 671 | + "\n", |
| 672 | + "# Split the DataFrame into features and target\n", |
| 673 | + "X = df[[\"feature1\", \"feature2\"]]\n", |
| 674 | + "y = df[\"target\"]\n", |
| 675 | + "\n", |
| 676 | + "# Define the FunctionTransformer\n", |
| 677 | + "log_transformer = FunctionTransformer(np.log1p)\n", |
| 678 | + "\n", |
| 679 | + "\n", |
| 680 | + "# Define the pipeline\n", |
| 681 | + "pipeline = Pipeline(\n", |
| 682 | + " [(\"log_transform\", log_transformer), (\"classifier\", LogisticRegression())]\n", |
| 683 | + ")\n", |
| 684 | + "\n", |
| 685 | + "# Fit the pipeline on the data\n", |
| 686 | + "pipeline.fit(X, y)\n", |
| 687 | + "\n", |
| 688 | + "# Make predictions on new data\n", |
| 689 | + "new_data = {\"feature1\": [6, 7], \"feature2\": [11, 12]}\n", |
| 690 | + "new_df = pd.DataFrame(new_data)\n", |
| 691 | + "predictions = pipeline.predict(new_df)\n", |
| 692 | + "\n", |
| 693 | + "# Print the predictions\n", |
| 694 | + "print(\"Predictions:\", predictions)" |
| 695 | + ] |
| 696 | + }, |
597 | 697 | {
|
598 | 698 | "attachments": {},
|
599 | 699 | "cell_type": "markdown",
|
|
0 commit comments