|
594 | 594 | "pipe.predict(X)\n"
|
595 | 595 | ]
|
596 | 596 | },
|
| 597 | + { |
| 598 | + "cell_type": "markdown", |
| 599 | + "metadata": {}, |
| 600 | + "source": [ |
| 601 | + "### FunctionTransformer: Build Robust Preprocessing Pipelines with Custom Transformations" |
| 602 | + ] |
| 603 | + }, |
| 604 | + { |
| 605 | + "cell_type": "markdown", |
| 606 | + "metadata": {}, |
| 607 | + "source": [ |
| 608 | + "If you want to construct a transformer from an arbitrary callable, use `FunctionTransformer` in scikit-learn." |
| 609 | + ] |
| 610 | + }, |
| 611 | + { |
| 612 | + "cell_type": "code", |
| 613 | + "execution_count": 3, |
| 614 | + "metadata": {}, |
| 615 | + "outputs": [ |
| 616 | + { |
| 617 | + "data": { |
| 618 | + "text/plain": [ |
| 619 | + "array([[0. , 0.69314718],\n", |
| 620 | + " [1.09861229, 1.38629436]])" |
| 621 | + ] |
| 622 | + }, |
| 623 | + "execution_count": 3, |
| 624 | + "metadata": {}, |
| 625 | + "output_type": "execute_result" |
| 626 | + } |
| 627 | + ], |
| 628 | + "source": [ |
| 629 | + "import numpy as np\n", |
| 630 | + "from sklearn.preprocessing import FunctionTransformer\n", |
| 631 | + "\n", |
| 632 | + "transformer = FunctionTransformer(np.log1p)\n", |
| 633 | + "X = np.array([[0, 1], [2, 3]])\n", |
| 634 | + "transformer.transform(X)" |
| 635 | + ] |
| 636 | + }, |
| 637 | + { |
| 638 | + "cell_type": "markdown", |
| 639 | + "metadata": {}, |
| 640 | + "source": [ |
| 641 | + "The `FunctionTransformer` enables integrating your custom function seamlessly into scikit-learn's pipeline framework, making it easier to build complex preprocessing workflows and ensure consistent application of transformations across different datasets." |
| 642 | + ] |
| 643 | + }, |
| 644 | + { |
| 645 | + "cell_type": "code", |
| 646 | + "execution_count": 1, |
| 647 | + "metadata": {}, |
| 648 | + "outputs": [ |
| 649 | + { |
| 650 | + "name": "stdout", |
| 651 | + "output_type": "stream", |
| 652 | + "text": [ |
| 653 | + "Predictions: [1 1]\n" |
| 654 | + ] |
| 655 | + } |
| 656 | + ], |
| 657 | + "source": [ |
| 658 | + "import pandas as pd\n", |
| 659 | + "from sklearn.pipeline import Pipeline\n", |
| 660 | + "from sklearn.linear_model import LogisticRegression\n", |
| 661 | + "from sklearn.preprocessing import FunctionTransformer\n", |
| 662 | + "import numpy as np\n", |
| 663 | + "\n", |
| 664 | + "# Create a simple pandas DataFrame\n", |
| 665 | + "data = {\n", |
| 666 | + " \"feature1\": [1, 2, 3, 4, 5],\n", |
| 667 | + " \"feature2\": [6, 7, 8, 9, 10],\n", |
| 668 | + " \"target\": [0, 0, 1, 1, 1],\n", |
| 669 | + "}\n", |
| 670 | + "df = pd.DataFrame(data)\n", |
| 671 | + "\n", |
| 672 | + "# Split the DataFrame into features and target\n", |
| 673 | + "X = df[[\"feature1\", \"feature2\"]]\n", |
| 674 | + "y = df[\"target\"]\n", |
| 675 | + "\n", |
| 676 | + "# Define the FunctionTransformer\n", |
| 677 | + "log_transformer = FunctionTransformer(np.log1p)\n", |
| 678 | + "\n", |
| 679 | + "\n", |
| 680 | + "# Define the pipeline\n", |
| 681 | + "pipeline = Pipeline(\n", |
| 682 | + " [(\"log_transform\", log_transformer), (\"classifier\", LogisticRegression())]\n", |
| 683 | + ")\n", |
| 684 | + "\n", |
| 685 | + "# Fit the pipeline on the data\n", |
| 686 | + "pipeline.fit(X, y)\n", |
| 687 | + "\n", |
| 688 | + "# Make predictions on new data\n", |
| 689 | + "new_data = {\"feature1\": [6, 7], \"feature2\": [11, 12]}\n", |
| 690 | + "new_df = pd.DataFrame(new_data)\n", |
| 691 | + "predictions = pipeline.predict(new_df)\n", |
| 692 | + "\n", |
| 693 | + "# Print the predictions\n", |
| 694 | + "print(\"Predictions:\", predictions)" |
| 695 | + ] |
| 696 | + }, |
597 | 697 | {
|
598 | 698 | "attachments": {},
|
599 | 699 | "cell_type": "markdown",
|
|
0 commit comments