From fc61a64530fb8fc636dc33f4f02761005b673c10 Mon Sep 17 00:00:00 2001
From: Nitin Sai <43436336+Nitin1901@users.noreply.github.com>
Date: Thu, 5 Mar 2020 17:12:45 +0530
Subject: [PATCH] New changes to the code were made

After the update of sk-learn modules, Imputer isn't available and is modified to SimpleImputer and ColumnTransformer is a combination LabelEncoding and OneHotEncoding into just one line of code.
---
Review notes (this section is ignored by `git am`):
 * Step 4 now encodes the target as `Y`, the name assigned in Step 2; the
   lowercase `y` was never defined and raised NameError.
 * `dtype=float` replaces `dtype=np.float`; the alias was removed from NumPy
   and the builtin selects the same dtype.
 * `verbose=0` is no longer passed to SimpleImputer; the parameter was
   deprecated and later removed from scikit-learn, and had no effect.
 * Only added lines were edited one-for-one, so the hunk counts and diffstat
   are unchanged; the post-image blob id on the `index` line predates these
   edits.

 Code/Day 1_Data PreProcessing.md | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/Code/Day 1_Data PreProcessing.md b/Code/Day 1_Data PreProcessing.md
index 569d0e6..84c0924 100644
--- a/Code/Day 1_Data PreProcessing.md
+++ b/Code/Day 1_Data PreProcessing.md
@@ -15,27 +15,21 @@ import pandas as pd
 ```python
 dataset = pd.read_csv('Data.csv')
 X = dataset.iloc[ : , :-1].values
-Y = dataset.iloc[ : , 3].values
+Y = dataset.iloc[ : , -1].values
 ```
 ## Step 3: Handling the missing data
 ```python
-from sklearn.preprocessing import Imputer
-imputer = Imputer(missing_values = "NaN", strategy = "mean", axis = 0)
-imputer = imputer.fit(X[ : , 1:3])
-X[ : , 1:3] = imputer.transform(X[ : , 1:3])
+from sklearn.impute import SimpleImputer
+imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
+X[:, [1,2]] = imputer.fit_transform(X[:, [1,2]])
 ```
 ## Step 4: Encoding categorical data
 ```python
 from sklearn.preprocessing import LabelEncoder, OneHotEncoder
-labelencoder_X = LabelEncoder()
-X[ : , 0] = labelencoder_X.fit_transform(X[ : , 0])
-```
-### Creating a dummy variable
-```python
-onehotencoder = OneHotEncoder(categorical_features = [0])
-X = onehotencoder.fit_transform(X).toarray()
-labelencoder_Y = LabelEncoder()
-Y = labelencoder_Y.fit_transform(Y)
+from sklearn.compose import ColumnTransformer
+ct = ColumnTransformer([('encoder', OneHotEncoder(), [0])], remainder='passthrough')
+X = np.array(ct.fit_transform(X), dtype=float)
+Y = LabelEncoder().fit_transform(Y)
 ```
 ## Step 5: Splitting the datasets into training sets and Test sets
 ```python