1
1
#!/usr/bin/env python
2
2
# -*- coding: utf-8; -*-
3
3
4
- # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
4
+ # Copyright (c) 2020, 2023 Oracle and/or its affiliates.
5
5
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
6
7
7
import bisect
8
- from collections import defaultdict
8
+ import numpy as np
9
9
10
+ from collections import defaultdict
10
11
from sklearn .base import TransformerMixin
11
12
from sklearn .preprocessing import LabelEncoder
12
13
13
14
14
15
class DataFrameLabelEncoder (TransformerMixin ):
15
16
"""
16
- Label encoder for pandas.dataframe. dask.dataframe.core.DataFrame
17
+ Label encoder for `pandas.DataFrame` and `dask.dataframe.core.DataFrame`.
18
+
19
+ Attributes
20
+ ----------
21
+ label_encoders : defaultdict
22
+ Holds the label encoder for each column.
23
+
24
+ Examples
25
+ --------
26
+ >>> import pandas as pd
27
+ >>> from ads.dataset.label_encoder import DataFrameLabelEncoder
28
+
29
+ >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [3, 4]})
30
+ >>> le = DataFrameLabelEncoder()
31
+ >>> le.fit_transform(X=df)
32
+
17
33
"""
18
34
19
35
def __init__ (self ):
36
+ """Initialize an instance of DataFrameLabelEncoder."""
20
37
self .label_encoders = defaultdict (LabelEncoder )
21
38
22
- def fit (self , X ):
39
+ def fit (self , X : "pandas.DataFrame" ):
23
40
"""
24
- Fits a DataFrameLAbelEncoder.
41
+ Fits a DataFrameLabelEncoder.
42
+
43
+ Parameters
44
+ ----------
45
+ X : pandas.DataFrame
46
+ Data to fit on; only columns of dtype `object` or `category` are considered.
47
+
48
+ Returns
49
+ -------
50
+ self : DataFrameLabelEncoder
51
+ Fitted label encoder.
52
+
25
53
"""
26
54
for column in X .columns :
27
55
if X [column ].dtype .name in ["object" , "category" ]:
@@ -33,12 +61,24 @@ def fit(self, X):
33
61
for class_ in self .label_encoders [column ].classes_ .tolist ()
34
62
]
35
63
bisect .insort_left (label_encoder_classes_ , "unknown" )
64
+ label_encoder_classes_ = np .asarray (label_encoder_classes_ )
36
65
self .label_encoders [column ].classes_ = label_encoder_classes_
37
66
return self
38
67
39
- def transform (self , X ):
68
+ def transform (self , X : "pandas.DataFrame" ):
40
69
"""
41
- Transforms a dataset using the DataFrameLAbelEncoder.
70
+ Transforms a dataset using the DataFrameLabelEncoder.
71
+
72
+ Parameters
73
+ ----------
74
+ X : pandas.DataFrame
75
+ Data to transform.
76
+
77
+ Returns
78
+ -------
79
+ pandas.DataFrame
80
+ Labels as normalized encodings.
81
+
42
82
"""
43
83
categorical_columns = list (self .label_encoders .keys ())
44
84
if len (categorical_columns ) == 0 :
0 commit comments