 from sklearn.utils import resample
 from sklearn.base import ClassifierMixin, BaseEstimator
-from sklearn.preprocessing import LabelEncoder
+from sklearn.preprocessing import LabelBinarizer
 
 from math import ceil
 
 import numpy as np
 
 import torch as pyt
-
-from skorch import NeuralNetClassifier
+from torch.cuda import is_available as is_gpu_available
 
 from scipy.sparse import issparse
@@ -39,7 +38,12 @@ def __init__(self, n_in, n_out):
         self.D_3 = pyt.nn.Linear(
             in_features=self.n_out * 16, out_features=self.n_out
         )  # in_features = *8
-        self.O = pyt.nn.Softmax(dim=-1)
+
+        if self.n_out > 2:
+            self.O = pyt.nn.Softmax(dim=-1)
+
+        else:
+            self.O = pyt.nn.Sigmoid()
 
     def forward(self, x):
         o = self.IN(x)
@@ -50,10 +54,10 @@ def forward(self, x):
         o = self.D_2(o)
         o = self.A_2(o)
         o = self.Dr_2(o)
-        o = self.D_3(o)
-        o = self.O(o)
+        logit = self.D_3(o)
+        probs = self.O(logit)
 
-        return o
+        return logit, probs
 
 
 class ANNClassifier(ClassifierMixin, BaseEstimator):
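
The forward pass now returns the raw logit alongside the activated probability: BCEWithLogitsLoss (and CrossEntropyLoss) fold the activation into the loss for numerical stability, so training consumes the logit while inference consumes the probability. A minimal sketch of the equivalence, with illustrative tensor values:

import torch

logit = torch.tensor([[2.0], [-1.0]])  # raw output of a final Linear layer
target = torch.tensor([[1.0], [0.0]])  # binarized labels as float32

fused = torch.nn.BCEWithLogitsLoss()(logit, target)       # sigmoid fused into the loss
naive = torch.nn.BCELoss()(torch.sigmoid(logit), target)  # separate sigmoid, less stable
assert torch.allclose(fused, naive)  # same value on well-scaled logits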
@@ -71,7 +75,7 @@ def fit(self, X, y):
             X_not_sparse = X
 
         # Encode y
-        self.y_transformer = LabelEncoder().fit(y)
+        self.y_transformer = LabelBinarizer().fit(y)
 
         # Select features
         if X_not_sparse.shape[1] >= 4:
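
The encoder swap matters for target shapes: LabelEncoder maps labels to integer codes, whereas LabelBinarizer emits a single 0/1 column for binary problems and a one-hot matrix for multiclass ones, which is what the loss functions chosen below expect. A quick sketch with made-up labels:

from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(["cat", "dog"])
print(lb.transform(["dog", "cat"]))  # [[1] [0]] -- one column for binary

lb3 = LabelBinarizer().fit(["a", "b", "c"])
print(lb3.transform(["b"]))  # [[0 1 0]] -- one-hot row per sample for multiclass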
@@ -92,41 +96,96 @@ def fit(self, X, y):
             stratify=y,
         )
         X_trf = X_trf.astype(np.float32)
-        y_trf = self.y_transformer.transform(y_trf).astype(np.int64)
+        y_trf = self.y_transformer.transform(y_trf).astype(np.float32)
 
         # Determine if minimum class count exists
-        self.classes_, y_counts = np.unique(y_trf, return_counts=True)
+        self.classes_, y_counts = np.unique(y, return_counts=True)
 
         self.y_min = min(y_counts) * 0.8
 
         # Use neural network if more than 6 samples are present in the minority class
         if self.y_min > self.minority:
             self.n_in = X_trf.shape[1]
+
             self.n_out = self.classes_.shape[0]
+            if self.n_out == 2:
+                self.n_out = 1
+
+            # Get device
+            use_autocast = False
+            if is_gpu_available():
+                use_autocast = True
+                device_type = "cuda"  # autocast expects the device type without an index
+                self.device = pyt.device("cuda:0")
 
-            if pyt.cuda.is_available():
-                device = "cuda"
             else:
-                device = "cpu"
-
-            clf = NeuralNetClassifier(
-                LMNNet(n_in=X_trf.shape[1], n_out=self.classes_.shape[0]),
-                optimizer=pyt.optim.AdamW,
-                lr=0.001,
-                max_epochs=100,
-                batch_size=16,
-                device=device,
-                iterator_train__shuffle=True,
-                verbose=0,
-            )
+                device_type = "cpu"
+                self.device = pyt.device("cpu")
+
+            # Prepare data
+            X_trf = pyt.tensor(X_trf)
+            y_trf = pyt.tensor(y_trf)
+
+            dataset_train = pyt.utils.data.DataLoader(
+                list(zip(X_trf, y_trf)),
+                shuffle=True,
+                batch_size=16,
+            )
+
+            # Prepare model and load it onto the GPU or CPU
+            self.model = LMNNet(n_in=self.n_in, n_out=self.n_out)
+            self.model.to(self.device)
+
+            # Prepare optimizer
+            optimizer = pyt.optim.AdamW(self.model.parameters(), lr=0.01)
+
+            # Prepare loss function
+            if self.n_out > 2:
+                loss_fn = pyt.nn.CrossEntropyLoss().to(self.device)
+
+            else:
+                loss_fn = pyt.nn.BCEWithLogitsLoss().to(self.device)
+
+            scaler = pyt.amp.GradScaler(device_type, enabled=use_autocast)
+
+            # Training loop
+            for epoch in range(100):
+
+                if is_gpu_available():
+                    pyt.cuda.empty_cache()
+
+                # Training steps
+                self.model.train()
+
+                for batch_num, batch in enumerate(dataset_train):
+                    x_in, y_in = batch
+                    x_in = x_in.to(self.device)
+                    y_in = y_in.to(self.device)
 
-            clf.fit(X_trf, y_trf)
+                    with pyt.amp.autocast(
+                        device_type=device_type,
+                        dtype=pyt.bfloat16,
+                        enabled=use_autocast,
+                    ):
+
+                        x_logit, x_probs = self.model(x_in)
 
-            self.params = clf.module.state_dict()
+                        # Calculate loss (cross-entropy or BCE-with-logits)
+                        total_loss = loss_fn(x_logit, y_in)
+
+                    # Backwards pass
+                    optimizer.zero_grad()
+                    scaler.scale(total_loss).backward()
 
-            del clf
+                    # Update weights
+                    scaler.step(optimizer)
+                    scaler.update()
 
-            return self, self.decision_function(X)
+            self.params = self.model.state_dict()
+
+            del self.model
+
+            return self, self.decision_function(X)
 
         # Otherwise use an Extra Trees Classifier or Nothing
         else:
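
The hand-rolled loop replacing skorch follows the standard mixed-precision recipe: forward pass and loss under autocast, then backward pass and optimizer step routed through a GradScaler. A condensed sketch of that step order (model, loss_fn, optimizer, scaler, x, and y are placeholders, not this commit's API):

import torch

def train_step(model, loss_fn, optimizer, scaler, x, y, device_type="cuda"):
    # Forward pass and loss in reduced precision where it is safe
    with torch.amp.autocast(device_type=device_type, dtype=torch.bfloat16):
        logit, _ = model(x)
        loss = loss_fn(logit, y)

    # Backward pass and parameter update outside the autocast region
    optimizer.zero_grad()
    scaler.scale(loss).backward()  # pass-through when the scaler is disabled
    scaler.step(optimizer)
    scaler.update()
    return loss.item()

Gradient scaling mainly guards against float16 underflow; with bfloat16 the scaler is effectively a no-op, which is why constructing it disabled on CPU is safe.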
@@ -149,7 +208,8 @@ def predict_proba(self, X):
 
         predictions = []
         for start in n_batch:
-            p = clf(X_tensor[start : start + 16]).detach().cpu().numpy()
+            _, p = clf(X_tensor[start : start + 16])
+            p = p.detach().cpu().numpy()
             predictions.extend(p)
 
         predictions = np.asarray(predictions)
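
Since inference only needs the probability head, the batched loop above could also run under no_grad(), which skips autograd bookkeeping entirely rather than detaching afterwards. A sketch, assuming clf, X_tensor, and n_batch as already defined in predict_proba:

predictions = []
with pyt.no_grad():  # no graph is built, so detach() becomes unnecessary
    for start in n_batch:
        _, p = clf(X_tensor[start : start + 16])
        predictions.extend(p.cpu().numpy())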
@@ -161,11 +221,18 @@ def predict_proba(self, X):
     def decision_function(self, X):
         D = self.predict_proba(X)
 
-        return np.where(D > 0.5, 1, -1)
+        D = np.where(D > 0.5, 1, -1)
+
+        if self.n_out == 1:
+            D = D.flatten()
+
+        return D
 
     def predict(self, X):
         predictions = self.predict_proba(X)
 
-        predictions = np.argmax(predictions, axis=1)
+        # A single sigmoid column has no meaningful argmax; threshold it instead
+        if self.n_out == 1:
+            predictions = (predictions.flatten() > 0.5).astype(np.int64)
+        else:
+            predictions = np.argmax(predictions, axis=1)
 
-        return self.y_transformer.inverse_transform(predictions)
+        predictions = np.asarray([self.y_transformer.classes_[x] for x in predictions])
+
+        return predictions
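
End to end, the reworked estimator keeps the scikit-learn fit/predict/predict_proba surface, with the quirk that fit returns a (self, decision_function(X)) tuple. A hypothetical usage sketch on toy binary data (the bare ANNClassifier() constructor is an assumption; its arguments are not shown in this diff):

import numpy as np

X = np.random.rand(200, 10).astype(np.float32)  # toy features, illustrative only
y = np.random.randint(0, 2, size=200)           # two classes -> single sigmoid head

clf, train_scores = ANNClassifier().fit(X, y)   # fit returns (self, decision scores)
probs = clf.predict_proba(X)                    # one sigmoid column for binary
labels = clf.predict(X)                         # original label values restored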