 import numpy as np
 from sklearn.exceptions import NotFittedError
-from scipy.optimize import minimize
+from sklearn.preprocessing import LabelBinarizer
+from scipy.sparse.linalg import lsqr
 import tensorflow as tf
 from tensorflow.keras import Sequential
 from tensorflow.keras.layers import Flatten, Dense
@@ -161,37 +162,63 @@ def fit(self, Xt=None, yt=None, **fit_params):
         self.estimator_ = check_estimator(self.estimator,
                                           copy=self.copy,
                                           force_copy=True)
-
+
         if self.estimator_.fit_intercept:
+            intercept_ = np.reshape(
+                self.estimator_.intercept_,
+                np.ones(self.estimator_.coef_.shape).mean(-1, keepdims=True).shape)
             beta_src = np.concatenate((
-                self.estimator_.intercept_ * np.ones(yt.shape).mean(0, keepdims=True),
-                self.estimator_.coef_.transpose()
-            ))
+                intercept_,
+                self.estimator_.coef_
+            ), axis=-1)
             Xt = np.concatenate(
                 (np.ones((len(Xt), 1)), Xt),
                 axis=-1)
         else:
-            beta_src = self.estimator_.coef_.transpose()
+            beta_src = self.estimator_.coef_
 
-        func = self._get_func(Xt, yt, beta_src)
+        yt_ndim_below_one_ = False
+        if yt.ndim <= 1:
+            yt = yt.reshape(-1, 1)
+            yt_ndim_below_one_ = True
+
+        if beta_src.ndim <= 1:
+            beta_src = beta_src.reshape(1, -1)
+
+        if beta_src.shape[0] != yt.shape[1]:
+            raise ValueError("The number of features of `yt`"
+                             " does not match the number of coefs in 'estimator', "
+                             "expected %i, got %i" % (beta_src.shape[0], yt.shape[1]))
+
+        if beta_src.shape[1] != Xt.shape[1]:
+            beta_shape = beta_src.shape[1]
+            Xt_shape = Xt.shape[1]
+            if self.estimator_.fit_intercept:
+                beta_shape -= 1
+                Xt_shape -= 1
+            raise ValueError("The number of features of `Xt`"
+                             " does not match the number of coefs in 'estimator', "
+                             "expected %i, got %i" % (beta_shape, Xt_shape))
+
+        # Solve one damped least-squares problem per output column,
+        # warm-started at the matching row of the source coefficients.
+        beta_tgt = []
+        for i in range(yt.shape[1]):
+            sol = lsqr(A=Xt, b=yt[:, i], damp=self.lambda_, x0=beta_src[i, :])
+            beta_tgt.append(sol[0])
+
+        beta_tgt = np.stack(beta_tgt, axis=0)
 
-        beta_tgt = minimize(func, beta_src)['x']
-        beta_tgt = beta_tgt.reshape(beta_src.shape)
-
         if self.estimator_.fit_intercept:
-            self.estimator_.intercept_ = beta_tgt[0]
-            self.estimator_.coef_ = beta_tgt[1:].transpose()
+            self.coef_ = beta_tgt[:, 1:]
+            self.intercept_ = beta_tgt[:, 0]
         else:
-            self.estimator_.coef_ = beta_tgt.transpose()
+            self.coef_ = beta_tgt
+
+        if yt_ndim_below_one_:
+            self.coef_ = self.coef_.reshape(-1)
+            if self.estimator_.fit_intercept:
+                self.intercept_ = self.intercept_[0]
+
+        self.estimator_.coef_ = self.coef_
+        if self.estimator_.fit_intercept:
+            self.estimator_.intercept_ = self.intercept_
 
         return self
-
-
-    def _get_func(self, Xt, yt, beta_src):
-        def func(beta):
-            beta = beta.reshape(beta_src.shape)
-            return (np.linalg.norm(Xt.dot(beta) - yt) ** 2 +
-                    self.lambda_ * np.linalg.norm(beta - beta_src) ** 2)
-        return func
 
 
 
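For reviewers unfamiliar with `lsqr`: with a nonzero `damp` and a warm start `x0`, `scipy.sparse.linalg.lsqr` solves the shifted system for the increment `dx = x - x0`, so it minimizes `||A x - b||^2 + damp^2 ||x - x0||^2`. That is exactly the regularized transfer objective with the source coefficients as the prior, except that the penalty enters as `damp` squared (so `damp=self.lambda_` weights the penalty by `lambda_ ** 2`). A minimal sketch checking this against the closed-form solution; everything below is illustrative, not part of the diff:

```python
import numpy as np
from scipy.sparse.linalg import lsqr

rng = np.random.RandomState(0)
X = rng.randn(50, 3)          # stands in for Xt
y = rng.randn(50)             # stands in for one column of yt
beta_src = rng.randn(3)       # stands in for one row of beta_src
damp = 0.7

# Warm-started damped least squares: returns the argmin of
# ||X b - y||^2 + damp^2 * ||b - beta_src||^2.
beta = lsqr(X, y, damp=damp, x0=beta_src)[0]

# Closed form of the same objective:
# (X'X + damp^2 I)^-1 (X'y + damp^2 beta_src)
d2 = damp ** 2
ref = np.linalg.solve(X.T @ X + d2 * np.eye(3), X.T @ y + d2 * beta_src)
assert np.allclose(beta, ref, atol=1e-4)
```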
@@ -201,26 +228,23 @@ class RegularTransferLC(RegularTransferLR):
     Regular Transfer for Linear Classification
 
     RegularTransferLC is a parameter-based domain adaptation method.
-
-    The method is based on the assumption that a good target estimator
-    can be obtained by adapting the parameters of a pre-trained source
-    estimator using a few labeled target data.
-
-    The approach consist in fitting a linear estimator on target data
-    according to an objective function regularized by the euclidean
-    distance between source and target parameters:
+
+    This classifier first converts the target values into ``{-1, 1}``
+    and then treats the problem as a regression task
+    (multi-output regression in the multiclass case). It then fits
+    the target data as a ``RegularTransferLR`` regressor, i.e. it
+    performs the following optimization:
 
     .. math::
 
-        \\beta_T = \\underset{\\beta \\in \\mathbb{R}^p}{\\text{argmin}}
-        \\, \\ell(\\beta, X_T, y_T) + \\lambda ||\\beta - \\beta_S||^2
+        \\beta_T = \\underset{\\beta \\in \\mathbb{R}^p}{\\text{argmin}}
+        \\, ||X_T \\beta - y_T||^2 + \\lambda ||\\beta - \\beta_S||^2
 
     Where:
 
-    - :math:`\\ell` is the log-likelihood function.
     - :math:`\\beta_T` are the target model parameters.
     - :math:`\\beta_S = \\underset{\\beta \\in \\mathbb{R}^p}{\\text{argmin}}
-      \\, \\ell(\\beta, X_S, y_S)` are the source model parameters.
+      \\, ||X_S \\beta - y_S||^2` are the source model parameters.
     - :math:`(X_S, y_S), (X_T, y_T)` are respectively the source and
       the target labeled data.
     - :math:`p` is the number of features in :math:`X_T`
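A quick illustration of the binarization step described above (a sketch using scikit-learn's `LabelBinarizer` with the same arguments as in the diff):

```python
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer(pos_label=1, neg_label=-1)

# Binary labels become a single {-1, 1} column:
lb.fit_transform([0, 1, 1, 0])
# array([[-1], [ 1], [ 1], [-1]])

# Three or more classes become one {-1, 1} column per class,
# which RegularTransferLR then fits as a multi-output regression:
lb.fit_transform(["a", "b", "c"])
# array([[ 1, -1, -1],
#        [-1,  1, -1],
#        [-1, -1,  1]])
```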
@@ -270,13 +294,13 @@ class RegularTransferLC(RegularTransferLR):
     """
     ### TODO reshape yt for multiclass.
 
-    def _get_func(self, Xt, yt, beta_src):
-        def func(beta):
-            beta = beta.reshape(beta_src.shape)
-            return (np.sum(np.log(1 + np.exp(
-                    -(2 * yt - 1) * Xt.dot(beta)))) +
-                    self.lambda_ * np.linalg.norm(beta - beta_src) ** 2)
-        return func
+    def fit(self, Xt=None, yt=None, **fit_params):
+        Xt, yt = self._get_target_data(Xt, yt)
+        Xt, yt = check_arrays(Xt, yt)
+
+        _label_binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
+        yt = _label_binarizer.fit_transform(yt)
+        return super().fit(Xt, yt, **fit_params)
 
 
 @make_insert_doc(["task"], supervised=True)
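For completeness, a hypothetical end-to-end use of the updated classifier. The constructor arguments (`estimator`, `lambda_`) and the import path are assumed from the attributes referenced in this diff, not verified against the library:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from adapt.parameter_based import RegularTransferLC  # assumed import path

rng = np.random.RandomState(0)
Xs, ys = rng.randn(100, 5), rng.randint(2, size=100)  # source data
Xt, yt = rng.randn(10, 5), rng.randint(2, size=10)    # few labeled target points

src = LogisticRegression().fit(Xs, ys)

# lambda_ trades off fitting the target data against staying
# close to the source coefficients (assumed constructor signature).
model = RegularTransferLC(src, lambda_=1.0)
model.fit(Xt, yt)  # binarizes yt to {-1, 1}, then one lsqr solve per column
```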