@@ -1,11 +1,14 @@
 """
 Kullback-Leibler Importance Estimation Procedure
 """
+import itertools
+import warnings
 
 import numpy as np
 from sklearn.metrics import pairwise
 from sklearn.exceptions import NotFittedError
 from sklearn.utils import check_array
+from sklearn.metrics.pairwise import KERNEL_PARAMS
 
 from adapt.base import BaseAdaptEstimator, make_insert_doc
 from adapt.utils import set_random_seed
@@ -29,29 +32,29 @@ class KLIEP(BaseAdaptEstimator):
 
     .. math::
 
-        w(x) = \sum_{x_i \in X_T} \\alpha_i K_{\sigma}(x, x_i)
+        w(x) = \sum_{x_i \in X_T} \\alpha_i K(x, x_i)
 
     Where:
 
     - :math:`x, x_i` are input instances.
     - :math:`X_T` is the target input data.
     - :math:`\\alpha_i` are the basis functions coefficients.
-    - :math:`K_{\sigma}(x, x_i) = \\text{exp}(-\\frac{||x - x_i||^2}{2\sigma^2})`
-      are kernel functions of bandwidth :math:`\sigma`.
+    - :math:`K(x, x_i) = \\text{exp}(-\\gamma ||x - x_i||^2)`
+      for instance if ``kernel="rbf"``.
 
     The KLIEP algorithm consists in finding the optimal :math:`\\alpha_i` according to
     the following optimization problem:
 
     .. math::
 
         \max_{\\alpha_i} \sum_{x_i \in X_T} \log(
-        \sum_{x_j \in X_T} \\alpha_i K_{\sigma}(x_j, x_i))
+        \sum_{x_j \in X_T} \\alpha_j K(x_j, x_i))
 
     Subject to:
 
     .. math::
 
-        \sum_{x_k \in X_S} \sum_{x_j \in X_T} \\alpha_i K_{\sigma}(x_j, x_k)) = n_S
+        \sum_{x_k \in X_S} \sum_{x_j \in X_T} \\alpha_j K(x_j, x_k) = n_S
 
     Where:
 
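For intuition, here is a minimal numpy sketch of the weighting model above. The `alphas` and `centers` values are illustrative stand-ins for the fitted `alphas_` and `centers_` attributes:

    import numpy as np
    from sklearn.metrics.pairwise import pairwise_kernels

    rng = np.random.RandomState(0)
    Xs = rng.randn(100, 2)           # source samples
    centers = rng.randn(10, 2)       # kernel centers drawn from the target data
    alphas = np.ones((10, 1)) / 10   # basis coefficients (learned by KLIEP)

    # w(x) = sum_i alpha_i K(x, x_i), here with an RBF kernel of gamma=0.5
    weights = np.dot(pairwise_kernels(Xs, centers, metric="rbf", gamma=0.5),
                     alphas).ravel()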
@@ -60,8 +63,9 @@ class KLIEP(BaseAdaptEstimator):
     The above OP is solved through a gradient ascent algorithm.
 
     Furthermore, an LCV procedure can be added to select the appropriate
-    bandwidth :math:`\sigma`. The parameter is then selected using
-    cross-validation on the :math:`J` score defined as follow:
+    parameters of the kernel function :math:`K` (typically, the parameter
+    :math:`\\gamma` of the Gaussian kernel). The parameters are then selected using
+    cross-validation on the :math:`J` score defined as follows:
 
     :math:`J = \\frac{1}{|\\mathcal{X}|} \\sum_{x \\in \\mathcal{X}} \\text{log}(w(x))`
 
     Finally, an estimator is fitted using the reweighted labeled source instances.
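As a sketch, the J score used by the LCV procedure is simply the mean log of the estimated weights over held-out target samples (the `weights_test` values below are made up):

    import numpy as np

    # J = (1/|X|) * sum_x log(w(x)); higher is better, so the LCV
    # procedure keeps the kernel parameters that maximize J.
    weights_test = np.array([0.8, 1.2, 0.9, 1.1])
    J = np.mean(np.log(weights_test))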
@@ -71,11 +75,16 @@ class KLIEP(BaseAdaptEstimator):
     target data to the training set.
 
     Parameters
-    ----------
-    sigmas : float or list of float (default=1/nb_features)
-        Kernel bandwidths.
-        If ``sigmas`` is a list of multiple values, the
-        kernel bandwidth is selected with the LCV procedure.
+    ----------
+    kernel : str (default="rbf")
+        Kernel metric.
+        Possible values: ['additive_chi2', 'chi2',
+        'linear', 'poly', 'polynomial', 'rbf',
+        'laplacian', 'sigmoid', 'cosine']
+
+    sigmas : float or list of float (default=None)
+        Deprecated, please use the ``gamma`` parameter
+        instead.
 
     cv : int (default=5)
         Cross-validation split parameter.
@@ -94,23 +103,69 @@ class KLIEP(BaseAdaptEstimator):
     max_iter : int (default=5000)
         Maximal iteration of the gradient ascent
         optimization.
+
+    Yields
+    ------
+    gamma : float or list of float
+        Kernel parameter ``gamma``.
+
+        - For kernel = chi2::
+
+            k(x, y) = exp(-gamma Sum [(x - y)^2 / (x + y)])
+
+        - For kernel = poly or polynomial::
+
+            K(X, Y) = (gamma <X, Y> + coef0)^degree
+
+        - For kernel = rbf::
+
+            K(x, y) = exp(-gamma ||x-y||^2)
+
+        - For kernel = laplacian::
+
+            K(x, y) = exp(-gamma ||x-y||_1)
+
+        - For kernel = sigmoid::
+
+            K(X, Y) = tanh(gamma <X, Y> + coef0)
+
+        If a list is given, the LCV process is performed to
+        select the best parameter ``gamma``.
+
+    coef0 : float or list of float
+        Kernel parameter ``coef0``.
+        Used for polynomial and sigmoid kernels.
+        See ``gamma`` parameter above for the
+        kernel formulas.
+        If a list is given, the LCV process is performed to
+        select the best parameter ``coef0``.
+
+    degree : int or list of int
+        Degree parameter for the polynomial
+        kernel (see formula in the ``gamma``
+        parameter description).
+        If a list is given, the LCV process is performed to
+        select the best parameter ``degree``.
 
     Attributes
     ----------
     weights_ : numpy array
         Training instance weights.
 
-    sigma_ : float
-        Sigma selected for the kernel
+    best_params_ : dict
+        Best kernel params combination
+        deduced from the LCV procedure.
 
     alphas_ : numpy array
         Basis functions coefficients.
 
     centers_ : numpy array
         Center points for kernels.
 
-    j_scores_ : list of float
-        List of J scores.
+    j_scores_ : dict
+        Dict of J scores with the
+        kernel params combinations as
+        keys and the J scores as values.
 
     estimator_ : object
         Fitted estimator.
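The kernel formulas documented above follow scikit-learn's pairwise kernels; a quick sanity-check sketch on toy vectors:

    import numpy as np
    from sklearn.metrics.pairwise import pairwise_kernels

    X = np.array([[1., 2.]])
    Y = np.array([[0.5, -1.]])
    gamma, coef0, degree = 0.1, 1., 3

    # rbf: K(x, y) = exp(-gamma ||x - y||^2)
    k_rbf = pairwise_kernels(X, Y, metric="rbf", gamma=gamma)
    assert np.allclose(k_rbf, np.exp(-gamma * np.sum((X - Y) ** 2)))

    # poly: K(X, Y) = (gamma <X, Y> + coef0)^degree
    k_poly = pairwise_kernels(X, Y, metric="poly", gamma=gamma,
                              coef0=coef0, degree=degree)
    assert np.allclose(k_poly, (gamma * X.dot(Y.T) + coef0) ** degree)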
@@ -154,6 +209,7 @@ class KLIEP(BaseAdaptEstimator):
     def __init__(self,
                  estimator=None,
                  Xt=None,
+                 kernel="rbf",
                  sigmas=None,
                  max_centers=100,
                  cv=5,
@@ -165,6 +221,11 @@ def __init__(self,
                  random_state=None,
                  **params):
 
+        if sigmas is not None:
+            warnings.warn("The `sigmas` argument is deprecated, "
+                          "please use the `gamma` argument instead.",
+                          DeprecationWarning)
+
         names = self._get_param_names()
         kwargs = {k: v for k, v in locals().items() if k in names}
         kwargs.update(params)
@@ -194,9 +255,23 @@ def fit_weights(self, Xs, Xt, **kwargs):
         Xt = check_array(Xt)
         set_random_seed(self.random_state)
 
-        self.j_scores_ = []
+        self.j_scores_ = {}
+
+        # LCV GridSearch
+        kernel_params = {k: v for k, v in self.__dict__.items()
+                         if k in KERNEL_PARAMS[self.kernel]}
+
+        # Handle deprecated sigmas (will be removed)
+        if (self.sigmas is not None) and ("gamma" not in kernel_params):
+            kernel_params["gamma"] = self.sigmas
+
+        # Expand scalar parameters to singleton lists, then take the
+        # cartesian product to get every parameter combination to test.
+        params_dict = {k: (v if hasattr(v, "__iter__") else [v])
+                       for k, v in kernel_params.items()}
+        keys = params_dict.keys()
+        values = (params_dict[key] for key in keys)
+        params_comb = [dict(zip(keys, combination))
+                       for combination in itertools.product(*values)]
 
-        if hasattr(self.sigmas, "__iter__"):
+        if len(params_comb) > 1:
             # Cross-validation process
             if len(Xt) < self.cv:
                 raise ValueError("Length of Xt is smaller than cv value")
@@ -206,23 +281,28 @@ def fit_weights(self, Xs, Xt, **kwargs):
 
             shuffled_index = np.arange(len(Xt))
             np.random.shuffle(shuffled_index)
-
-            for sigma in self.sigmas:
-                cv_scores = self._cross_val_jscore(Xs, Xt[shuffled_index], sigma, self.cv)
-                self.j_scores_.append(np.mean(cv_scores))
+
+            max_ = -np.inf
+            for params in params_comb:
+                cv_scores = self._cross_val_jscore(Xs, Xt[shuffled_index], params, self.cv)
+                self.j_scores_[str(params)] = np.mean(cv_scores)
 
                 if self.verbose:
-                    print("Parameter sigma = %.4f -- J-score = %.3f (%.3f)" %
-                          (sigma, np.mean(cv_scores), np.std(cv_scores)))
+                    print("Parameters %s -- J-score = %.3f (%.3f)" %
+                          (str(params), np.mean(cv_scores), np.std(cv_scores)))
 
-            self.sigma_ = self.sigmas[np.argmax(self.j_scores_)]
+                # Keep the parameter combination with the highest J score
+                if self.j_scores_[str(params)] > max_:
+                    self.best_params_ = params
+                    max_ = self.j_scores_[str(params)]
         else:
-            self.sigma_ = self.sigmas
+            self.best_params_ = params_comb[0]
 
-        self.alphas_, self.centers_ = self._fit(Xs, Xt, self.sigma_)
+        self.alphas_, self.centers_ = self._fit(Xs, Xt, self.best_params_)
 
         self.weights_ = np.dot(
-            pairwise.rbf_kernel(Xs, self.centers_, self.sigma_),
+            pairwise.pairwise_kernels(Xs, self.centers_,
+                                      metric=self.kernel,
+                                      **self.best_params_),
             self.alphas_
         ).ravel()
         return self.weights_
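The grid expansion at the top of `fit_weights` is the standard `itertools.product` pattern; a standalone sketch of what `params_comb` contains:

    import itertools

    params_dict = {"gamma": [0.1, 1.0], "coef0": [0.0, 1.0], "degree": [2]}
    keys = params_dict.keys()
    values = (params_dict[key] for key in keys)
    params_comb = [dict(zip(keys, comb)) for comb in itertools.product(*values)]
    # -> [{'gamma': 0.1, 'coef0': 0.0, 'degree': 2},
    #     {'gamma': 0.1, 'coef0': 1.0, 'degree': 2},
    #     {'gamma': 1.0, 'coef0': 0.0, 'degree': 2},
    #     {'gamma': 1.0, 'coef0': 1.0, 'degree': 2}]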
@@ -251,7 +331,9 @@ def predict_weights(self, X=None):
         else:
             X = check_array(X)
             weights = np.dot(
-                pairwise.rbf_kernel(X, self.centers_, self.sigma_),
+                pairwise.pairwise_kernels(X, self.centers_,
+                                          metric=self.kernel,
+                                          **self.best_params_),
                 self.alphas_
             ).ravel()
             return weights
@@ -260,15 +342,18 @@ def predict_weights(self, X=None):
                             "call 'fit_weights' or 'fit' first.")
 
 
-    def _fit(self, Xs, Xt, sigma):
+    def _fit(self, Xs, Xt, kernel_params):
         index_centers = np.random.choice(
             len(Xt),
             min(len(Xt), self.max_centers),
             replace=False)
         centers = Xt[index_centers]
-
-        A = pairwise.rbf_kernel(Xt, centers, sigma)
-        b = np.mean(pairwise.rbf_kernel(centers, Xs, sigma), axis=1)
+
+        A = pairwise.pairwise_kernels(Xt, centers, metric=self.kernel,
+                                      **kernel_params)
+        B = pairwise.pairwise_kernels(centers, Xs, metric=self.kernel,
+                                      **kernel_params)
+        b = np.mean(B, axis=1)
         b = b.reshape(-1, 1)
 
         alpha = np.ones((len(centers), 1)) / len(centers)
@@ -297,7 +382,7 @@ def _fit(self, Xs, Xt, sigma):
         return alpha, centers
 
 
-    def _cross_val_jscore(self, Xs, Xt, sigma, cv):
+    def _cross_val_jscore(self, Xs, Xt, kernel_params, cv):
         split = int(len(Xt) / cv)
         cv_scores = []
         for i in range(cv):
@@ -308,13 +393,14 @@ def _cross_val_jscore(self, Xs, Xt, sigma, cv):
 
             alphas, centers = self._fit(Xs,
                                         Xt[train_index],
-                                        sigma)
+                                        kernel_params)
 
             j_score = np.mean(np.log(
                 np.dot(
-                    pairwise.rbf_kernel(Xt[test_index],
-                                        centers,
-                                        sigma),
+                    pairwise.pairwise_kernels(Xt[test_index],
+                                              centers,
+                                              metric=self.kernel,
+                                              **kernel_params),
                     alphas
                 ) + EPS
             ))
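Taken together, a hedged usage sketch of the new API (assuming KLIEP is exposed as `adapt.instance_based.KLIEP` and that extra kernel parameters such as `gamma` are forwarded through `**params`, as the diff suggests):

    import numpy as np
    from adapt.instance_based import KLIEP

    Xs = np.random.randn(80, 3)
    Xt = np.random.randn(60, 3) + 0.5

    # Before: KLIEP(sigmas=[0.1, 1., 10.]) -- still works but raises a
    # DeprecationWarning. After: the same LCV search expressed through
    # the generic kernel API.
    kliep = KLIEP(kernel="rbf", gamma=[0.1, 1., 10.], cv=5, random_state=0)
    weights = kliep.fit_weights(Xs, Xt)
    print(kliep.best_params_)   # best combination found by the LCV procedure
    new_weights = kliep.predict_weights(np.random.randn(5, 3))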