@@ -83,6 +83,21 @@ cdef floating diff_abs_max(int n, const floating* a, floating* b) noexcept nogil
     return m


+message_conv = (
+    "Objective did not converge. You might want to increase "
+    "the number of iterations, check the scale of the "
+    "features or consider increasing regularisation."
+)
+
+
+message_ridge = (
+    "Linear regression models with a zero l1 penalization "
+    "strength are more efficiently fitted using one of the "
+    "solvers implemented in "
+    "sklearn.linear_model.Ridge/RidgeCV instead."
+)
+
+
 def enet_coordinate_descent(
     floating[::1] w,
     floating alpha,
@@ -141,7 +156,7 @@ def enet_coordinate_descent(
     cdef floating R_norm2
     cdef floating w_norm2
     cdef floating l1_norm
-    cdef floating const
+    cdef floating const_
     cdef floating A_norm2
     cdef unsigned int ii
     cdef unsigned int n_iter = 0
@@ -227,19 +242,18 @@ def enet_coordinate_descent(
                 w_norm2 = _dot(n_features, &w[0], 1, &w[0], 1)

                 if (dual_norm_XtA > alpha):
-                    const = alpha / dual_norm_XtA
-                    A_norm2 = R_norm2 * (const ** 2)
+                    const_ = alpha / dual_norm_XtA
+                    A_norm2 = R_norm2 * (const_ ** 2)
                     gap = 0.5 * (R_norm2 + A_norm2)
                 else:
-                    const = 1.0
+                    const_ = 1.0
                     gap = R_norm2

                 l1_norm = _asum(n_features, &w[0], 1)

-                # np.dot(R.T, y)
                 gap += (alpha * l1_norm
-                        - const * _dot(n_samples, &R[0], 1, &y[0], 1)
-                        + 0.5 * beta * (1 + const ** 2) * (w_norm2))
+                        - const_ * _dot(n_samples, &R[0], 1, &y[0], 1)  # np.dot(R.T, y)
+                        + 0.5 * beta * (1 + const_ ** 2) * (w_norm2))

                 if gap < tol:
                     # return if we reached desired tolerance
@@ -249,18 +263,11 @@ def enet_coordinate_descent(
             # for/else, runs if for doesn't end with a `break`
             with gil:
                 message = (
-                    "Objective did not converge. You might want to increase "
-                    "the number of iterations, check the scale of the "
-                    "features or consider increasing regularisation. "
-                    f"Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
+                    message_conv +
+                    f" Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
                 )
                 if alpha < np.finfo(np.float64).eps:
-                    message += (
-                        " Linear regression models with null weight for the "
-                        "l1 regularization term are more efficiently fitted "
-                        "using one of the solvers implemented in "
-                        "sklearn.linear_model.Ridge/RidgeCV instead."
-                    )
+                    message += "\n" + message_ridge
                 warnings.warn(message, ConvergenceWarning)

     return np.asarray(w), gap, tol, n_iter + 1
@@ -313,53 +320,50 @@ def sparse_enet_coordinate_descent(
     # that every calculation results as if we had rescaled y and X (and therefore also
     # X_mean) by sqrt(sample_weight) without actually calculating the square root.
     # We work with:
-    #     yw = sample_weight
+    #     yw = sample_weight * y
     #     R = sample_weight * residual
     #     norm_cols_X = np.sum(sample_weight * (X - X_mean)**2, axis=0)

+    if floating is float:
+        dtype = np.float32
+    else:
+        dtype = np.float64
+
     # get the data information into easy vars
     cdef unsigned int n_samples = y.shape[0]
     cdef unsigned int n_features = w.shape[0]

     # compute norms of the columns of X
-    cdef unsigned int ii
-    cdef floating[:] norm_cols_X
-
-    cdef unsigned int startptr = X_indptr[0]
-    cdef unsigned int endptr
+    cdef floating[:] norm_cols_X = np.zeros(n_features, dtype=dtype)

     # initial value of the residuals
     # R = y - Zw, weighted version R = sample_weight * (y - Zw)
     cdef floating[::1] R
-    cdef floating[::1] XtA
+    cdef floating[::1] XtA = np.empty(n_features, dtype=dtype)
     cdef const floating[::1] yw

-    if floating is float:
-        dtype = np.float32
-    else:
-        dtype = np.float64
-
-    norm_cols_X = np.zeros(n_features, dtype=dtype)
-    XtA = np.zeros(n_features, dtype=dtype)
-
     cdef floating tmp
     cdef floating w_ii
     cdef floating d_w_max
     cdef floating w_max
     cdef floating d_w_ii
+    cdef floating gap = tol + 1.0
+    cdef floating d_w_tol = tol
+    cdef floating dual_norm_XtA
     cdef floating X_mean_ii
     cdef floating R_sum = 0.0
     cdef floating R_norm2
     cdef floating w_norm2
-    cdef floating A_norm2
     cdef floating l1_norm
+    cdef floating const_
+    cdef floating A_norm2
     cdef floating normalize_sum
-    cdef floating gap = tol + 1.0
-    cdef floating d_w_tol = tol
-    cdef floating dual_norm_XtA
+    cdef unsigned int ii
     cdef unsigned int jj
     cdef unsigned int n_iter = 0
     cdef unsigned int f_iter
+    cdef unsigned int startptr = X_indptr[0]
+    cdef unsigned int endptr
     cdef uint32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX)
     cdef uint32_t* rand_r_state = &rand_r_state_seed
     cdef bint center = False
@@ -380,6 +384,7 @@ def sparse_enet_coordinate_descent(
                 center = True
                 break

+        # R = y - np.dot(X, w)
         for ii in range(n_features):
             X_mean_ii = X_mean[ii]
             endptr = X_indptr[ii + 1]
@@ -396,6 +401,7 @@ def sparse_enet_coordinate_descent(
                     for jj in range(n_samples):
                         R[jj] += X_mean_ii * w_ii
             else:
+                # R = sw * (y - np.dot(X, w))
                 for jj in range(startptr, endptr):
                     tmp = sample_weight[X_indices[jj]]
                     # second term will be subtracted by loop over range(n_samples)
@@ -526,21 +532,18 @@ def sparse_enet_coordinate_descent(
                 # w_norm2 = np.dot(w, w)
                 w_norm2 = _dot(n_features, &w[0], 1, &w[0], 1)
                 if (dual_norm_XtA > alpha):
-                    const = alpha / dual_norm_XtA
-                    A_norm2 = R_norm2 * const ** 2
+                    const_ = alpha / dual_norm_XtA
+                    A_norm2 = R_norm2 * const_ ** 2
                     gap = 0.5 * (R_norm2 + A_norm2)
                 else:
-                    const = 1.0
+                    const_ = 1.0
                     gap = R_norm2

                 l1_norm = _asum(n_features, &w[0], 1)

-                gap += (alpha * l1_norm - const * _dot(
-                    n_samples,
-                    &R[0], 1,
-                    &y[0], 1
-                    )
-                    + 0.5 * beta * (1 + const ** 2) * w_norm2)
+                gap += (alpha * l1_norm
+                        - const_ * _dot(n_samples, &R[0], 1, &y[0], 1)  # np.dot(R.T, y)
+                        + 0.5 * beta * (1 + const_ ** 2) * w_norm2)

                 if gap < tol:
                     # return if we reached desired tolerance
@@ -549,10 +552,13 @@ def sparse_enet_coordinate_descent(
         else:
             # for/else, runs if for doesn't end with a `break`
             with gil:
-                warnings.warn("Objective did not converge. You might want to "
-                              "increase the number of iterations. Duality "
-                              "gap: {}, tolerance: {}".format(gap, tol),
-                              ConvergenceWarning)
+                message = (
+                    message_conv +
+                    f" Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
+                )
+                if alpha < np.finfo(np.float64).eps:
+                    message += "\n" + message_ridge
+                warnings.warn(message, ConvergenceWarning)

     return np.asarray(w), gap, tol, n_iter + 1

@@ -702,19 +708,19 @@ def enet_coordinate_descent_gram(
                 w_norm2 = _dot(n_features, &w[0], 1, &w[0], 1)

                 if (dual_norm_XtA > alpha):
-                    const = alpha / dual_norm_XtA
-                    A_norm2 = R_norm2 * (const ** 2)
+                    const_ = alpha / dual_norm_XtA
+                    A_norm2 = R_norm2 * (const_ ** 2)
                     gap = 0.5 * (R_norm2 + A_norm2)
                 else:
-                    const = 1.0
+                    const_ = 1.0
                     gap = R_norm2

                 # The call to asum is equivalent to the L1 norm of w
                 gap += (
                     alpha * _asum(n_features, &w[0], 1)
-                    - const * y_norm2
-                    + const * q_dot_w
-                    + 0.5 * beta * (1 + const ** 2) * w_norm2
+                    - const_ * y_norm2
+                    + const_ * q_dot_w
+                    + 0.5 * beta * (1 + const_ ** 2) * w_norm2
                 )

                 if gap < tol:
@@ -724,10 +730,11 @@ def enet_coordinate_descent_gram(
         else:
             # for/else, runs if for doesn't end with a `break`
             with gil:
-                warnings.warn("Objective did not converge. You might want to "
-                              "increase the number of iterations. Duality "
-                              "gap: {}, tolerance: {}".format(gap, tol),
-                              ConvergenceWarning)
+                message = (
+                    message_conv +
+                    f" Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
+                )
+                warnings.warn(message, ConvergenceWarning)

     return np.asarray(w), gap, tol, n_iter + 1

@@ -921,11 +928,11 @@ def enet_coordinate_descent_multi_task(
                 R_norm = _nrm2(n_samples * n_tasks, &R[0, 0], 1)
                 w_norm = _nrm2(n_features * n_tasks, &W[0, 0], 1)
                 if (dual_norm_XtA > l1_reg):
-                    const = l1_reg / dual_norm_XtA
-                    A_norm = R_norm * const
+                    const_ = l1_reg / dual_norm_XtA
+                    A_norm = R_norm * const_
                     gap = 0.5 * (R_norm ** 2 + A_norm ** 2)
                 else:
-                    const = 1.0
+                    const_ = 1.0
                     gap = R_norm ** 2

                 # ry_sum = np.sum(R * y)
@@ -938,8 +945,8 @@ def enet_coordinate_descent_multi_task(

                 gap += (
                     l1_reg * l21_norm
-                    - const * ry_sum
-                    + 0.5 * l2_reg * (1 + const ** 2) * (w_norm ** 2)
+                    - const_ * ry_sum
+                    + 0.5 * l2_reg * (1 + const_ ** 2) * (w_norm ** 2)
                 )

                 if gap <= tol:
@@ -948,9 +955,10 @@ def enet_coordinate_descent_multi_task(
         else:
             # for/else, runs if for doesn't end with a `break`
             with gil:
-                warnings.warn("Objective did not converge. You might want to "
-                              "increase the number of iterations. Duality "
-                              "gap: {}, tolerance: {}".format(gap, tol),
-                              ConvergenceWarning)
+                message = (
+                    message_conv +
+                    f" Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
+                )
+                warnings.warn(message, ConvergenceWarning)

     return np.asarray(W), gap, tol, n_iter + 1
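For reference, the duality-gap check that every hunk above touches (the diff renames `const` to `const_` and factors the warning text into shared module-level constants, without changing the math) can be sketched in plain NumPy. This is a sketch of the dense single-output case only, for the objective 0.5 * ||y - Xw||^2 + alpha * ||w||_1 + 0.5 * beta * ||w||^2; `enet_duality_gap` is a hypothetical helper name, and the actual solvers use BLAS calls (`_dot`, `_asum`) where this uses `@` and `sum`:

```python
import numpy as np

def enet_duality_gap(X, y, w, alpha, beta):
    # Mirrors the check inside the nogil loop: the solver stops when gap < tol.
    R = y - X @ w                         # residual
    XtA = X.T @ R - beta * w
    dual_norm_XtA = np.max(np.abs(XtA))
    R_norm2 = R @ R                       # np.dot(R, R)
    w_norm2 = w @ w                       # np.dot(w, w)
    if dual_norm_XtA > alpha:
        # rescale R so the corresponding dual point is feasible
        const_ = alpha / dual_norm_XtA
        gap = 0.5 * (R_norm2 + R_norm2 * const_ ** 2)
    else:
        const_ = 1.0
        gap = R_norm2
    gap += (alpha * np.abs(w).sum()       # l1_norm
            - const_ * (R @ y)            # np.dot(R.T, y)
            + 0.5 * beta * (1 + const_ ** 2) * w_norm2)
    return gap
```

A gap below `tol` certifies that the iterate is within `tol` of the optimum, which is why the shared `message_conv` warning reports the final gap and tolerance whenever the iteration budget runs out first.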