@@ -83,6 +83,21 @@ cdef floating diff_abs_max(int n, const floating* a, floating* b) noexcept nogil
     return m


+message_conv = (
+    "Objective did not converge. You might want to increase "
+    "the number of iterations, check the scale of the "
+    "features or consider increasing regularisation."
+)
+
+
+message_ridge = (
+    "Linear regression models with a zero l1 penalization "
+    "strength are more efficiently fitted using one of the "
+    "solvers implemented in "
+    "sklearn.linear_model.Ridge/RidgeCV instead."
+)
+
+
 def enet_coordinate_descent(
     floating[::1] w,
     floating alpha,
@@ -141,7 +156,7 @@ def enet_coordinate_descent(
     cdef floating R_norm2
     cdef floating w_norm2
     cdef floating l1_norm
-    cdef floating const
+    cdef floating const_
     cdef floating A_norm2
     cdef unsigned int ii
     cdef unsigned int n_iter = 0
@@ -227,19 +242,18 @@ def enet_coordinate_descent(
                 w_norm2 = _dot(n_features, &w[0], 1, &w[0], 1)

                 if (dual_norm_XtA > alpha):
-                    const = alpha / dual_norm_XtA
-                    A_norm2 = R_norm2 * (const ** 2)
+                    const_ = alpha / dual_norm_XtA
+                    A_norm2 = R_norm2 * (const_ ** 2)
                     gap = 0.5 * (R_norm2 + A_norm2)
                 else:
-                    const = 1.0
+                    const_ = 1.0
                     gap = R_norm2

                 l1_norm = _asum(n_features, &w[0], 1)

-                # np.dot(R.T, y)
                 gap += (alpha * l1_norm
-                        - const * _dot(n_samples, &R[0], 1, &y[0], 1)
-                        + 0.5 * beta * (1 + const ** 2) * (w_norm2))
+                        - const_ * _dot(n_samples, &R[0], 1, &y[0], 1)  # np.dot(R.T, y)
+                        + 0.5 * beta * (1 + const_ ** 2) * (w_norm2))

                 if gap < tol:
                     # return if we reached desired tolerance
@@ -249,18 +263,11 @@ def enet_coordinate_descent(
             # for/else, runs if for doesn't end with a `break`
             with gil:
                 message = (
-                    "Objective did not converge. You might want to increase "
-                    "the number of iterations, check the scale of the "
-                    "features or consider increasing regularisation. "
-                    f"Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
+                    message_conv +
+                    f" Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
                 )
                 if alpha < np.finfo(np.float64).eps:
-                    message += (
-                        " Linear regression models with null weight for the "
-                        "l1 regularization term are more efficiently fitted "
-                        "using one of the solvers implemented in "
-                        "sklearn.linear_model.Ridge/RidgeCV instead."
-                    )
+                    message += "\n" + message_ridge
                 warnings.warn(message, ConvergenceWarning)

     return np.asarray(w), gap, tol, n_iter + 1
@@ -313,53 +320,50 @@ def sparse_enet_coordinate_descent(
     # that every calculation results as if we had rescaled y and X (and therefore also
     # X_mean) by sqrt(sample_weight) without actually calculating the square root.
     # We work with:
-    #     yw = sample_weight
+    #     yw = sample_weight * y
     #     R = sample_weight * residual
     #     norm_cols_X = np.sum(sample_weight * (X - X_mean)**2, axis=0)

+    if floating is float:
+        dtype = np.float32
+    else:
+        dtype = np.float64
+
     # get the data information into easy vars
     cdef unsigned int n_samples = y.shape[0]
     cdef unsigned int n_features = w.shape[0]

     # compute norms of the columns of X
-    cdef unsigned int ii
-    cdef floating[:] norm_cols_X
-
-    cdef unsigned int startptr = X_indptr[0]
-    cdef unsigned int endptr
+    cdef floating[:] norm_cols_X = np.zeros(n_features, dtype=dtype)

     # initial value of the residuals
     # R = y - Zw, weighted version R = sample_weight * (y - Zw)
     cdef floating[::1] R
-    cdef floating[::1] XtA
+    cdef floating[::1] XtA = np.empty(n_features, dtype=dtype)
     cdef const floating[::1] yw

-    if floating is float:
-        dtype = np.float32
-    else:
-        dtype = np.float64
-
-    norm_cols_X = np.zeros(n_features, dtype=dtype)
-    XtA = np.zeros(n_features, dtype=dtype)
-
     cdef floating tmp
     cdef floating w_ii
     cdef floating d_w_max
     cdef floating w_max
     cdef floating d_w_ii
+    cdef floating gap = tol + 1.0
+    cdef floating d_w_tol = tol
+    cdef floating dual_norm_XtA
     cdef floating X_mean_ii
     cdef floating R_sum = 0.0
     cdef floating R_norm2
     cdef floating w_norm2
-    cdef floating A_norm2
     cdef floating l1_norm
+    cdef floating const_
+    cdef floating A_norm2
     cdef floating normalize_sum
-    cdef floating gap = tol + 1.0
-    cdef floating d_w_tol = tol
-    cdef floating dual_norm_XtA
+    cdef unsigned int ii
     cdef unsigned int jj
     cdef unsigned int n_iter = 0
     cdef unsigned int f_iter
+    cdef unsigned int startptr = X_indptr[0]
+    cdef unsigned int endptr
     cdef uint32_t rand_r_state_seed = rng.randint(0, RAND_R_MAX)
     cdef uint32_t* rand_r_state = &rand_r_state_seed
     cdef bint center = False
@@ -380,6 +384,7 @@ def sparse_enet_coordinate_descent(
                 center = True
                 break

+        # R = y - np.dot(X, w)
         for ii in range(n_features):
             X_mean_ii = X_mean[ii]
             endptr = X_indptr[ii + 1]
@@ -396,6 +401,7 @@ def sparse_enet_coordinate_descent(
                     for jj in range(n_samples):
                         R[jj] += X_mean_ii * w_ii
             else:
+                # R = sw * (y - np.dot(X, w))
                 for jj in range(startptr, endptr):
                     tmp = sample_weight[X_indices[jj]]
                     # second term will be subtracted by loop over range(n_samples)
@@ -526,21 +532,18 @@ def sparse_enet_coordinate_descent(
                 # w_norm2 = np.dot(w, w)
                 w_norm2 = _dot(n_features, &w[0], 1, &w[0], 1)
                 if (dual_norm_XtA > alpha):
-                    const = alpha / dual_norm_XtA
-                    A_norm2 = R_norm2 * const ** 2
+                    const_ = alpha / dual_norm_XtA
+                    A_norm2 = R_norm2 * const_ ** 2
                     gap = 0.5 * (R_norm2 + A_norm2)
                 else:
-                    const = 1.0
+                    const_ = 1.0
                     gap = R_norm2

                 l1_norm = _asum(n_features, &w[0], 1)

-                gap += (alpha * l1_norm - const * _dot(
-                    n_samples,
-                    &R[0], 1,
-                    &y[0], 1
-                    )
-                    + 0.5 * beta * (1 + const ** 2) * w_norm2)
+                gap += (alpha * l1_norm
+                        - const_ * _dot(n_samples, &R[0], 1, &y[0], 1)  # np.dot(R.T, y)
+                        + 0.5 * beta * (1 + const_ ** 2) * w_norm2)

                 if gap < tol:
                     # return if we reached desired tolerance
@@ -549,10 +552,13 @@ def sparse_enet_coordinate_descent(
         else:
             # for/else, runs if for doesn't end with a `break`
             with gil:
-                warnings.warn("Objective did not converge. You might want to "
-                              "increase the number of iterations. Duality "
-                              "gap: {}, tolerance: {}".format(gap, tol),
-                              ConvergenceWarning)
+                message = (
+                    message_conv +
+                    f" Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
+                )
+                if alpha < np.finfo(np.float64).eps:
+                    message += "\n" + message_ridge
+                warnings.warn(message, ConvergenceWarning)

     return np.asarray(w), gap, tol, n_iter + 1

@@ -702,19 +708,19 @@ def enet_coordinate_descent_gram(
                 w_norm2 = _dot(n_features, &w[0], 1, &w[0], 1)

                 if (dual_norm_XtA > alpha):
-                    const = alpha / dual_norm_XtA
-                    A_norm2 = R_norm2 * (const ** 2)
+                    const_ = alpha / dual_norm_XtA
+                    A_norm2 = R_norm2 * (const_ ** 2)
                     gap = 0.5 * (R_norm2 + A_norm2)
                 else:
-                    const = 1.0
+                    const_ = 1.0
                     gap = R_norm2

                 # The call to asum is equivalent to the L1 norm of w
                 gap += (
                     alpha * _asum(n_features, &w[0], 1)
-                    - const * y_norm2
-                    + const * q_dot_w
-                    + 0.5 * beta * (1 + const ** 2) * w_norm2
+                    - const_ * y_norm2
+                    + const_ * q_dot_w
+                    + 0.5 * beta * (1 + const_ ** 2) * w_norm2
                 )

                 if gap < tol:
@@ -724,10 +730,11 @@ def enet_coordinate_descent_gram(
         else:
             # for/else, runs if for doesn't end with a `break`
             with gil:
-                warnings.warn("Objective did not converge. You might want to "
-                              "increase the number of iterations. Duality "
-                              "gap: {}, tolerance: {}".format(gap, tol),
-                              ConvergenceWarning)
+                message = (
+                    message_conv +
+                    f" Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
+                )
+                warnings.warn(message, ConvergenceWarning)

     return np.asarray(w), gap, tol, n_iter + 1

@@ -921,11 +928,11 @@ def enet_coordinate_descent_multi_task(
                 R_norm = _nrm2(n_samples * n_tasks, &R[0, 0], 1)
                 w_norm = _nrm2(n_features * n_tasks, &W[0, 0], 1)
                 if (dual_norm_XtA > l1_reg):
-                    const = l1_reg / dual_norm_XtA
-                    A_norm = R_norm * const
+                    const_ = l1_reg / dual_norm_XtA
+                    A_norm = R_norm * const_
                     gap = 0.5 * (R_norm ** 2 + A_norm ** 2)
                 else:
-                    const = 1.0
+                    const_ = 1.0
                     gap = R_norm ** 2

                 # ry_sum = np.sum(R * y)
@@ -938,8 +945,8 @@ def enet_coordinate_descent_multi_task(

                 gap += (
                     l1_reg * l21_norm
-                    - const * ry_sum
-                    + 0.5 * l2_reg * (1 + const ** 2) * (w_norm ** 2)
+                    - const_ * ry_sum
+                    + 0.5 * l2_reg * (1 + const_ ** 2) * (w_norm ** 2)
                 )

                 if gap <= tol:
@@ -948,9 +955,10 @@ def enet_coordinate_descent_multi_task(
         else:
             # for/else, runs if for doesn't end with a `break`
             with gil:
-                warnings.warn("Objective did not converge. You might want to "
-                              "increase the number of iterations. Duality "
-                              "gap: {}, tolerance: {}".format(gap, tol),
-                              ConvergenceWarning)
+                message = (
+                    message_conv +
+                    f" Duality gap: {gap:.3e}, tolerance: {tol:.3e}"
+                )
+                warnings.warn(message, ConvergenceWarning)

     return np.asarray(W), gap, tol, n_iter + 1
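For reference, the duality-gap check that every hunk above touches (the diff renames `const` to `const_` and factors the warning text into shared module-level constants, without changing the math) can be sketched in plain NumPy. This is a sketch of the dense single-output case only, for the objective 0.5 * ||y - Xw||^2 + alpha * ||w||_1 + 0.5 * beta * ||w||^2; `enet_duality_gap` is a hypothetical helper name, and the actual solvers use BLAS calls (`_dot`, `_asum`) where this uses `@` and `sum`:

```python
import numpy as np

def enet_duality_gap(X, y, w, alpha, beta):
    # Mirrors the check inside the nogil loop: the solver stops when gap < tol.
    R = y - X @ w                         # residual
    XtA = X.T @ R - beta * w
    dual_norm_XtA = np.max(np.abs(XtA))
    R_norm2 = R @ R                       # np.dot(R, R)
    w_norm2 = w @ w                       # np.dot(w, w)
    if dual_norm_XtA > alpha:
        # rescale R so the corresponding dual point is feasible
        const_ = alpha / dual_norm_XtA
        gap = 0.5 * (R_norm2 + R_norm2 * const_ ** 2)
    else:
        const_ = 1.0
        gap = R_norm2
    gap += (alpha * np.abs(w).sum()       # l1_norm
            - const_ * (R @ y)            # np.dot(R.T, y)
            + 0.5 * beta * (1 + const_ ** 2) * w_norm2)
    return gap
```

A gap below `tol` certifies that the iterate is within `tol` of the optimum, which is why the shared `message_conv` warning reports the final gap and tolerance whenever the iteration budget runs out first.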