Skip to content

Commit 9590c07

Browse files
authored
EFF Optimize memory usage for sparse matrices in LLE (Hessian, Modified and LTSA) (scikit-learn#28096)
1 parent 87fa654 commit 9590c07

File tree

2 files changed

+21
-13
lines changed

2 files changed

+21
-13
lines changed

doc/whats_new/v1.6.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,14 @@ Changelog
136136
has no effect. `copy_X` will be removed in 1.8.
137137
:pr:`29105` by :user:`Adam Li <adam2392>`.
138138

139+
:mod:`sklearn.manifold`
140+
.......................
141+
142+
- |Efficiency| :func:`manifold.locally_linear_embedding` and
143+
:class:`manifold.LocallyLinearEmbedding` now allocate memory more efficiently for
144+
sparse matrices in the Hessian, Modified and LTSA methods.
145+
:pr:`28096` by :user:`Giorgio Angelotti <giorgioangel>`.
146+
139147
:mod:`sklearn.metrics`
140148
......................
141149

sklearn/manifold/_locally_linear.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import numpy as np
99
from scipy.linalg import eigh, qr, solve, svd
10-
from scipy.sparse import csr_matrix, eye
10+
from scipy.sparse import csr_matrix, eye, lil_matrix
1111
from scipy.sparse.linalg import eigsh
1212

1313
from ..base import (
@@ -229,6 +229,7 @@ def _locally_linear_embedding(
229229
)
230230

231231
M_sparse = eigen_solver != "dense"
232+
M_container_constructor = lil_matrix if M_sparse else np.zeros
232233

233234
if method == "standard":
234235
W = barycenter_kneighbors_graph(
@@ -239,7 +240,7 @@ def _locally_linear_embedding(
239240
# depending on the solver, we'll do this differently
240241
if M_sparse:
241242
M = eye(*W.shape, format=W.format) - W
242-
M = (M.T * M).tocsr()
243+
M = M.T * M
243244
else:
244245
M = (W.T * W - W.T - W).toarray()
245246
M.flat[:: M.shape[0] + 1] += 1 # add I to the diagonal: M = (I - W).T (I - W) = W.T W - W.T - W + I
@@ -262,7 +263,7 @@ def _locally_linear_embedding(
262263
Yi = np.empty((n_neighbors, 1 + n_components + dp), dtype=np.float64)
263264
Yi[:, 0] = 1
264265

265-
M = np.zeros((N, N), dtype=np.float64)
266+
M = M_container_constructor((N, N), dtype=np.float64)
266267

267268
use_svd = n_neighbors > d_in
268269

@@ -295,9 +296,6 @@ def _locally_linear_embedding(
295296
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
296297
M[nbrs_x, nbrs_y] += np.dot(w, w.T)
297298

298-
if M_sparse:
299-
M = csr_matrix(M)
300-
301299
elif method == "modified":
302300
if n_neighbors < n_components:
303301
raise ValueError("modified LLE requires n_neighbors >= n_components")
@@ -361,7 +359,8 @@ def _locally_linear_embedding(
361359

362360
# Now calculate M.
363361
# This is the [N x N] matrix whose null space is the desired embedding
364-
M = np.zeros((N, N), dtype=np.float64)
362+
M = M_container_constructor((N, N), dtype=np.float64)
363+
365364
for i in range(N):
366365
s_i = s_range[i]
367366

@@ -397,19 +396,16 @@ def _locally_linear_embedding(
397396
M[nbrs_x, nbrs_y] += np.dot(Wi, Wi.T)
398397
Wi_sum1 = Wi.sum(1)
399398
M[i, neighbors[i]] -= Wi_sum1
400-
M[neighbors[i], i] -= Wi_sum1
399+
M[neighbors[i], [i]] -= Wi_sum1
401400
M[i, i] += s_i
402401

403-
if M_sparse:
404-
M = csr_matrix(M)
405-
406402
elif method == "ltsa":
407403
neighbors = nbrs.kneighbors(
408404
X, n_neighbors=n_neighbors + 1, return_distance=False
409405
)
410406
neighbors = neighbors[:, 1:]
411407

412-
M = np.zeros((N, N))
408+
M = M_container_constructor((N, N), dtype=np.float64)
413409

414410
use_svd = n_neighbors > d_in
415411

@@ -432,7 +428,11 @@ def _locally_linear_embedding(
432428

433429
nbrs_x, nbrs_y = np.meshgrid(neighbors[i], neighbors[i])
434430
M[nbrs_x, nbrs_y] -= GiGiT
435-
M[neighbors[i], neighbors[i]] += 1
431+
432+
M[neighbors[i], neighbors[i]] += np.ones(shape=n_neighbors)
433+
434+
if M_sparse:
435+
M = M.tocsr()
436436

437437
return null_space(
438438
M,

0 commit comments

Comments
 (0)