Commit e9ddb6a

Merge pull request #9 from edahelsinki/development

Numba tweaks:
- Default to num_threads = 1 to avoid horrible performance on some CPUs
- Add signatures to jitted functions (for faster and more reusable jitting)
2 parents 0e66b18 + c064462 commit e9ddb6a
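
For context, the two tweaks map onto standard Numba usage roughly as follows. This is a minimal illustrative sketch with a made-up function (`sum_sq_err`), not code from slise itself:

```python
import numba
import numpy as np

# Tweak 1: cap Numba at one thread by default; on some CPUs the
# threading overhead of parallel kernels outweighs any speedup.
numba.set_num_threads(1)

# Tweak 2: an explicit signature makes Numba compile eagerly at import
# time and reuse that single specialisation, instead of jitting lazily
# on the first call for every new combination of argument types.
@numba.njit("f8(f8[:], f8[:])")
def sum_sq_err(y, yhat):
    # Sum of squared errors between two float64 vectors.
    return np.sum((y - yhat) ** 2)

print(sum_sq_err(np.ones(4), np.zeros(4)))  # 4.0
```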

File tree

5 files changed: +390, -186 lines

pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "slise"
-version = "2.2.1"
+version = "2.2.2"
 authors = [{ name = "Anton Björklund", email = "anton.bjorklund@helsinki.fi" }]
 description = "The SLISE algorithm for robust regression and explanations of black box models"
 readme = "README.md"
@@ -29,7 +29,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-dev = ["pytest", "black[jupyter]", "pylint", "tbb", "icc_rt", "IPython"]
+dev = ["pytest", "black[jupyter]", "pylint", "icc_rt", "IPython"]
 tbb = ["tbb"]
 
 [project.urls]
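
Note the dependency shuffle: `tbb` drops out of the `dev` extras and gets its own optional extra, so Numba's TBB threading layer becomes an explicit opt-in (e.g. `pip install "slise[tbb]"`) rather than a development default.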

slise/initialisation.py

Lines changed: 29 additions & 37 deletions
@@ -32,16 +32,16 @@ def fast_lstsq(
     if weight is None or x.shape[1] <= max_iterations * 20:
         return np.linalg.lstsq(x, y, rcond=None)[0]
     else:
-        return regularised_regression(x, y, 0, 0, weight, max_iterations)
+        return regularised_regression(x, y, 0.0, 0.0, weight, max_iterations)
 
 
 def initialise_lasso(
     X: np.ndarray,
     Y: np.ndarray,
-    epsilon: float = 0,
+    epsilon: float = 0.0,
     weight: Optional[np.ndarray] = None,
     max_iterations: int = 300,
-    **kwargs
+    **kwargs,
 ) -> Tuple[np.ndarray, float]:
     """Initialise `alpha` and `beta` to be equivalent to LASSO.
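
The `0` → `0.0` changes go hand in hand with the new signatures: once the jitted functions declare `float64` arguments, float literals keep every call on the one precompiled specialisation. A minimal sketch of the difference, using a hypothetical `add` function rather than `regularised_regression`:

```python
from numba import njit

# Lazily jitted: each new argument type triggers a fresh compilation.
@njit
def lazy_add(a, b):
    return a + b

lazy_add(0, 0)      # compiles an (int64, int64) specialisation
lazy_add(0.0, 0.0)  # compiles a second, (float64, float64), one
print(len(lazy_add.signatures))  # 2

# Eagerly jitted with a signature: compiled once, before any call.
@njit("f8(f8, f8)")
def eager_add(a, b):
    return a + b

print(len(eager_add.signatures))  # 1
```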
@@ -63,12 +63,12 @@ def initialise_ols(
     Y: np.ndarray,
     epsilon: float,
     weight: Optional[np.ndarray] = None,
-    beta_max: float = 20,
+    beta_max: float = 20.0,
     max_approx: float = 1.15,
     max_iterations: int = 300,
     beta_max_init: float = 2.5,
     min_beta_step: float = 1e-8,
-    **kwargs
+    **kwargs,
 ) -> Tuple[np.ndarray, float]:
     """Initialise `alpha` to OLS and `beta` to [slise.optimisation.next_beta][].
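
The trailing comma after `**kwargs` is only legal on Python 3.6+ and is the style Black enforces in multi-line signatures, which fits the formatting-only nature of most of these hunks.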
@@ -89,10 +89,8 @@
     alpha = fast_lstsq(X, Y, weight, max_iterations)
     epsilon = epsilon**2
     beta_max = min(beta_max, beta_max_init) / epsilon
-    residuals = (Y - X @ alpha) ** 2
-    beta = next_beta(
-        residuals, epsilon, 0, weight, beta_max, log(max_approx), min_beta_step
-    )
+    r2 = (Y - X @ alpha) ** 2
+    beta = next_beta(r2, epsilon, 0, weight, beta_max, log(max_approx), min_beta_step)
     return alpha, beta
 
 
@@ -101,11 +99,11 @@ def initialise_zeros(
     Y: np.ndarray,
     epsilon: float,
     weight: Optional[np.ndarray] = None,
-    beta_max: float = 20,
+    beta_max: float = 20.0,
     max_approx: float = 1.15,
     beta_max_init: float = 2.5,
     min_beta_step: float = 1e-8,
-    **kwargs
+    **kwargs,
 ) -> Tuple[np.ndarray, float]:
     """Initialise `alpha` to 0 and `beta` to [slise.optimisation.next_beta][].
@@ -136,7 +134,7 @@ def initialise_fixed(
     Y: np.ndarray,
     epsilon: float,
     weight: Optional[np.ndarray] = None,
-    beta_max: float = 20,
+    beta_max: float = 20.0,
     max_approx: float = 1.15,
     beta_max_init: float = 2.5,
     min_beta_step: float = 1e-8,
@@ -163,14 +161,8 @@
     epsilon = epsilon**2
     beta_max = min(beta_max, beta_max_init) / epsilon
     alpha = init
-    beta = next_beta(
-        (X @ alpha - Y) ** 2,
-        epsilon,
-        0,
-        weight,
-        beta_max,
-        log(max_approx),
-    )
+    r2 = (X @ alpha - Y) ** 2
+    beta = next_beta(r2, epsilon, 0, weight, beta_max, log(max_approx))
     return alpha, beta
 
 
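These `next_beta` refactors here and in `initialise_ols` are behaviour-preserving: the squared residuals are just hoisted into a local `r2` so that the call fits on one line.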
@@ -196,14 +188,14 @@ def initialise_candidates(
     Y: np.ndarray,
     epsilon: float,
     weight: Optional[np.ndarray] = None,
-    beta_max: float = 20,
+    beta_max: float = 20.0,
     max_approx: float = 1.15,
     pca_treshold: int = 10,
     num_init: Optional[int] = None,
     max_iterations: int = 300,
     beta_max_init: float = 2.5,
     min_beta_step: float = 1e-8,
-    **kwargs
+    **kwargs,
 ) -> Tuple[np.ndarray, float]:
     """Generate a number (num_init) of candidates, using PCA to shrink the random subsets.
     Then select the best one to be `alpha` and `beta` to be the corresponding [slise.optimisation.next_beta][].
@@ -236,19 +228,19 @@
     alpha = np.zeros(X.shape[1])
     residuals = Y**2
     beta = next_beta(residuals, epsilon, 0, weight, beta_max, max_approx, min_beta_step)
-    loss = loss_residuals(alpha, residuals, epsilon, beta, 0, 0, weight)
+    loss = loss_residuals(alpha, residuals, epsilon, beta, 0.0, 0.0, weight)
     # Find the candidate with the best loss for the next_beta
     for i in range(num_init):
         try:
             model = __create_candidate(X, Y, weight, pca_treshold, max_iterations)
-            residuals2 = (Y - X @ model) ** 2
-            loss2 = loss_residuals(model, residuals2, epsilon, beta, 0, 0, weight)
+            r2 = (Y - X @ model) ** 2
+            loss2 = loss_residuals(model, r2, epsilon, beta, 0.0, 0.0, weight)
             if loss2 < loss:
                 alpha = model
                 beta = next_beta(
-                    residuals2, epsilon, 0, weight, beta_max, max_approx, min_beta_step
+                    r2, epsilon, 0.0, weight, beta_max, max_approx, min_beta_step
                 )
-                loss = loss_residuals(model, residuals2, epsilon, beta, 0, 0, weight)
+                loss = loss_residuals(model, r2, epsilon, beta, 0.0, 0.0, weight)
         except np.linalg.LinAlgError:
             pass
     return alpha, beta
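
Both candidate initialisers share this keep-the-best loop. Stripped of the slise specifics, the pattern looks like the following standalone sketch (random-subset least squares standing in for `__create_candidate`, mean squared error standing in for `loss_residuals`):

```python
import numpy as np

rng = np.random.default_rng(42)
X, Y = rng.normal(size=(100, 3)), rng.normal(size=100)

def loss(model):
    # Stand-in for loss_residuals: mean squared residual.
    return np.mean((Y - X @ model) ** 2)

# Start from the zero model, then keep the best random candidate.
best = np.zeros(X.shape[1])
best_loss = loss(best)
for _ in range(10):
    # Stand-in for __create_candidate: fit on a small random subset.
    sel = rng.choice(len(Y), size=20, replace=False)
    candidate = np.linalg.lstsq(X[sel], Y[sel], rcond=None)[0]
    if loss(candidate) < best_loss:
        best, best_loss = candidate, loss(candidate)
print(best, best_loss)
```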
@@ -264,7 +256,7 @@ def __create_candidate2(
     X = X[sel, :]
     Y = Y[sel]
     with catch_warnings(record=False):
-        reg = regularised_regression(X, Y, 1e-8, 0, max_iterations)
+        reg = regularised_regression(X, Y, 1e-8, 0.0, max_iterations=max_iterations)
     return reg
 
 
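Besides the `0.0` literal, the switch to `max_iterations=max_iterations` looks like a real fix rather than formatting: judging by the call in `fast_lstsq` above (`regularised_regression(x, y, 0.0, 0.0, weight, max_iterations)`), the fifth positional parameter is `weight`, so the old call presumably passed `max_iterations` in the `weight` slot.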
@@ -273,13 +265,13 @@ def initialise_candidates2(
     Y: np.ndarray,
     epsilon: float,
     weight: Optional[np.ndarray] = None,
-    beta_max: float = 20,
+    beta_max: float = 20.0,
     max_approx: float = 1.15,
     num_init: Optional[int] = None,
     max_iterations: int = 300,
     beta_max_init: float = 2.5,
     min_beta_step: float = 1e-8,
-    **kwargs
+    **kwargs,
 ) -> Tuple[np.ndarray, float]:
     """Generate a number (num_init) of candidates, using LASSO to shrink the random subsets.
     Then select the best one to be `alpha` and `beta` to be the corresponding [slise.optimisation.next_beta][].
@@ -309,21 +301,21 @@
     weight = weight / np.sum(weight)
     # Initial model (zeros)
     alpha = np.zeros(X.shape[1])
-    residuals = Y**2
-    beta = next_beta(residuals, epsilon, 0, weight, beta_max, max_approx, min_beta_step)
-    loss = loss_residuals(alpha, residuals, epsilon, beta, 0, 0, weight)
+    r2 = Y**2
+    beta = next_beta(r2, epsilon, 0.0, weight, beta_max, max_approx, min_beta_step)
+    loss = loss_residuals(alpha, r2, epsilon, beta, 0.0, 0.0, weight)
     # Find the candidate with the best loss for the next_beta
     for i in range(num_init):
         try:
             model = __create_candidate2(X, Y, weight, max_iterations)
-            residuals2 = (Y - X @ model) ** 2
-            loss2 = loss_residuals(model, residuals2, epsilon, beta, 0, 0, weight)
+            r2 = (Y - X @ model) ** 2
+            loss2 = loss_residuals(model, r2, epsilon, beta, 0.0, 0.0, weight)
             if loss2 < loss:
                 alpha = model
                 beta = next_beta(
-                    residuals2, epsilon, 0, weight, beta_max, max_approx, min_beta_step
+                    r2, epsilon, 0.0, weight, beta_max, max_approx, min_beta_step
                 )
-                loss = loss_residuals(model, residuals2, epsilon, beta, 0, 0, weight)
+                loss = loss_residuals(model, r2, epsilon, beta, 0.0, 0.0, weight)
         except np.linalg.LinAlgError:
             pass
     return alpha, beta
