Skip to content

Commit dbfeacc

Browse files
authored
Merge pull request #201 from JesseLivezey/index_mask
Index mask in Logistic regression
2 parents 4766eac + f719da9 commit dbfeacc

File tree

9 files changed

+429
-66
lines changed

9 files changed

+429
-66
lines changed

LICENSE.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
PyUol Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
1+
PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
22

33
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
44

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ PyUoI requires
4646

4747
* numpy>=1.14
4848
* h5py>=2.8
49-
* scikit-learn>=0.20
49+
* scikit-learn>=0.24
5050

5151
and optionally
5252

@@ -89,8 +89,8 @@ Please see our <a href="https://pyuoi.readthedocs.io/en/latest/">ReadTheDocs</a>
8989

9090
# Copyright
9191

92-
PyUol Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
92+
PyUoI Copyright (c) 2019, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved.
9393

94-
If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov referring to " PyUol" (LBNL Ref 2019-157)."
94+
If you have questions about your rights to use or distribute this software, please contact Berkeley Lab's Innovation & Partnerships Office at IPO@lbl.gov referring to " PyUoI" (LBNL Ref 2019-157)."
9595

9696
NOTICE. This software was developed under funding from the U.S. Department of Energy. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, and perform publicly and display publicly. The U.S. Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so.

bin/generate_build.sh

100644 → 100755
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
eval "$(conda shell.bash hook)"
22
mkdir dist
3-
for py in 3.6 3.7; do
3+
for py in 3.6 3.7 3.8; do
44
git clone https://github.com/BouchardLab/pyuoi.git
55
cd pyuoi
66
conda create -y -n temp_build_env python=$py

docs/source/installation.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ PyUoI requires
2828

2929
* numpy>=1.14
3030
* h5py>=2.8
31-
* scikit-learn>=0.20
31+
* scikit-learn>=0.24
3232

3333
and optionally
3434

pyuoi/datasets/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,10 @@ def make_classification(n_samples=100, n_features=20, n_informative=2,
126126
if isinstance(random_state, int):
127127
rng = np.random.RandomState(random_state)
128128
else:
129-
rng = random_state
129+
if random_state is None:
130+
rng = np.random
131+
else:
132+
rng = random_state
130133
n_not_informative = n_features - n_informative
131134

132135
X = rng.randn(n_samples, n_features)

pyuoi/linear_model/logistic.py

Lines changed: 92 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -541,9 +541,10 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
541541
_, n_features = X.shape
542542

543543
classes = np.unique(y)
544+
n_classes = len(classes)
544545

545546
if multi_class == 'auto':
546-
if len(classes) > 2:
547+
if n_classes > 2:
547548
multi_class = 'multinomial'
548549
else:
549550
multi_class = 'ovr'
@@ -616,9 +617,19 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
616617
target = Y_multi
617618
if penalty == 'l2':
618619
w0 = w0.ravel()
620+
if coef_mask is not None:
621+
x0 = np.zeros_like(w0)
619622

620623
def func(x, *args):
621-
return _multinomial_loss_grad(x, *args)[0:2]
624+
mask = args[3]
625+
if mask is not None:
626+
x0[mask] = x
627+
args = args[:3] + (None,) + (args[-1],)
628+
f, df = _multinomial_loss_grad(x0, *args)[0:2]
629+
df = df[mask]
630+
else:
631+
f, df = _multinomial_loss_grad(x, *args)[0:2]
632+
return f, df
622633
else:
623634
w0 = w0.T.ravel().copy()
624635

@@ -634,7 +645,18 @@ def func(x, g, *args):
634645
else:
635646
target = y_bin
636647
if penalty == 'l2':
637-
func = _logistic_loss_and_grad
648+
x0 = np.zeros_like(w0)
649+
650+
def func(x, *args):
651+
mask = args[3]
652+
if mask is not None:
653+
x0[mask] = x
654+
args = args[:3] + (None,) + (args[-1],)
655+
f, df = _logistic_loss_and_grad(x0, *args)
656+
df = df[mask]
657+
else:
658+
f, df = _logistic_loss_and_grad(x, *args)
659+
return f, df
638660
else:
639661
def func(x, g, *args):
640662
loss, grad = _logistic_loss_and_grad(x, *args)
@@ -644,63 +666,79 @@ def func(x, g, *args):
644666
coefs = list()
645667
n_iter = np.zeros(len(Cs), dtype=np.int32)
646668
for i, C in enumerate(Cs):
647-
iprint = [-1, 50, 1, 100, 101][
648-
np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
649-
if penalty == 'l2':
650-
w0, loss, info = optimize.fmin_l_bfgs_b(
651-
func, w0, fprime=None,
652-
args=(X, target, 1. / C, coef_mask, sample_weight),
653-
iprint=iprint, pgtol=tol, maxiter=max_iter)
654-
else:
655-
zeros_seen = [0]
656-
657-
def zero_coef(x, *args):
658-
if multi_class == 'multinomial':
659-
x = x.reshape(-1, classes.size)[:-1]
660-
else:
661-
x = x[:-1]
662-
now_zeros = np.array_equiv(x, 0.)
663-
if now_zeros:
664-
zeros_seen[0] += 1
669+
if coef_mask is None or coef_mask.sum():
670+
iprint = [-1, 50, 1, 100, 101][
671+
np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
672+
if penalty == 'l2':
673+
if coef_mask is None:
674+
w0, loss, info = optimize.fmin_l_bfgs_b(
675+
func, w0, fprime=None,
676+
args=(X, target, 1. / C, coef_mask, sample_weight),
677+
iprint=iprint, pgtol=tol, maxiter=max_iter)
665678
else:
666-
zeros_seen[0] = 0
667-
if zeros_seen[0] > 1:
668-
return -2048
669-
try:
670-
w0 = fmin_lbfgs(func, w0, orthantwise_c=1. / C,
671-
args=(X, target, 0., coef_mask, sample_weight),
672-
max_iterations=max_iter,
673-
epsilon=tol,
674-
orthantwise_end=coef_size,
675-
progress=zero_coef)
676-
except AllZeroLBFGSError:
677-
w0 *= 0.
678-
info = None
679-
if info is not None and info["warnflag"] == 1:
680-
warnings.warn("lbfgs failed to converge. Increase the number "
681-
"of iterations.", ConvergenceWarning)
682-
# In scipy <= 1.0.0, nit may exceed maxiter.
683-
# See https://github.com/scipy/scipy/issues/7854.
684-
if info is None:
685-
n_iter_i = -1
686-
else:
687-
n_iter_i = min(info['nit'], max_iter)
679+
if fit_intercept:
680+
if multi_class == 'multinomial':
681+
mask = [coef_mask,
682+
np.ones(n_classes)[:, np.newaxis]]
683+
mask = np.concatenate(mask, axis=1)
684+
else:
685+
mask = np.concatenate([coef_mask, np.ones(1)])
686+
else:
687+
mask = coef_mask
688+
mask = np.nonzero(mask.ravel())[0]
689+
wp = w0[mask]
690+
wp, loss, info = optimize.fmin_l_bfgs_b(
691+
func, wp, fprime=None,
692+
args=(X, target, 1. / C, mask, sample_weight),
693+
iprint=iprint, pgtol=tol, maxiter=max_iter)
694+
w0 = np.zeros_like(w0)
695+
w0[mask] = wp
696+
697+
else:
698+
zeros_seen = [0]
699+
700+
def zero_coef(x, *args):
701+
if multi_class == 'multinomial':
702+
x = x.reshape(-1, classes.size)[:-1]
703+
else:
704+
x = x[:-1]
705+
now_zeros = np.array_equiv(x, 0.)
706+
if now_zeros:
707+
zeros_seen[0] += 1
708+
else:
709+
zeros_seen[0] = 0
710+
if zeros_seen[0] > 1:
711+
return -2048
712+
try:
713+
args = (X, target, 0., coef_mask, sample_weight)
714+
w0 = fmin_lbfgs(func, w0, orthantwise_c=1. / C,
715+
args=args,
716+
max_iterations=max_iter,
717+
epsilon=tol,
718+
orthantwise_end=coef_size,
719+
progress=zero_coef)
720+
except AllZeroLBFGSError:
721+
w0 *= 0.
722+
info = None
723+
if info is not None and info["warnflag"] == 1:
724+
warnings.warn("lbfgs failed to converge. Increase the number "
725+
"of iterations.", ConvergenceWarning)
726+
# In scipy <= 1.0.0, nit may exceed maxiter.
727+
# See https://github.com/scipy/scipy/issues/7854.
728+
if info is None:
729+
n_iter_i = -1
730+
else:
731+
n_iter_i = min(info['nit'], max_iter)
732+
733+
n_iter[i] = n_iter_i
688734

689735
if multi_class == 'multinomial':
690736
n_classes = max(2, classes.size)
691737
if penalty == 'l2':
692-
multi_w0 = np.reshape(w0, (n_classes, -1))
738+
w0 = np.reshape(w0, (n_classes, -1))
693739
else:
694-
multi_w0 = np.reshape(w0, (-1, n_classes)).T
695-
if coef_mask is not None:
696-
multi_w0[:, :n_features] *= coef_mask
697-
coefs.append(multi_w0.copy())
698-
else:
699-
if coef_mask is not None:
700-
w0[:n_features] *= coef_mask
701-
coefs.append(w0.copy())
702-
703-
n_iter[i] = n_iter_i
740+
w0 = np.reshape(w0, (-1, n_classes)).T
741+
coefs.append(w0.copy())
704742

705743
return np.array(coefs), np.array(Cs), n_iter
706744

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
numpy>=1.14
22
h5py>=2.8
3-
scikit-learn>=0.20
3+
scikit-learn>=0.24

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def finalize_options(self):
4646
# Versions should comply with PEP440. For a discussion on single-sourcing
4747
# the version across setup.py and the project code, see
4848
# https://packaging.python.org/en/latest/single_source_version.html
49-
version='1.0.0',
49+
version='1.1.0',
5050

5151
description='The Union of Intersections framework in Python.',
5252
long_description=long_description,

0 commit comments

Comments
 (0)