Skip to content

Commit b2f4ed7

Browse files
fixed new xarray free handling in ttest and rank test
1 parent 6035624 commit b2f4ed7

File tree

2 files changed

+37
-27
lines changed

2 files changed

+37
-27
lines changed

diffxpy/stats/stats.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
from typing import Union
2-
31
import numpy as np
42
import numpy.linalg
3+
import scipy.sparse
54
import scipy.stats
5+
from typing import Union
66

77

88
def likelihood_ratio_test(
@@ -39,19 +39,18 @@ def likelihood_ratio_test(
3939

4040

4141
def mann_whitney_u_test(
42-
x0: np.ndarray,
43-
x1: np.ndarray,
42+
x0: Union[np.ndarray, scipy.sparse.csr_matrix],
43+
x1: Union[np.ndarray, scipy.sparse.csr_matrix]
4444
):
4545
"""
46-
Perform Wilcoxon rank sum test (Mann-Whitney U test) along second axis
47-
(for each gene).
46+
Perform Wilcoxon rank sum test (Mann-Whitney U test) along second axis, ie. for each gene.
4847
4948
The Wilcoxon rank sum test is a non-parameteric test
5049
to compare two groups of observations.
5150
52-
:param x0: np.array (observations x genes)
51+
:param x0: (observations x genes)
5352
Observations in first group by gene
54-
:param x1: np.array (observations x genes)
53+
:param x1: (observations x genes)
5554
Observations in second group by gene.
5655
"""
5756
axis = 1
@@ -66,8 +65,8 @@ def mann_whitney_u_test(
6665

6766
pvals = np.asarray([
6867
scipy.stats.mannwhitneyu(
69-
x=x0[:, i].flatten(),
70-
y=x1[:, i].flatten(),
68+
x=np.asarray(x0[:, i].todense()).flatten() if isinstance(x0, scipy.sparse.csr_matrix) else x0[:, i],
69+
y=np.asarray(x1[:, i].todense()).flatten() if isinstance(x0, scipy.sparse.csr_matrix) else x1[:, i],
7170
use_continuity=True,
7271
alternative="two-sided"
7372
).pvalue for i in range(x0.shape[1])

diffxpy/testing/det.py

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Union, Dict, Tuple, List, Set
44
import pandas as pd
55
from random import sample
6+
import scipy.sparse
67

78
import numpy as np
89
import patsy
@@ -521,7 +522,7 @@ def _ave(self):
521522
:return: np.ndarray
522523
"""
523524

524-
return np.mean(self.full_estim.x, axis=0)
525+
return np.asarray(np.mean(self.full_estim.x, axis=0)).flatten()
525526

526527
def _log_fold_change(self, factors: Union[Dict, Tuple, Set, List], base=np.e):
527528
"""
@@ -715,16 +716,16 @@ def __init__(
715716
self._store_ols = None
716717

717718
try:
718-
if model_estim._error_codes is not None:
719-
self._error_codes = model_estim._error_codes
719+
if self.model_estim.error_codes is not None:
720+
self._error_codes = self.model_estim.error_codes
720721
else:
721722
self._error_codes = None
722723
except Exception as e:
723724
self._error_codes = None
724725

725726
try:
726-
if model_estim._niter is not None:
727-
self._niter = model_estim._niter
727+
if self.model_estim.niter is not None:
728+
self._niter = self.model_estim.niter
728729
else:
729730
self._niter = None
730731
except Exception as e:
@@ -776,7 +777,7 @@ def _ave(self):
776777
777778
:return: np.ndarray
778779
"""
779-
return self.x.mean(axis=0)
780+
return np.asarray(self.x.mean(axis=0)).flatten()
780781

781782
def _test(self):
782783
"""
@@ -1530,8 +1531,8 @@ def __init__(
15301531
x0, x1 = split_x(data, grouping)
15311532

15321533
# Only compute p-values for genes with non-zero observations and non-zero group-wise variance.
1533-
mean_x0 = x0.mean(axis=0).astype(dtype=np.float)
1534-
mean_x1 = x1.mean(axis=0).astype(dtype=np.float)
1534+
mean_x0 = np.asarray(np.mean(x0, axis=0)).flatten().astype(dtype=np.float)
1535+
mean_x1 = np.asarray(np.mean(x1, axis=0)).flatten().astype(dtype=np.float)
15351536
# Avoid unnecessary mean computation:
15361537
self._mean = np.average(
15371538
a=np.vstack([mean_x0, mean_x1]),
@@ -1541,8 +1542,13 @@ def __init__(
15411542
returned=False
15421543
)
15431544
self._ave_nonzero = self._mean != 0 # omit all-zero features
1544-
var_x0 = np.asarray(x0.var(axis=0)).flatten().astype(dtype=np.float)
1545-
var_x1 = np.asarray(x1.var(axis=0)).flatten().astype(dtype=np.float)
1545+
if isinstance(x0, scipy.sparse.csr_matrix):
1546+
# Efficient analytic expression of variance without densification.
1547+
var_x0 = np.asarray(np.mean(x0.power(2), axis=0)).flatten().astype(dtype=np.float) - np.square(mean_x0)
1548+
var_x1 = np.asarray(np.mean(x1.power(2), axis=0)).flatten().astype(dtype=np.float) - np.square(mean_x1)
1549+
else:
1550+
var_x0 = np.asarray(np.var(x0, axis=0)).flatten().astype(dtype=np.float)
1551+
var_x1 = np.asarray(np.var(x1, axis=0)).flatten().astype(dtype=np.float)
15461552
self._var_geq_zero = np.logical_or(
15471553
var_x0 > 0,
15481554
var_x1 > 0
@@ -1649,8 +1655,8 @@ def __init__(
16491655

16501656
x0, x1 = split_x(data, grouping)
16511657

1652-
mean_x0 = x0.mean(axis=0).astype(dtype=np.float)
1653-
mean_x1 = x1.mean(axis=0).astype(dtype=np.float)
1658+
mean_x0 = np.asarray(np.mean(x0, axis=0)).flatten().astype(dtype=np.float)
1659+
mean_x1 = np.asarray(np.mean(x1, axis=0)).flatten().astype(dtype=np.float)
16541660
# Avoid unnecessary mean computation:
16551661
self._mean = np.average(
16561662
a=np.vstack([mean_x0, mean_x1]),
@@ -1659,19 +1665,24 @@ def __init__(
16591665
axis=0,
16601666
returned=False
16611667
)
1662-
var_x0 = np.asarray(x0.var(axis=0)).flatten().astype(dtype=np.float)
1663-
var_x1 = np.asarray(x1.var(axis=0)).flatten().astype(dtype=np.float)
1668+
if isinstance(x0, scipy.sparse.csr_matrix):
1669+
# Efficient analytic expression of variance without densification.
1670+
var_x0 = np.asarray(np.mean(x0.power(2), axis=0)).flatten().astype(dtype=np.float) - np.square(mean_x0)
1671+
var_x1 = np.asarray(np.mean(x1.power(2), axis=0)).flatten().astype(dtype=np.float) - np.square(mean_x1)
1672+
else:
1673+
var_x0 = np.asarray(np.var(x0, axis=0)).flatten().astype(dtype=np.float)
1674+
var_x1 = np.asarray(np.var(x1, axis=0)).flatten().astype(dtype=np.float)
16641675
self._var_geq_zero = np.logical_or(
16651676
var_x0 > 0,
16661677
var_x1 > 0
16671678
)
16681679
idx_run = np.where(np.logical_and(self._mean != 0, self._var_geq_zero))[0]
16691680

1670-
# TODO: can this be done on sparse?
1681+
# TODO: can this be done directly on sparse?
16711682
pval = np.zeros([data.shape[1]]) + np.nan
16721683
pval[idx_run] = stats.mann_whitney_u_test(
1673-
x0=np.asarray(x0[:, idx_run]),
1674-
x1=np.asarray(x1[:, idx_run])
1684+
x0=x0[:, idx_run],
1685+
x1=x1[:, idx_run]
16751686
)
16761687
pval[np.where(np.logical_and(
16771688
np.logical_and(mean_x0 == mean_x1, self._mean > 0),

0 commit comments

Comments
 (0)