3
3
from typing import Union , Dict , Tuple , List , Set
4
4
import pandas as pd
5
5
from random import sample
6
+ import scipy .sparse
6
7
7
8
import numpy as np
8
9
import patsy
@@ -521,7 +522,7 @@ def _ave(self):
521
522
:return: np.ndarray
522
523
"""
523
524
524
- return np .mean (self .full_estim .x , axis = 0 )
525
+ return np .asarray ( np . mean (self .full_estim .x , axis = 0 )). flatten ( )
525
526
526
527
def _log_fold_change (self , factors : Union [Dict , Tuple , Set , List ], base = np .e ):
527
528
"""
@@ -715,16 +716,16 @@ def __init__(
715
716
self ._store_ols = None
716
717
717
718
try :
718
- if model_estim ._error_codes is not None :
719
- self ._error_codes = model_estim ._error_codes
719
+ if self . model_estim .error_codes is not None :
720
+ self ._error_codes = self . model_estim .error_codes
720
721
else :
721
722
self ._error_codes = None
722
723
except Exception as e :
723
724
self ._error_codes = None
724
725
725
726
try :
726
- if model_estim ._niter is not None :
727
- self ._niter = model_estim ._niter
727
+ if self . model_estim .niter is not None :
728
+ self ._niter = self . model_estim .niter
728
729
else :
729
730
self ._niter = None
730
731
except Exception as e :
@@ -776,7 +777,7 @@ def _ave(self):
776
777
777
778
:return: np.ndarray
778
779
"""
779
- return self .x .mean (axis = 0 )
780
+ return np . asarray ( self .x .mean (axis = 0 )). flatten ( )
780
781
781
782
def _test (self ):
782
783
"""
@@ -1530,8 +1531,8 @@ def __init__(
1530
1531
x0 , x1 = split_x (data , grouping )
1531
1532
1532
1533
# Only compute p-values for genes with non-zero observations and non-zero group-wise variance.
1533
- mean_x0 = x0 . mean (axis = 0 ).astype (dtype = np .float )
1534
- mean_x1 = x1 . mean (axis = 0 ).astype (dtype = np .float )
1534
+ mean_x0 = np . asarray ( np . mean (x0 , axis = 0 )). flatten ( ).astype (dtype = np .float )
1535
+ mean_x1 = np . asarray ( np . mean (x1 , axis = 0 )). flatten ( ).astype (dtype = np .float )
1535
1536
# Avoid unnecessary mean computation:
1536
1537
self ._mean = np .average (
1537
1538
a = np .vstack ([mean_x0 , mean_x1 ]),
@@ -1541,8 +1542,13 @@ def __init__(
1541
1542
returned = False
1542
1543
)
1543
1544
self ._ave_nonzero = self ._mean != 0 # omit all-zero features
1544
- var_x0 = np .asarray (x0 .var (axis = 0 )).flatten ().astype (dtype = np .float )
1545
- var_x1 = np .asarray (x1 .var (axis = 0 )).flatten ().astype (dtype = np .float )
1545
+ if isinstance (x0 , scipy .sparse .csr_matrix ):
1546
+ # Compute the variance as E[x^2] - E[x]^2 so the sparse matrix never has to be densified.
1547
+ var_x0 = np .asarray (np .mean (x0 .power (2 ), axis = 0 )).flatten ().astype (dtype = np .float ) - np .square (mean_x0 )
1548
+ var_x1 = np .asarray (np .mean (x1 .power (2 ), axis = 0 )).flatten ().astype (dtype = np .float ) - np .square (mean_x1 )
1549
+ else :
1550
+ var_x0 = np .asarray (np .var (x0 , axis = 0 )).flatten ().astype (dtype = np .float )
1551
+ var_x1 = np .asarray (np .var (x1 , axis = 0 )).flatten ().astype (dtype = np .float )
1546
1552
self ._var_geq_zero = np .logical_or (
1547
1553
var_x0 > 0 ,
1548
1554
var_x1 > 0
@@ -1649,8 +1655,8 @@ def __init__(
1649
1655
1650
1656
x0 , x1 = split_x (data , grouping )
1651
1657
1652
- mean_x0 = x0 . mean (axis = 0 ).astype (dtype = np .float )
1653
- mean_x1 = x1 . mean (axis = 0 ).astype (dtype = np .float )
1658
+ mean_x0 = np . asarray ( np . mean (x0 , axis = 0 )). flatten ( ).astype (dtype = np .float )
1659
+ mean_x1 = np . asarray ( np . mean (x1 , axis = 0 )). flatten ( ).astype (dtype = np .float )
1654
1660
# Avoid unnecessary mean computation:
1655
1661
self ._mean = np .average (
1656
1662
a = np .vstack ([mean_x0 , mean_x1 ]),
@@ -1659,19 +1665,24 @@ def __init__(
1659
1665
axis = 0 ,
1660
1666
returned = False
1661
1667
)
1662
- var_x0 = np .asarray (x0 .var (axis = 0 )).flatten ().astype (dtype = np .float )
1663
- var_x1 = np .asarray (x1 .var (axis = 0 )).flatten ().astype (dtype = np .float )
1668
+ if isinstance (x0 , scipy .sparse .csr_matrix ):
1669
+ # Compute the variance as E[x^2] - E[x]^2 so the sparse matrix never has to be densified.
1670
+ var_x0 = np .asarray (np .mean (x0 .power (2 ), axis = 0 )).flatten ().astype (dtype = np .float ) - np .square (mean_x0 )
1671
+ var_x1 = np .asarray (np .mean (x1 .power (2 ), axis = 0 )).flatten ().astype (dtype = np .float ) - np .square (mean_x1 )
1672
+ else :
1673
+ var_x0 = np .asarray (np .var (x0 , axis = 0 )).flatten ().astype (dtype = np .float )
1674
+ var_x1 = np .asarray (np .var (x1 , axis = 0 )).flatten ().astype (dtype = np .float )
1664
1675
self ._var_geq_zero = np .logical_or (
1665
1676
var_x0 > 0 ,
1666
1677
var_x1 > 0
1667
1678
)
1668
1679
idx_run = np .where (np .logical_and (self ._mean != 0 , self ._var_geq_zero ))[0 ]
1669
1680
1670
- # TODO: can this be done on sparse?
1681
+ # TODO: can this be done directly on sparse?
1671
1682
pval = np .zeros ([data .shape [1 ]]) + np .nan
1672
1683
pval [idx_run ] = stats .mann_whitney_u_test (
1673
- x0 = np . asarray ( x0 [:, idx_run ]) ,
1674
- x1 = np . asarray ( x1 [:, idx_run ])
1684
+ x0 = x0 [:, idx_run ],
1685
+ x1 = x1 [:, idx_run ]
1675
1686
)
1676
1687
pval [np .where (np .logical_and (
1677
1688
np .logical_and (mean_x0 == mean_x1 , self ._mean > 0 ),
0 commit comments