1
1
import unittest
2
2
import logging
3
3
import numpy as np
4
- import pandas as pd
5
4
import scipy .stats as stats
6
5
7
6
from batchglm .api .models .glm_nb import Simulator
@@ -18,10 +17,7 @@ def _prepare_data(self, n_cells: int = 2000, n_genes: int = 100):
18
17
"""
19
18
sim = Simulator (num_observations = n_cells , num_features = n_genes )
20
19
sim .generate_sample_description (num_batches = 0 , num_conditions = 2 )
21
- sim .generate_params (
22
- rand_fn_ave = lambda shape : np .random .poisson (500 , shape ) + 1 ,
23
- rand_fn = lambda shape : np .abs (np .random .uniform (1 , 0.5 , shape ))
24
- )
20
+ sim .generate_params ()
25
21
sim .generate_data ()
26
22
27
23
return sim
@@ -45,8 +41,8 @@ def _eval(self, test, ref_pvals):
45
41
'mean absolute log p-value deviation: %f' %
46
42
float (mean_dev )
47
43
)
48
- assert max_dev < 1e-3 , "maximum deviation too large"
49
- assert max_log_dev < 1e-1 , "maximum deviation in log space too large"
44
+ assert max_dev < 1e-3 , "maximum deviation too large: %f" % max_dev
45
+ assert max_log_dev < 1e-1 , "maximum deviation in log space too large: %f" % max_log_dev
50
46
51
47
def test_t_test_ref (self , n_cells : int = 2000 , n_genes : int = 100 ):
52
48
"""
@@ -59,23 +55,25 @@ def test_t_test_ref(self, n_cells: int = 2000, n_genes: int = 100):
59
55
logging .getLogger ("batchglm" ).setLevel (logging .WARNING )
60
56
logging .getLogger ("diffxpy" ).setLevel (logging .INFO )
61
57
58
+ np .random .seed (1 )
62
59
sim = self ._prepare_data (n_cells = n_cells , n_genes = n_genes )
63
-
64
60
test = de .test .t_test (
65
- data = sim .x ,
61
+ data = sim .input_data ,
66
62
grouping = "condition" ,
67
- sample_description = sim .sample_description ,
68
- dtype = "float64"
63
+ sample_description = sim .sample_description
69
64
)
70
65
71
66
# Run scipy t-tests as a reference.
72
67
conds = np .unique (sim .sample_description ["condition" ].values )
73
68
ind_a = np .where (sim .sample_description ["condition" ] == conds [0 ])[0 ]
74
69
ind_b = np .where (sim .sample_description ["condition" ] == conds [1 ])[0 ]
75
- scipy_pvals = stats .ttest_ind (a = sim .X [ind_a , :], b = sim .X [ind_b , :], axis = 0 , equal_var = False ).pvalue
76
-
70
+ scipy_pvals = stats .ttest_ind (
71
+ a = sim .x [ind_a , :],
72
+ b = sim .x [ind_b , :],
73
+ axis = 0 ,
74
+ equal_var = False
75
+ ).pvalue
77
76
self ._eval (test = test , ref_pvals = scipy_pvals )
78
-
79
77
return True
80
78
81
79
def test_rank_ref (self , n_cells : int = 2000 , n_genes : int = 100 ):
@@ -89,27 +87,28 @@ def test_rank_ref(self, n_cells: int = 2000, n_genes: int = 100):
89
87
logging .getLogger ("batchglm" ).setLevel (logging .WARNING )
90
88
logging .getLogger ("diffxpy" ).setLevel (logging .INFO )
91
89
90
+ np .random .seed (1 )
92
91
sim = self ._prepare_data (n_cells = n_cells , n_genes = n_genes )
93
-
94
92
test = de .test .rank_test (
95
- data = sim .x ,
93
+ data = sim .input_data ,
96
94
grouping = "condition" ,
97
- sample_description = sim .sample_description ,
98
- dtype = "float64"
95
+ sample_description = sim .sample_description
99
96
)
100
97
101
98
# Run scipy Mann-Whitney U (rank-sum) tests as a reference.
102
99
conds = np .unique (sim .sample_description ["condition" ].values )
103
100
ind_a = np .where (sim .sample_description ["condition" ] == conds [0 ])[0 ]
104
101
ind_b = np .where (sim .sample_description ["condition" ] == conds [1 ])[0 ]
105
102
scipy_pvals = np .array ([
106
- stats .mannwhitneyu (x = sim .X [ind_a , i ], y = sim .X [ind_b , i ],
107
- use_continuity = True , alternative = "two-sided" ).pvalue
108
- for i in range (sim .X .shape [1 ])
109
- ])
110
-
103
+ stats .mannwhitneyu (
104
+ x = sim .x [ind_a , i ],
105
+ y = sim .x [ind_b , i ],
106
+ use_continuity = True ,
107
+ alternative = "two-sided"
108
+ ).pvalue
109
+ for i in range (sim .x .shape [1 ])
110
+ ])
111
111
self ._eval (test = test , ref_pvals = scipy_pvals )
112
-
113
112
return True
114
113
115
114
0 commit comments