Skip to content

Commit be6105a

Browse files
added fitting module
1 parent b2f4ed7 commit be6105a

File tree

7 files changed

+609
-7
lines changed

7 files changed

+609
-7
lines changed

diffxpy/api/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from . import test
55
from . import enrich
6+
from . import fit
67
from . import stats
78
from . import utils
89
from .. import pkg_constants

diffxpy/api/fit.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from diffxpy.fit import model, residuals

diffxpy/fit/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .fit import model, residuals

diffxpy/fit/external.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from diffxpy.testing.tests import _fit
2+
from diffxpy.testing.utils import parse_gene_names, parse_sample_description, parse_size_factors, \
3+
constraint_system_from_star

diffxpy/fit/fit.py

Lines changed: 400 additions & 0 deletions
Large diffs are not rendered by default.

diffxpy/testing/tests.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,14 @@
1-
from typing import Union, List, Dict, Callable, Tuple
2-
31
import anndata
2+
try:
3+
from anndata.base import Raw
4+
except ImportError:
5+
from anndata import Raw
46
import logging
57
import numpy as np
68
import pandas as pd
79
import patsy
810
import scipy.sparse
9-
10-
try:
11-
from anndata.base import Raw
12-
except ImportError:
13-
from anndata import Raw
11+
from typing import Union, List, Dict, Callable, Tuple
1412

1513
from batchglm import data as data_utils
1614
from batchglm.models.base import _EstimatorBase, _InputDataBase

diffxpy/unit_test/test_fit.py

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
import unittest
2+
import logging
3+
import numpy as np
4+
import pandas as pd
5+
6+
import diffxpy.api as de
7+
8+
9+
class _TestFit:
10+
11+
def _test_model_fit(
12+
self,
13+
n_cells: int,
14+
n_genes: int,
15+
noise_model: str
16+
):
17+
"""
18+
Test if de.wald() generates a uniform p-value distribution
19+
if it is given data simulated based on the null model. Returns the p-value
20+
of the two-side Kolmgorov-Smirnov test for equality of the observed
21+
p-value distribution and a uniform distribution.
22+
23+
:param n_cells: Number of cells to simulate (number of observations per test).
24+
:param n_genes: Number of genes to simulate (number of tests).
25+
:param noise_model: Noise model to use for data fitting.
26+
"""
27+
if noise_model == "nb":
28+
from batchglm.api.models.glm_nb import Simulator
29+
rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
30+
elif noise_model == "norm":
31+
from batchglm.api.models.glm_norm import Simulator
32+
rand_fn_scale = lambda shape: np.random.uniform(1, 2, shape)
33+
else:
34+
raise ValueError("noise model %s not recognized" % noise_model)
35+
36+
sim = Simulator(num_observations=n_cells, num_features=n_genes)
37+
sim.generate_sample_description(num_batches=0, num_conditions=0)
38+
sim.generate_params(rand_fn_scale=rand_fn_scale)
39+
sim.generate_data()
40+
41+
random_sample_description = pd.DataFrame({
42+
"condition": np.random.randint(2, size=sim.nobs),
43+
"batch": np.random.randint(2, size=sim.nobs)
44+
})
45+
46+
estim = de.fit.model(
47+
data=sim.input_data,
48+
sample_description=random_sample_description,
49+
formula_loc="~ 1 + condition + batch",
50+
noise_model=noise_model
51+
)
52+
return True
53+
54+
def _test_residuals_fit(
55+
self,
56+
n_cells: int,
57+
n_genes: int,
58+
noise_model: str
59+
):
60+
"""
61+
Test if de.wald() (multivariate mode) generates a uniform p-value distribution
62+
if it is given data simulated based on the null model. Returns the p-value
63+
of the two-side Kolmgorov-Smirnov test for equality of the observed
64+
p-value distribution and a uniform distribution.
65+
66+
:param n_cells: Number of cells to simulate (number of observations per test).
67+
:param n_genes: Number of genes to simulate (number of tests).
68+
:param noise_model: Noise model to use for data fitting.
69+
"""
70+
if noise_model == "nb":
71+
from batchglm.api.models.glm_nb import Simulator
72+
elif noise_model == "norm":
73+
from batchglm.api.models.glm_norm import Simulator
74+
else:
75+
raise ValueError("noise model %s not recognized" % noise_model)
76+
77+
sim = Simulator(num_observations=n_cells, num_features=n_genes)
78+
sim.generate_sample_description(num_batches=0, num_conditions=0)
79+
sim.generate()
80+
81+
random_sample_description = pd.DataFrame({
82+
"condition": np.random.randint(2, size=sim.nobs),
83+
"batch": np.random.randint(2, size=sim.nobs)
84+
})
85+
86+
res = de.fit.residuals(
87+
data=sim.input_data,
88+
sample_description=random_sample_description,
89+
formula_loc="~ 1 + condition + batch",
90+
noise_model=noise_model
91+
)
92+
return True
93+
94+
95+
class TestFitNb(_TestFit, unittest.TestCase):
96+
"""
97+
Negative binomial noise model unit tests that tests whether model fit relay works.
98+
"""
99+
100+
def test_model_fit(
101+
self,
102+
n_cells: int = 2000,
103+
n_genes: int = 2
104+
):
105+
"""
106+
Test if wald() generates a uniform p-value distribution for "nb" noise model.
107+
108+
:param n_cells: Number of cells to simulate (number of observations per test).
109+
:param n_genes: Number of genes to simulate (number of tests).
110+
"""
111+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
112+
logging.getLogger("batchglm").setLevel(logging.WARNING)
113+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
114+
115+
np.random.seed(1)
116+
return self._test_model_fit(
117+
n_cells=n_cells,
118+
n_genes=n_genes,
119+
noise_model="nb"
120+
)
121+
122+
def test_residuals_fit(
123+
self,
124+
n_cells: int = 2000,
125+
n_genes: int = 2
126+
):
127+
"""
128+
Test if wald() generates a uniform p-value distribution for "nb" noise model
129+
for multiple coefficients to test.
130+
131+
:param n_cells: Number of cells to simulate (number of observations per test).
132+
:param n_genes: Number of genes to simulate (number of tests).
133+
"""
134+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
135+
logging.getLogger("batchglm").setLevel(logging.WARNING)
136+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
137+
138+
np.random.seed(1)
139+
return self._test_residuals_fit(
140+
n_cells=n_cells,
141+
n_genes=n_genes,
142+
noise_model="nb"
143+
)
144+
145+
146+
class TestFitNorm(_TestFit, unittest.TestCase):
147+
"""
148+
Normal noise model unit tests that tests whether model fit relay works.
149+
"""
150+
151+
def test_model_fit(
152+
self,
153+
n_cells: int = 2000,
154+
n_genes: int = 2
155+
):
156+
"""
157+
Test if wald() generates a uniform p-value distribution for "norm" noise model.
158+
159+
:param n_cells: Number of cells to simulate (number of observations per test).
160+
:param n_genes: Number of genes to simulate (number of tests).
161+
"""
162+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
163+
logging.getLogger("batchglm").setLevel(logging.WARNING)
164+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
165+
166+
np.random.seed(1)
167+
return self._test_model_fit(
168+
n_cells=n_cells,
169+
n_genes=n_genes,
170+
noise_model="norm"
171+
)
172+
173+
def test_residuals_fit(
174+
self,
175+
n_cells: int = 2000,
176+
n_genes: int = 2
177+
):
178+
"""
179+
Test if wald() generates a uniform p-value distribution for "norm" noise model
180+
for multiple coefficients to test.
181+
182+
:param n_cells: Number of cells to simulate (number of observations per test).
183+
:param n_genes: Number of genes to simulate (number of tests).
184+
"""
185+
logging.getLogger("tensorflow").setLevel(logging.ERROR)
186+
logging.getLogger("batchglm").setLevel(logging.WARNING)
187+
logging.getLogger("diffxpy").setLevel(logging.WARNING)
188+
189+
np.random.seed(1)
190+
return self._test_residuals_fit(
191+
n_cells=n_cells,
192+
n_genes=n_genes,
193+
noise_model="norm"
194+
)
195+
196+
197+
if __name__ == '__main__':
198+
unittest.main()

0 commit comments

Comments
 (0)