Skip to content

Commit 82d6486

Browse files
fixed list / nd array bugs in enrich
1 parent fa4067f commit 82d6486

File tree

2 files changed

+42
-36
lines changed

2 files changed

+42
-36
lines changed

diffxpy/enrichment/enrich.py

Lines changed: 28 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from ..testing import correction
88
from ..testing.det import _DifferentialExpressionTest
99

10-
logger = logging.getLogger(__name__)
1110

1211
class RefSets:
1312
"""
@@ -42,15 +41,19 @@ def clean(self, ids):
4241

4342
def __init__(self, sets=None, fn=None, type='gmt'):
4443
if sets is not None:
45-
self.load_sets(sets, type=type)
46-
self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
44+
if len(sets) > 0:
45+
self.load_sets(sets, type=type)
46+
self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
47+
else:
48+
self.sets = []
49+
self._genes = np.array([])
4750
elif fn is not None:
4851
self.read_from_file(fn=fn, type=type)
4952
self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
5053
else:
5154
self.sets = []
5255
self._genes = np.array([])
53-
self._ids = [x.id for x in self.sets]
56+
self._ids = np.array([x.id for x in self.sets])
5457
self._set_lens = np.array([x.len for x in self.sets])
5558
self.genes_discarded = None
5659

@@ -113,7 +116,7 @@ def add(self, id: str, source: str, gene_ids: list):
113116
self.sets.append(self._Set(id=id, source=source, gene_ids=gene_ids))
114117
# Update summary variables:
115118
self._genes = np.sort(np.unique(np.concatenate([np.asarray(list(x.genes)) for x in self.sets])))
116-
self._ids = [x.id for x in self.sets]
119+
self._ids = np.array([x.id for x in self.sets])
117120
self._set_lens = np.array([x.len for x in self.sets])
118121

119122
## Processing functions.
@@ -165,7 +168,7 @@ def get_set(self, id):
165168
"""
166169
Return the set with a given set identifier.
167170
"""
168-
return self.sets[self._ids.index(id)]
171+
return self.sets[self._ids.tolist().index(id)]
169172

170173
## Overlap functions.
171174

@@ -205,31 +208,22 @@ def test(
205208
nice doc string and that the call to this is de.enrich.test which
206209
makes more sense to me than de.enrich.Enrich.
207210
208-
:param RefSets:
209-
The annotated gene sets against which enrichment is tested.
210-
:param DETest:
211-
The differential expression results object which is tested
211+
:param ref: The annotated gene sets against which enrichment is tested.
212+
:param det: The differential expression results object which is tested
212213
for enrichment in the gene sets.
213-
:param pval:
214-
Alternative to DETest, vector of p-values for differential expression.
215-
:param gene_ids:
216-
If pval was supplied instead of DETest, use gene_ids to supply the
214+
:param pval: Alternative to DETest, vector of p-values for differential expression.
215+
:param gene_ids: If pval was supplied instead of DETest, use gene_ids to supply the
217216
vector of gene identifiers (strings) that correspond to the p-values
218217
which can be matched against the identifieres in the sets in RefSets.
219-
:param de_threshold:
220-
Significance threshold at which a differential test (a multiple-testing
218+
:param de_threshold: Significance threshold at which a differential test (a multiple-testing
221219
corrected p-value) is called siginficant. T
222-
:param incl_all_zero:
223-
Wehther to include genes in gene universe which were all zero.
224-
:param all_ids:
225-
Set of all gene identifiers, this is used as the background set in the
220+
:param incl_all_zero: Wehther to include genes in gene universe which were all zero.
221+
:param all_ids: Set of all gene identifiers, this is used as the background set in the
226222
hypergeometric test. Only supply this if not all genes were tested
227223
and are supplied above in DETest or gene_ids.
228-
:param clean_ref:
229-
Whether or not to only retain gene identifiers in RefSets that occur in
224+
:param clean_ref: Whether or not to only retain gene identifiers in RefSets that occur in
230225
the background set of identifiers supplied here through all_ids.
231-
:param capital:
232-
Make all gene IDs captial.
226+
:param capital: Make all gene IDs captial.
233227
"""
234228
return Enrich(
235229
ref=ref,
@@ -263,8 +257,8 @@ def __init__(
263257
self._n_overlaps = None
264258
self._pval_enrich = None
265259
self._qval_enrich = None
266-
if isinstance(gene_ids, np.ndarray):
267-
gene_ids = gene_ids.tolist()
260+
if isinstance(gene_ids, list):
261+
gene_ids = np.asarray(gene_ids)
268262
# Load multiple-testing-corrected differential expression
269263
# p-values from differential expression output.
270264
if det is not None:
@@ -284,7 +278,7 @@ def __init__(
284278
# Select significant genes based on user defined threshold.
285279
if any([x is np.nan for x in self._gene_ids]):
286280
idx_notnan = np.where([x is not np.nan for x in self._gene_ids])[0]
287-
logger.info(
281+
logging.getLogger("diffxpy").info(
288282
" Discarded %i nan gene ids, leaving %i genes.",
289283
len(self._gene_ids) - len(idx_notnan),
290284
len(idx_notnan)
@@ -305,7 +299,7 @@ def __init__(
305299
self._significant_ids = set([x.upper() for x in self._significant_ids])
306300

307301
# Generate diagnostic statistic of number of possible overlaps in total.
308-
logger.info(
302+
logging.getLogger("diffxpy").info(
309303
" %i overlaps found between refset (%i) and provided gene list (%i).",
310304
len(set(self._all_ids).intersection(set(ref._genes))),
311305
len(ref._genes),
@@ -320,7 +314,7 @@ def __init__(
320314
# Print if there are empty sets.
321315
idx_nonempty = np.where([len(x.genes) > 0 for x in self.RefSets.sets])[0]
322316
if len(self.RefSets.sets) - len(idx_nonempty) > 0:
323-
logger.info(
317+
logging.getLogger("diffxpy").info(
324318
" Found %i empty sets, removing those.",
325319
len(self.RefSets.sets) - len(idx_nonempty)
326320
)
@@ -391,7 +385,10 @@ def significant_sets(self, threshold=0.05) -> list:
391385
"""
392386
Return significant sets from gene set enrichement analysis as an output table.
393387
"""
394-
return self.RefSets.subset(idx=np.where(self.qval <= threshold)[0])
388+
sig_sets = np.where(self.qval <= threshold)[0]
389+
if len(sig_sets) == 0:
390+
logging.getLogger("diffxpy").info("no significant sets found")
391+
return self.RefSets.subset(idx=sig_sets)
395392

396393
def significant_set_ids(self, threshold=0.05) -> np.array:
397394
"""
@@ -426,4 +423,4 @@ def set_summary(self, id: str):
426423
427424
:return: Slice of summary table.
428425
"""
429-
return self.summary(sort=False).iloc[self.RefSets._ids.index(id), :]
426+
return self.summary(sort=False).iloc[self.RefSets._ids.tolist().index(id), :]

diffxpy/unit_test/test_enrich.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,20 @@ def test_for_fatal(self):
3636
rs.add(id="set1", source="manual", gene_ids=["1", "3"])
3737
rs.add(id="set2", source="manual", gene_ids=["5", "6"])
3838

39-
enrich_test = de.enrich.test(
40-
ref=rs,
41-
det=test,
42-
de_threshold=0.05
43-
)
39+
for i in [True, False]:
40+
for j in [True, False]:
41+
enrich_test_i = de.enrich.test(
42+
ref=rs,
43+
det=test,
44+
de_threshold=0.05,
45+
incl_all_zero=i,
46+
clean_ref=j,
47+
)
48+
_ = enrich_test_i.summary()
49+
_ = enrich_test_i.significant_set_ids()
50+
_ = enrich_test_i.significant_sets()
51+
_ = enrich_test_i.set_summary(id="set1")
52+
4453
return True
4554

4655

0 commit comments

Comments
 (0)