7
7
from ..testing import correction
8
8
from ..testing .det import _DifferentialExpressionTest
9
9
10
- logger = logging .getLogger (__name__ )
11
10
12
11
class RefSets :
13
12
"""
@@ -42,15 +41,19 @@ def clean(self, ids):
42
41
43
42
def __init__ (self , sets = None , fn = None , type = 'gmt' ):
44
43
if sets is not None :
45
- self .load_sets (sets , type = type )
46
- self ._genes = np .sort (np .unique (np .concatenate ([np .asarray (list (x .genes )) for x in self .sets ])))
44
+ if len (sets ) > 0 :
45
+ self .load_sets (sets , type = type )
46
+ self ._genes = np .sort (np .unique (np .concatenate ([np .asarray (list (x .genes )) for x in self .sets ])))
47
+ else :
48
+ self .sets = []
49
+ self ._genes = np .array ([])
47
50
elif fn is not None :
48
51
self .read_from_file (fn = fn , type = type )
49
52
self ._genes = np .sort (np .unique (np .concatenate ([np .asarray (list (x .genes )) for x in self .sets ])))
50
53
else :
51
54
self .sets = []
52
55
self ._genes = np .array ([])
53
- self ._ids = [x .id for x in self .sets ]
56
+ self ._ids = np . array ( [x .id for x in self .sets ])
54
57
self ._set_lens = np .array ([x .len for x in self .sets ])
55
58
self .genes_discarded = None
56
59
@@ -113,7 +116,7 @@ def add(self, id: str, source: str, gene_ids: list):
113
116
self .sets .append (self ._Set (id = id , source = source , gene_ids = gene_ids ))
114
117
# Update summary variables:
115
118
self ._genes = np .sort (np .unique (np .concatenate ([np .asarray (list (x .genes )) for x in self .sets ])))
116
- self ._ids = [x .id for x in self .sets ]
119
+ self ._ids = np . array ( [x .id for x in self .sets ])
117
120
self ._set_lens = np .array ([x .len for x in self .sets ])
118
121
119
122
## Processing functions.
@@ -165,7 +168,7 @@ def get_set(self, id):
165
168
"""
166
169
Return the set with a given set identifier.
167
170
"""
168
- return self .sets [self ._ids .index (id )]
171
+ return self .sets [self ._ids .tolist (). index (id )]
169
172
170
173
## Overlap functions.
171
174
@@ -205,31 +208,22 @@ def test(
205
208
nice doc string and that the call to this is de.enrich.test which
206
209
makes more sense to me than de.enrich.Enrich.
207
210
208
- :param RefSets:
209
- The annotated gene sets against which enrichment is tested.
210
- :param DETest:
211
- The differential expression results object which is tested
211
+ :param ref: The annotated gene sets against which enrichment is tested.
212
+ :param det: The differential expression results object which is tested
212
213
for enrichment in the gene sets.
213
- :param pval:
214
- Alternative to DETest, vector of p-values for differential expression.
215
- :param gene_ids:
216
- If pval was supplied instead of DETest, use gene_ids to supply the
214
+ :param pval: Alternative to DETest, vector of p-values for differential expression.
215
+ :param gene_ids: If pval was supplied instead of DETest, use gene_ids to supply the
217
216
vector of gene identifiers (strings) that correspond to the p-values
218
217
which can be matched against the identifiers in the sets in RefSets.
219
- :param de_threshold:
220
- Significance threshold at which a differential test (a multiple-testing
218
+ :param de_threshold: Significance threshold at which a differential test (a multiple-testing
221
219
corrected p-value) is called significant.
222
- :param incl_all_zero:
223
- Wehther to include genes in gene universe which were all zero.
224
- :param all_ids:
225
- Set of all gene identifiers, this is used as the background set in the
220
+ :param incl_all_zero: Whether to include genes in gene universe which were all zero.
221
+ :param all_ids: Set of all gene identifiers, this is used as the background set in the
226
222
hypergeometric test. Only supply this if not all genes were tested
227
223
and are supplied above in DETest or gene_ids.
228
- :param clean_ref:
229
- Whether or not to only retain gene identifiers in RefSets that occur in
224
+ :param clean_ref: Whether or not to only retain gene identifiers in RefSets that occur in
230
225
the background set of identifiers supplied here through all_ids.
231
- :param capital:
232
- Make all gene IDs captial.
226
+ :param capital: Make all gene IDs capital.
233
227
"""
234
228
return Enrich (
235
229
ref = ref ,
@@ -263,8 +257,8 @@ def __init__(
263
257
self ._n_overlaps = None
264
258
self ._pval_enrich = None
265
259
self ._qval_enrich = None
266
- if isinstance (gene_ids , np . ndarray ):
267
- gene_ids = gene_ids . tolist ( )
260
+ if isinstance (gene_ids , list ):
261
+ gene_ids = np . asarray ( gene_ids )
268
262
# Load multiple-testing-corrected differential expression
269
263
# p-values from differential expression output.
270
264
if det is not None :
@@ -284,7 +278,7 @@ def __init__(
284
278
# Select significant genes based on user defined threshold.
285
279
if any ([x is np .nan for x in self ._gene_ids ]):
286
280
idx_notnan = np .where ([x is not np .nan for x in self ._gene_ids ])[0 ]
287
- logger .info (
281
+ logging . getLogger ( "diffxpy" ) .info (
288
282
" Discarded %i nan gene ids, leaving %i genes." ,
289
283
len (self ._gene_ids ) - len (idx_notnan ),
290
284
len (idx_notnan )
@@ -305,7 +299,7 @@ def __init__(
305
299
self ._significant_ids = set ([x .upper () for x in self ._significant_ids ])
306
300
307
301
# Generate diagnostic statistic of number of possible overlaps in total.
308
- logger .info (
302
+ logging . getLogger ( "diffxpy" ) .info (
309
303
" %i overlaps found between refset (%i) and provided gene list (%i)." ,
310
304
len (set (self ._all_ids ).intersection (set (ref ._genes ))),
311
305
len (ref ._genes ),
@@ -320,7 +314,7 @@ def __init__(
320
314
# Print if there are empty sets.
321
315
idx_nonempty = np .where ([len (x .genes ) > 0 for x in self .RefSets .sets ])[0 ]
322
316
if len (self .RefSets .sets ) - len (idx_nonempty ) > 0 :
323
- logger .info (
317
+ logging . getLogger ( "diffxpy" ) .info (
324
318
" Found %i empty sets, removing those." ,
325
319
len (self .RefSets .sets ) - len (idx_nonempty )
326
320
)
@@ -391,7 +385,10 @@ def significant_sets(self, threshold=0.05) -> list:
391
385
"""
392
386
Return significant sets from gene set enrichment analysis as an output table.
393
387
"""
394
- return self .RefSets .subset (idx = np .where (self .qval <= threshold )[0 ])
388
+ sig_sets = np .where (self .qval <= threshold )[0 ]
389
+ if len (sig_sets ) == 0 :
390
+ logging .getLogger ("diffxpy" ).info ("no significant sets found" )
391
+ return self .RefSets .subset (idx = sig_sets )
395
392
396
393
def significant_set_ids (self , threshold = 0.05 ) -> np .array :
397
394
"""
@@ -426,4 +423,4 @@ def set_summary(self, id: str):
426
423
427
424
:return: Slice of summary table.
428
425
"""
429
- return self .summary (sort = False ).iloc [self .RefSets ._ids .index (id ), :]
426
+ return self .summary (sort = False ).iloc [self .RefSets ._ids .tolist (). index (id ), :]
0 commit comments