Skip to content

Commit 1a660ed

Browse files
committed
Improve logic for detecting bulk RNA-Seq datasets
1 parent b626a41 commit 1a660ed

File tree

1 file changed

+11
-4
lines changed

1 file changed

+11
-4
lines changed

rnaseq_pipeline/miniml_utils.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,20 @@ def collect_geo_samples(f):
2626

2727
for x in root.findall('miniml:Sample', ns):
2828
gsm_id = x.find("miniml:Accession[@database='GEO']", ns)
29-
library_strategy = x.find('miniml:Library-Strategy', ns)
3029
platform_id = x.find('miniml:Platform-Ref', ns)
3130
sra_relation = x.find("miniml:Relation[@type='SRA']", ns)
32-
if gsm_id is None or platform_id is None or library_strategy is None or sra_relation is None:
31+
if gsm_id is None or platform_id is None or sra_relation is None:
3332
continue
34-
if library_strategy.text in ['RNA-Seq', 'ssRNA-seq']:
35-
gsm_identifiers.add(gsm_id.text)
33+
# this has to match the logic in Gemma for bulk RNA-Seq, see GeoConverterImpl.java
34+
sample_type = x.find('miniml:Type', ns)
35+
if sample_type is None:
36+
continue
37+
if sample_type.text == 'SRA':
38+
library_source = x.find('miniml:Library-Source', ns)
39+
if library_source is not None and library_source.text == 'transcriptomic':
40+
library_strategy = x.find('miniml:Library-Strategy', ns)
41+
if library_strategy is not None and library_strategy.text in ['RNA-Seq', 'ssRNA-seq', 'OTHER']:
42+
gsm_identifiers.add(gsm_id.text)
3643

3744
return gsm_identifiers
3845

0 commit comments

Comments
 (0)