@@ -26,13 +26,20 @@ def collect_geo_samples(f):
26
26
27
27
for x in root .findall ('miniml:Sample' , ns ):
28
28
gsm_id = x .find ("miniml:Accession[@database='GEO']" , ns )
29
- library_strategy = x .find ('miniml:Library-Strategy' , ns )
30
29
platform_id = x .find ('miniml:Platform-Ref' , ns )
31
30
sra_relation = x .find ("miniml:Relation[@type='SRA']" , ns )
32
- if gsm_id is None or platform_id is None or library_strategy is None or sra_relation is None :
31
+ if gsm_id is None or platform_id is None or sra_relation is None :
33
32
continue
34
- if library_strategy .text in ['RNA-Seq' , 'ssRNA-seq' ]:
35
- gsm_identifiers .add (gsm_id .text )
33
+ # this has to match the logic in Gemma for bulk RNA-Seq, see GeoConverterImpl.java
34
+ sample_type = x .find ('miniml:Type' , ns )
35
+ if sample_type is None :
36
+ continue
37
+ if sample_type .text == 'SRA' :
38
+ library_source = x .find ('miniml:Library-Source' , ns )
39
+ if library_source is not None and library_source .text == 'transcriptomic' :
40
+ library_strategy = x .find ('miniml:Library-Strategy' , ns )
41
+ if library_strategy is not None and library_strategy .text in ['RNA-Seq' , 'ssRNA-seq' , 'OTHER' ]:
42
+ gsm_identifiers .add (gsm_id .text )
36
43
37
44
return gsm_identifiers
38
45
0 commit comments