Skip to content

Commit bbaf421

Browse files
committed
Add a check for validity of GEO metadata
1 parent 16f249d commit bbaf421

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

rnaseq_pipeline/sources/geo.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
from functools import lru_cache
1313
import re
1414
import requests
15-
import xml.etree.ElementTree
15+
from xml.etree import ElementTree
1616

1717
from bioluigi.tasks.utils import DynamicTaskWithOutputMixin, DynamicWrapperTask, TaskWithMetadataMixin
1818
import luigi
@@ -37,7 +37,7 @@ def retrieve_geo_platform_miniml(geo_platform):
3737
"""Retrieve a GEO platform MINiML metadata"""
3838
res = requests.get('https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi', params=dict(acc=geo_platform, form='xml'))
3939
res.raise_for_status()
40-
return xml.etree.ElementTree.fromstring(res.text).find('miniml:Platform', ns)
40+
return ElementTree.fromstring(res.text).find('miniml:Platform', ns)
4141

4242
def match_geo_platform(geo_platform):
4343
"""Infer the type of platform given a GEO platform"""
@@ -73,6 +73,10 @@ def run(self):
7373
logger.info('%s is stale, redownloading...', self.output())
7474
res = requests.get('https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi', params=dict(acc=self.gsm, form='xml'))
7575
res.raise_for_status()
76+
try:
77+
ElementTree.fromstring(res.text)
78+
except ElementTree.ParseError as e:
79+
raise Exception('Failed to parse XML from GEO sample metadata of ' + self.gsm) from e
7680
with self.output().open('w') as f:
7781
f.write(res.text)
7882

@@ -125,6 +129,10 @@ def run(self):
125129
logger.info('%s is stale, redownloading...', self.output())
126130
res = requests.get('https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi', params=dict(acc=self.gse, form='xml', targ='gsm'))
127131
res.raise_for_status()
132+
try:
133+
ElementTree.fromstring(res.text)
134+
except ElementTree.ParseError as e:
135+
raise Exception('Failed to parse XML from GEO series metadata of ' + self.gse) from e
128136
with self.output().open('w') as f:
129137
f.write(res.text)
130138

0 commit comments

Comments
 (0)