Skip to content

Commit a45ed54

Browse files
committed
Add an option for ignoring individual samples when downloading a GEO series or SRA project
1 parent 7ba68a1 commit a45ed54

File tree

3 files changed

+9
-3
lines changed

3 files changed

+9
-3
lines changed

rnaseq_pipeline/sources/geo.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,11 @@ class DownloadGeoSeries(DynamicTaskWithOutputMixin, DynamicWrapperTask):
136136
Download all GEO Samples related to a GEO Series.
137137
"""
138138

139+
ignored_samples = luigi.ListParameter(default=[], description='Ignored GSM identifiers')
140+
139141
def run(self):
140142
gsms = collect_geo_samples(self.input().path)
143+
gsms = [gsm for gsm in gsms if gsm not in self.ignored_samples]
141144
if not gsms:
142145
raise ValueError('{} has no related GEO samples with RNA-Seq data.'.format(self.gse))
143146
yield [DownloadGeoSample(gsm, metadata=self.metadata) for gsm in gsms]

rnaseq_pipeline/sources/sra.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,10 @@ def output(self):
182182

183183
@requires(DownloadSraProjectRunInfo)
184184
class DownloadSraProject(DynamicTaskWithOutputMixin, DynamicWrapperTask):
185+
ignored_samples = luigi.ListParameter(default=[], description='Ignored SRX identifiers')
185186
def run(self):
186187
df = read_runinfo(self.input().path)
187-
yield [DownloadSraExperiment(experiment, metadata=self.metadata) for experiment, runs in df.groupby('Experiment')]
188+
yield [DownloadSraExperiment(experiment, metadata=self.metadata) for experiment, runs in df.groupby('Experiment') if experiment not in self.ignored_samples]
188189

189190
@requires(DownloadSraProjectRunInfo, DownloadSraProject)
190191
class ExtractSraProjectBatchInfo(luigi.Task):

rnaseq_pipeline/tasks.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -396,13 +396,15 @@ class SubmitExperimentBatchInfoToGemma(RerunnableTaskMixin, GemmaCliTask):
396396

397397
resources = {'submit_batch_info_jobs': 1}
398398

399+
ignored_samples = luigi.ListParameter(default=[])
400+
399401
def requires(self):
400402
# TODO: Have a generic strategy for extracting batch info that would
401403
# work for all sources
402404
if self.external_database == 'GEO':
403-
return ExtractGeoSeriesBatchInfo(self.accession, metadata=dict(experiment_id=self.experiment_id))
405+
return ExtractGeoSeriesBatchInfo(self.accession, metadata=dict(experiment_id=self.experiment_id), ignored_samples=self.ignored_samples)
404406
elif self.external_database == 'SRA':
405-
return ExtractSraProjectBatchInfo(self.accession, metadata=dict(experiment_id=self.experiment_id))
407+
return ExtractSraProjectBatchInfo(self.accession, metadata=dict(experiment_id=self.experiment_id), ignored_samples=self.ignored_samples)
406408
else:
407409
raise NotImplementedError('Extracting batch information from {} is not supported.'.format(self.external_database))
408410

0 commit comments

Comments
 (0)