Skip to content

Commit d2a27b7

Browse files
authored
crdq wrapper task (#724) (#726)
* crdq wrapper task * handle no crdq tasks * add sample type so I can use base_task_args in airflow task
1 parent 4c30439 commit d2a27b7

File tree

2 files changed

+175
-0
lines changed

2 files changed

+175
-0
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import luigi
2+
3+
from v03_pipeline.lib.model import (
4+
CachedReferenceDatasetQuery,
5+
DatasetType,
6+
ReferenceGenome,
7+
SampleType,
8+
)
9+
from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import (
10+
UpdatedCachedReferenceDatasetQuery,
11+
)
12+
13+
14+
class WriteCachedReferenceDatasetQuery(luigi.Task):
    """Wrapper task that fans out to one child task per cached query.

    On ``run`` the task asks ``CachedReferenceDatasetQuery`` which queries
    apply to ``reference_genome`` / ``dataset_type`` and yields one
    ``UpdatedCachedReferenceDatasetQuery`` per query as a dynamic
    dependency.  It writes no output of its own, so completion is tracked
    in memory on the task instance.
    """

    reference_genome = luigi.EnumParameter(enum=ReferenceGenome)
    dataset_type = luigi.EnumParameter(enum=DatasetType)
    # Not read by this task itself; it is forwarded to every child task
    # through ``param_kwargs``.
    sample_type = luigi.EnumParameter(enum=SampleType)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Flipped to True the first time ``run`` executes, so that a
        # reference genome / dataset type with no applicable queries can
        # still be reported as complete.
        self.checked_for_tasks = False
        # Child tasks discovered by ``run``.
        self.dynamic_crdq_tasks = set()

    def complete(self) -> bool:
        """Return True once ``run`` has executed and every child is complete.

        Checking the children matters because ``checked_for_tasks`` is set
        before the dynamic dependencies are yielded; without it the wrapper
        would claim completion even if a child task failed or never ran.
        With no children, ``all`` of an empty set is True, so the result
        then depends only on ``checked_for_tasks``.
        """
        return self.checked_for_tasks and all(
            crdq_task.complete() for crdq_task in self.dynamic_crdq_tasks
        )

    def run(self):
        """Collect the child tasks and yield them as dynamic dependencies."""
        self.checked_for_tasks = True
        # ``dynamic_crdq_tasks`` is a set, so re-adding an equal task on a
        # repeated ``run`` does not create duplicates.
        self.dynamic_crdq_tasks.update(
            UpdatedCachedReferenceDatasetQuery(
                **self.param_kwargs,
                crdq=crdq,
            )
            for crdq in CachedReferenceDatasetQuery.for_reference_genome_dataset_type(
                self.reference_genome,
                self.dataset_type,
            )
        )
        yield self.dynamic_crdq_tasks
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import unittest
2+
from unittest import mock
3+
4+
import luigi
5+
6+
from v03_pipeline.lib.model import (
7+
CachedReferenceDatasetQuery,
8+
DatasetType,
9+
ReferenceGenome,
10+
SampleType,
11+
)
12+
from v03_pipeline.lib.tasks.reference_data.write_cached_reference_dataset_query import (
13+
WriteCachedReferenceDatasetQuery,
14+
)
15+
from v03_pipeline.lib.test.mock_complete_task import MockCompleteTask
16+
17+
18+
@mock.patch(
    'v03_pipeline.lib.tasks.reference_data.write_cached_reference_dataset_query.UpdatedCachedReferenceDatasetQuery',
)
class WriteCachedReferenceDatasetQueryTest(unittest.TestCase):
    """Checks which child tasks WriteCachedReferenceDatasetQuery constructs.

    ``UpdatedCachedReferenceDatasetQuery`` is patched for every ``test_*``
    method and returns a ``MockCompleteTask``, so only the constructor
    calls made by the wrapper are verified.  The helpers below do not
    start with ``test`` and therefore are not wrapped by ``mock.patch``.
    """

    # Queries expected for SNV_INDEL on both reference genomes, in the
    # order the wrapper is expected to construct them.
    _SNV_INDEL_CRDQS = (
        CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
        CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
        CachedReferenceDatasetQuery.GNOMAD_QC,
        CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
    )

    def _run_wrapper_task(self, mock_crdq_task, reference_genome, dataset_type):
        """Run the wrapper through a luigi worker and assert it completes."""
        mock_crdq_task.return_value = MockCompleteTask()
        worker = luigi.worker.Worker()
        task = WriteCachedReferenceDatasetQuery(
            reference_genome=reference_genome,
            dataset_type=dataset_type,
            sample_type=SampleType.WGS,
        )
        worker.add(task)
        worker.run()
        self.assertTrue(task.complete())

    @staticmethod
    def _expected_calls(reference_genome, dataset_type, crdqs):
        """Build the expected child-task constructor calls, one per query."""
        return [
            mock.call(
                reference_genome=reference_genome,
                dataset_type=dataset_type,
                sample_type=SampleType.WGS,
                crdq=crdq,
            )
            for crdq in crdqs
        ]

    def test_37_snv_indel(self, mock_crdq_task):
        self._run_wrapper_task(
            mock_crdq_task,
            ReferenceGenome.GRCh37,
            DatasetType.SNV_INDEL,
        )
        mock_crdq_task.assert_has_calls(
            self._expected_calls(
                ReferenceGenome.GRCh37,
                DatasetType.SNV_INDEL,
                self._SNV_INDEL_CRDQS,
            ),
        )

    def test_38_snv_indel(self, mock_crdq_task):
        self._run_wrapper_task(
            mock_crdq_task,
            ReferenceGenome.GRCh38,
            DatasetType.SNV_INDEL,
        )
        mock_crdq_task.assert_has_calls(
            self._expected_calls(
                ReferenceGenome.GRCh38,
                DatasetType.SNV_INDEL,
                self._SNV_INDEL_CRDQS,
            ),
        )

    def test_38_mito(self, mock_crdq_task):
        self._run_wrapper_task(
            mock_crdq_task,
            ReferenceGenome.GRCh38,
            DatasetType.MITO,
        )
        mock_crdq_task.assert_has_calls(
            self._expected_calls(
                ReferenceGenome.GRCh38,
                DatasetType.MITO,
                (CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,),
            ),
        )

    def test_38_sv(self, mock_crdq_task):
        self._run_wrapper_task(
            mock_crdq_task,
            ReferenceGenome.GRCh38,
            DatasetType.SV,
        )
        # assert no crdq tasks for this reference genome and dataset type.
        # ``assert_has_calls([])`` would pass no matter how many calls were
        # made, so check explicitly that the constructor was never called.
        mock_crdq_task.assert_not_called()

0 commit comments

Comments
 (0)