Skip to content

Commit 06136cc

Browse files
committed
Merge branch 'dev' of github.com:broadinstitute/seqr-loading-pipelines
2 parents c17dc72 + f76c161 commit 06136cc

File tree

4 files changed

+180
-3
lines changed

4 files changed

+180
-3
lines changed

v03_pipeline/lib/misc/family_loading_failures.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def passes_relatedness_check(
2222
if not coefficients or not np.allclose(
2323
coefficients,
2424
relation.coefficients,
25-
0.1,
25+
atol=0.1,
2626
):
2727
return (
2828
False,
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import luigi
2+
3+
from v03_pipeline.lib.model import (
4+
CachedReferenceDatasetQuery,
5+
DatasetType,
6+
ReferenceGenome,
7+
SampleType,
8+
)
9+
from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import (
10+
UpdatedCachedReferenceDatasetQuery,
11+
)
12+
13+
14+
class WriteCachedReferenceDatasetQuery(luigi.Task):
    """Fan out to one ``UpdatedCachedReferenceDatasetQuery`` task per query.

    The queries are those returned by
    ``CachedReferenceDatasetQuery.for_reference_genome_dataset_type`` for this
    task's ``reference_genome`` and ``dataset_type``. Each child task is
    constructed with this task's own ``param_kwargs`` plus the ``crdq`` it is
    responsible for.

    Completion is tracked with an in-memory flag rather than an output
    target: a newly constructed instance starts out incomplete and reports
    complete once ``run`` has started.
    """

    reference_genome = luigi.EnumParameter(enum=ReferenceGenome)
    dataset_type = luigi.EnumParameter(enum=DatasetType)
    sample_type = luigi.EnumParameter(enum=SampleType)

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Child tasks collected by ``run``; empty until then.
        self.dynamic_crdq_tasks = set()
        # Flipped to True at the start of ``run``; read back by ``complete``.
        self.checked_for_tasks = False

    def complete(self) -> bool:
        """Return True once ``run`` has started on this instance."""
        return self.checked_for_tasks

    def run(self):
        """Collect the per-query child tasks and yield them as one set."""
        self.checked_for_tasks = True
        queries = CachedReferenceDatasetQuery.for_reference_genome_dataset_type(
            self.reference_genome,
            self.dataset_type,
        )
        self.dynamic_crdq_tasks.update(
            UpdatedCachedReferenceDatasetQuery(**self.param_kwargs, crdq=query)
            for query in queries
        )
        # Yielding from ``run`` hands the set to luigi as dynamic dependencies.
        yield self.dynamic_crdq_tasks
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import unittest
2+
from unittest import mock
3+
4+
import luigi
5+
6+
from v03_pipeline.lib.model import (
7+
CachedReferenceDatasetQuery,
8+
DatasetType,
9+
ReferenceGenome,
10+
SampleType,
11+
)
12+
from v03_pipeline.lib.tasks.reference_data.write_cached_reference_dataset_query import (
13+
WriteCachedReferenceDatasetQuery,
14+
)
15+
from v03_pipeline.lib.test.mock_complete_task import MockCompleteTask
16+
17+
18+
@mock.patch(
    'v03_pipeline.lib.tasks.reference_data.write_cached_reference_dataset_query.UpdatedCachedReferenceDatasetQuery',
)
class WriteCachedReferenceDatasetQueryTest(unittest.TestCase):
    """Check which ``UpdatedCachedReferenceDatasetQuery`` tasks get created.

    ``UpdatedCachedReferenceDatasetQuery`` is patched in the module under
    test, so every test asserts on the keyword arguments the patched class
    was called with. The class-level patch only wraps methods whose names
    start with ``test``; the underscore-prefixed helpers below therefore do
    not receive the mock argument.
    """

    def _run_task(self, reference_genome, dataset_type):
        """Run the task under test on a fresh luigi worker and return it."""
        worker = luigi.worker.Worker()
        task = WriteCachedReferenceDatasetQuery(
            reference_genome=reference_genome,
            dataset_type=dataset_type,
            sample_type=SampleType.WGS,
        )
        worker.add(task)
        worker.run()
        return task

    @staticmethod
    def _expected_calls(reference_genome, dataset_type, crdqs):
        """Build the expected constructor calls, one per query, in order."""
        return [
            mock.call(
                reference_genome=reference_genome,
                dataset_type=dataset_type,
                sample_type=SampleType.WGS,
                crdq=crdq,
            )
            for crdq in crdqs
        ]

    def test_37_snv_indel(self, mock_crdq_task):
        mock_crdq_task.return_value = MockCompleteTask()
        task = self._run_task(ReferenceGenome.GRCh37, DatasetType.SNV_INDEL)
        self.assertTrue(task.complete())
        mock_crdq_task.assert_has_calls(
            self._expected_calls(
                ReferenceGenome.GRCh37,
                DatasetType.SNV_INDEL,
                [
                    CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
                    CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
                    CachedReferenceDatasetQuery.GNOMAD_QC,
                    CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
                ],
            ),
        )

    def test_38_snv_indel(self, mock_crdq_task):
        mock_crdq_task.return_value = MockCompleteTask()
        task = self._run_task(ReferenceGenome.GRCh38, DatasetType.SNV_INDEL)
        self.assertTrue(task.complete())
        mock_crdq_task.assert_has_calls(
            self._expected_calls(
                ReferenceGenome.GRCh38,
                DatasetType.SNV_INDEL,
                [
                    CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
                    CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
                    CachedReferenceDatasetQuery.GNOMAD_QC,
                    CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
                ],
            ),
        )

    def test_38_mito(self, mock_crdq_task):
        mock_crdq_task.return_value = MockCompleteTask()
        task = self._run_task(ReferenceGenome.GRCh38, DatasetType.MITO)
        self.assertTrue(task.complete())
        mock_crdq_task.assert_has_calls(
            self._expected_calls(
                ReferenceGenome.GRCh38,
                DatasetType.MITO,
                [CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS],
            ),
        )

    def test_38_sv(self, mock_crdq_task):
        mock_crdq_task.return_value = MockCompleteTask()
        task = self._run_task(ReferenceGenome.GRCh38, DatasetType.SV)
        self.assertTrue(task.complete())
        # assert no crdq tasks for this reference genome and dataset type.
        # ``assert_has_calls([])`` would pass for any call history, so it
        # cannot express "never called"; ``assert_not_called`` can.
        mock_crdq_task.assert_not_called()

v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,14 @@ def create_table(self) -> hl.MatrixTable:
118118
relatedness_check_ht = hl.read_table(self.input()[2].path)
119119
sex_check_ht = hl.read_table(self.input()[3].path)
120120
families_failed_relatedness_check = get_families_failed_relatedness_check(
121-
families,
121+
families - families_failed_missing_samples.keys(),
122122
relatedness_check_ht,
123123
remap_lookup,
124124
)
125125
families_failed_sex_check = get_families_failed_sex_check(
126-
families,
126+
families
127+
- families_failed_missing_samples.keys()
128+
- families_failed_relatedness_check.keys(),
127129
sex_check_ht,
128130
remap_lookup,
129131
)

0 commit comments

Comments
 (0)