Skip to content

Commit b97cf6f

Browse files
bpblankenmatren395
andauthored
vets filters (#774)
* vets filters * move to separate module * remove PASS * fix linters * Update v03_pipeline/lib/misc/vets_test.py Co-authored-by: Daniel Marten <78616802+matren395@users.noreply.github.com> * fix tests * PR review comments * Update vets.py --------- Co-authored-by: Daniel Marten <78616802+matren395@users.noreply.github.com>
1 parent b85db51 commit b97cf6f

File tree

4 files changed

+152
-1
lines changed

4 files changed

+152
-1
lines changed

v03_pipeline/lib/misc/io_test.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import unittest
22

3-
from v03_pipeline.lib.misc.io import compute_hail_n_partitions, file_size_bytes
3+
from v03_pipeline.lib.misc.io import (
4+
compute_hail_n_partitions,
5+
file_size_bytes,
6+
)
47

58
TEST_MITO_MT = 'v03_pipeline/var/test/callsets/mito_1.mt'
69
TEST_SV_VCF = 'v03_pipeline/var/test/callsets/sv_1.vcf'

v03_pipeline/lib/misc/vets.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import hail as hl
2+
3+
VETS_SNP_CUTOFF = 0.997
4+
VETS_INDEL_CUTOFF = 0.99
5+
VETS_SNP_FILTER = 'high_CALIBRATION_SENSITIVITY_SNP'
6+
VETS_INDEL_FILTER = 'high_CALIBRATION_SENSITIVITY_INDEL'
7+
8+
9+
def annotate_vets(mt: hl.MatrixTable) -> hl.MatrixTable:
10+
if not hasattr(mt, 'info') or not hasattr(mt.info, 'CALIBRATION_SENSITIVITY'):
11+
return mt
12+
return mt.annotate_rows(
13+
filters=hl.bind(
14+
lambda is_snp, split_cs: (
15+
hl.case()
16+
.when(
17+
is_snp & (split_cs > VETS_SNP_CUTOFF),
18+
hl.if_else(
19+
hl.is_defined(mt.filters),
20+
mt.filters.add(VETS_SNP_FILTER),
21+
hl.set([VETS_SNP_FILTER]),
22+
),
23+
)
24+
.when(
25+
~is_snp & (split_cs > VETS_INDEL_CUTOFF),
26+
hl.if_else(
27+
hl.is_defined(mt.filters),
28+
mt.filters.add(VETS_INDEL_FILTER),
29+
hl.set([VETS_INDEL_FILTER]),
30+
),
31+
)
32+
.default(
33+
mt.filters,
34+
)
35+
),
36+
hl.is_snp(mt.alleles[0], mt.alleles[1]),
37+
hl.parse_float(mt.info.CALIBRATION_SENSITIVITY[mt.a_index - 1]),
38+
),
39+
)

v03_pipeline/lib/misc/vets_test.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import unittest
2+
3+
import hail as hl
4+
5+
from v03_pipeline.lib.misc.io import split_multi_hts
6+
from v03_pipeline.lib.misc.vets import (
7+
annotate_vets,
8+
)
9+
10+
11+
class VetsTest(unittest.TestCase):
12+
def test_annotate_vets(self) -> None:
13+
gatk_mt = hl.MatrixTable.from_parts(
14+
rows={
15+
'locus': [
16+
hl.Locus(
17+
contig='chr1',
18+
position=1,
19+
reference_genome='GRCh38',
20+
),
21+
],
22+
'filters': [
23+
hl.set(['NO_HQ_GENOTYPES']),
24+
],
25+
},
26+
cols={'s': ['sample_1']},
27+
entries={'HL': [[0.0]]},
28+
).key_rows_by('locus')
29+
gatk_mt = annotate_vets(gatk_mt)
30+
dragen_mt = hl.MatrixTable.from_parts(
31+
rows={
32+
'locus': [
33+
hl.Locus(
34+
contig='chr1',
35+
position=1,
36+
reference_genome='GRCh38',
37+
),
38+
hl.Locus(
39+
contig='chr1',
40+
position=2,
41+
reference_genome='GRCh38',
42+
),
43+
hl.Locus(
44+
contig='chr1',
45+
position=3,
46+
reference_genome='GRCh38',
47+
),
48+
hl.Locus(
49+
contig='chr1',
50+
position=4,
51+
reference_genome='GRCh38',
52+
),
53+
hl.Locus(
54+
contig='chr1',
55+
position=5,
56+
reference_genome='GRCh38',
57+
),
58+
hl.Locus(
59+
contig='chr1',
60+
position=6,
61+
reference_genome='GRCh38',
62+
),
63+
],
64+
'alleles': [
65+
['A', 'T'],
66+
['A', 'T'],
67+
['A', 'T'],
68+
['AC', 'T'],
69+
['AT', 'ATC'],
70+
['AG', 'ATG'],
71+
],
72+
'filters': [
73+
hl.set(['NO_HQ_GENOTYPES']),
74+
hl.empty_set(hl.tstr),
75+
hl.missing(hl.tset(hl.tstr)),
76+
hl.set(['NO_HQ_GENOTYPES']),
77+
hl.empty_set(hl.tstr),
78+
hl.set(['NO_HQ_GENOTYPES']),
79+
],
80+
'info': [
81+
hl.Struct(CALIBRATION_SENSITIVITY=['0.999']),
82+
hl.Struct(CALIBRATION_SENSITIVITY=['0.995']),
83+
hl.Struct(CALIBRATION_SENSITIVITY=['0.999']),
84+
hl.Struct(CALIBRATION_SENSITIVITY=['0.98']),
85+
hl.Struct(CALIBRATION_SENSITIVITY=['0.99']),
86+
hl.Struct(CALIBRATION_SENSITIVITY=['0.991']),
87+
],
88+
},
89+
cols={'s': ['sample_1']},
90+
entries={'HL': [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]},
91+
).key_rows_by('locus', 'alleles')
92+
dragen_mt = split_multi_hts(dragen_mt)
93+
dragen_mt = annotate_vets(dragen_mt)
94+
self.assertListEqual(
95+
dragen_mt.filters.collect(),
96+
[
97+
{'NO_HQ_GENOTYPES', 'high_CALIBRATION_SENSITIVITY_SNP'},
98+
set(),
99+
{'high_CALIBRATION_SENSITIVITY_SNP'},
100+
{'NO_HQ_GENOTYPES'},
101+
set(),
102+
{'NO_HQ_GENOTYPES', 'high_CALIBRATION_SENSITIVITY_INDEL'},
103+
],
104+
)

v03_pipeline/lib/tasks/write_imported_callset.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
validate_no_duplicate_variants,
1212
validate_sample_type,
1313
)
14+
from v03_pipeline.lib.misc.vets import annotate_vets
1415
from v03_pipeline.lib.model import CachedReferenceDatasetQuery
1516
from v03_pipeline.lib.paths import (
1617
imported_callset_path,
@@ -85,6 +86,10 @@ def create_table(self) -> hl.MatrixTable:
8586
mt = select_relevant_fields(mt, self.dataset_type)
8687
if self.dataset_type.has_multi_allelic_variants:
8788
mt = split_multi_hts(mt)
89+
# Special handling of variant-level filter annotation for VETs filters.
90+
# The annotations are present on the sample-level FT field but are
91+
# expected upstream on "filters".
92+
mt = annotate_vets(mt)
8893
if self.dataset_type.can_run_validation:
8994
# Rather than throwing an error, we silently remove invalid contigs.
9095
# This happens fairly often for AnVIL requests.

0 commit comments

Comments
 (0)