Skip to content

Commit 6eb7331

Browse files
committed
Add validation for duplicate variants
1 parent ab13f4c commit 6eb7331

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

v03_pipeline/lib/reference_datasets/reference_dataset.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,10 @@
66

77
import hail as hl
88

9+
from v03_pipeline.lib.misc.validation import (
10+
validate_allele_type,
11+
validate_no_duplicate_variants,
12+
)
913
from v03_pipeline.lib.model import AccessControl, DatasetType, Env, ReferenceGenome
1014
from v03_pipeline.lib.reference_datasets import clinvar, dbnsfp
1115
from v03_pipeline.lib.reference_datasets.misc import (
@@ -115,6 +119,11 @@ def get_ht(
115119
if enum_selects:
116120
ht = ht.transmute(**enum_selects)
117121
ht = filter_contigs(ht, reference_genome)
122+
# Reference Datasets are DatasetType agnostic, but these
123+
# methods (in theory) support SV/GCNV. SNV_INDEL
124+
# is passed as a proxy for non-SV/GCNV.
125+
validate_allele_type(ht, DatasetType.SNV_INDEL)
126+
validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL)
118127
# NB: we do not filter with "filter" here
119128
# ReferenceDatasets are DatasetType agnostic and that
120129
# filter is only used at annotation time.

0 commit comments

Comments
 (0)