Skip to content

Commit 23cece6

Browse files
committed
Filter single-allele deletions in ClinVar
1 parent 4e0674b commit 23cece6

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

v03_pipeline/lib/reference_datasets/clinvar.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,11 @@
1212
CLINVAR_PATHOGENICITIES_LOOKUP,
1313
)
1414
from v03_pipeline.lib.model.definitions import ReferenceGenome
15-
from v03_pipeline.lib.reference_datasets.misc import copy_to_cloud_storage, vcf_to_ht
15+
from v03_pipeline.lib.reference_datasets.misc import (
16+
BIALLELIC,
17+
copy_to_cloud_storage,
18+
vcf_to_ht,
19+
)
1620

1721
CLINVAR_GOLD_STARS_LOOKUP = hl.dict(
1822
{
@@ -166,6 +170,8 @@ def get_ht(
166170
shutil.copyfileobj(r.raw, tmp_file)
167171
cloud_tmp_file = copy_to_cloud_storage(tmp_file.name)
168172
ht = vcf_to_ht(cloud_tmp_file, reference_genome)
173+
# Keep only rows whose allele count equals BIALLELIC; this drops deletions that ClinVar lists with a single allele.
174+
ht = ht.filter(hl.len(ht.alleles) == BIALLELIC)
169175
submitters_ht = get_submission_summary_ht()
170176
ht = ht.annotate(
171177
submitters=submitters_ht[ht.rsid].Submitters,

0 commit comments

Comments
 (0)