Skip to content

Commit e4b6bdb

Browse files
committed
Fix missing hgmd variants (#747)
* Fix missing recoding
* Fix hgmd parsing
* lint
* Update tables
* Update versions
1 parent dd6e28d commit e4b6bdb

38 files changed

+42
-7
lines changed

v03_pipeline/lib/reference_data/config.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -239,6 +239,7 @@ def custom_mpc_select(ht):
239239
'hgmd': {
240240
'37': {
241241
'custom_import': download_and_import_hgmd_vcf,
242+
'version': 'HGMD_Pro_2023',
242243
'source_path': 'gs://seqr-reference-data-private/GRCh37/HGMD/HGMD_Pro_2023.1_hg19.vcf.gz',
243244
'select': {'accession': 'rsid', 'class': 'info.CLASS'},
244245
'enum_select': {
@@ -254,6 +255,7 @@ def custom_mpc_select(ht):
254255
},
255256
'38': {
256257
'custom_import': download_and_import_hgmd_vcf,
258+
'version': 'HGMD_Pro_2023',
257259
'source_path': 'gs://seqr-reference-data-private/GRCh38/HGMD/HGMD_Pro_2023.1_hg38.vcf.gz',
258260
'select': {'accession': 'rsid', 'class': 'info.CLASS'},
259261
'enum_select': {

v03_pipeline/lib/reference_data/hgmd.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,5 +13,6 @@ def download_and_import_hgmd_vcf(
1313
force=True,
1414
min_partitions=100,
1515
skip_invalid_loci=True,
16+
contig_recoding=reference_genome.contig_recoding(),
1617
)
1718
return mt.rows()
Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,12 @@
1+
import unittest
2+
3+
from v03_pipeline.lib.model import ReferenceGenome
4+
from v03_pipeline.lib.reference_data.hgmd import download_and_import_hgmd_vcf
5+
6+
TEST_HGMD_VCF = 'v03_pipeline/var/test/reference_data/test_hgmd.vcf'
7+
8+
9+
class HGMDTest(unittest.TestCase):
10+
def test_import_hgmd_vcf(self):
11+
ht = download_and_import_hgmd_vcf(TEST_HGMD_VCF, ReferenceGenome.GRCh38)
12+
self.assertEqual(ht.count(), 1)

v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -789,7 +789,7 @@ def test_update_vat_with_updated_rdc_snv_indel_38(
789789
topmed=None,
790790
gnomad_non_coding_constraint=None,
791791
screen=None,
792-
hgmd=None,
792+
hgmd='HGMD_Pro_2023',
793793
),
794794
enums=hl.Struct(
795795
cadd=hl.Struct(),
@@ -1053,7 +1053,7 @@ def test_update_vat_with_updated_rdc_snv_indel_37(
10531053
primate_ai='v0.2',
10541054
splice_ai=None,
10551055
topmed=None,
1056-
hgmd=None,
1056+
hgmd='HGMD_Pro_2023',
10571057
),
10581058
enums=hl.Struct(
10591059
cadd=hl.Struct(),

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -472,7 +472,7 @@ def test_multiple_update_vat(
472472
topmed=None,
473473
gnomad_non_coding_constraint=None,
474474
screen=None,
475-
hgmd=None,
475+
hgmd='HGMD_Pro_2023',
476476
),
477477
enums=hl.Struct(
478478
cadd=hl.Struct(),
Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
##fileformat=VCFv4.2
2+
##note=VCF file is compatible with VCFv4.3 if required by updating fileformat parameter to v4.3
3+
##source=HGMD_PRO_2023.1
4+
##reference=GRCh38
5+
##comment="REF and ALT sequences are both on forward strand of reference assembly"
6+
##INFO=<ID=CLASS,Number=1,Type=String>
7+
##INFO=<ID=MUT,Number=1,Type=String>
8+
##INFO=<ID=GENE,Number=1,Type=String>
9+
##INFO=<ID=STRAND,Number=1,Type=String>
10+
##INFO=<ID=DNA,Number=1,Type=String>
11+
##INFO=<ID=PROT,Number=1,Type=String>
12+
##INFO=<ID=DB,Number=1,Type=String>
13+
##INFO=<ID=PHEN,Number=1,Type=String>
14+
##INFO=<ID=RANKSCORE,Number=1,Type=Float>
15+
##INFO=<ID=SVTYPE,Number=1,Type=String>
16+
##INFO=<ID=END,Number=1,Type=Integer>
17+
##INFO=<ID=SVLEN,Number=1,Type=Integer>
18+
##ALT=<ID=DEL>
19+
#CHROM POS ID REF ALT QUAL FILTER INFO
20+
1 925942 CM2039807 A G . . CLASS=DM?;MUT=ALT;GENE=SAMD11;STRAND=+;DNA=NM_152486.4%3Ac.1A>G;PROT=NP_689699.3%3Ap.M1?;DB=rs1403306757;PHEN="Retinitis_pigmentosa";RANKSCORE=0.57
Binary file not shown.
Binary file not shown.
Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
2-
Written with version 0.2.128-eead8100a1c1
3-
Created at 2024/03/11 13:33:57
2+
Written with version 0.2.120-f00f916faf78
3+
Created at 2024/03/28 16:27:47
Binary file not shown.

0 commit comments

Comments
 (0)