Skip to content

Commit 8e406d6

Browse files
authored
mito dataset changes (#698)
* handle clinvar terminology change from "interpretation" to "classification"
* fix trailing comma
* fix some test tables
* update dbnsfp_mito too
* ruff format
1 parent cdc5a8b commit 8e406d6

File tree

61 files changed

+11
-22
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

61 files changed

+11
-22
lines changed

v03_pipeline/lib/annotations/enums.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@
204204
'Likely_pathogenic/Likely_risk_allele',
205205
'Established_risk_allele',
206206
'Likely_risk_allele',
207-
'Conflicting_interpretations_of_pathogenicity',
207+
'Conflicting_classifications_of_pathogenicity',
208208
'Uncertain_risk_allele',
209209
'Uncertain_significance/Uncertain_risk_allele',
210210
'Uncertain_significance',

v03_pipeline/lib/reference_data/clinvar.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,11 @@
2525
]
2626
CLINVAR_GOLD_STARS_LOOKUP = hl.dict(
2727
{
28-
'no_interpretation_for_the_single_variant': 0,
28+
'no_classification_for_the_single_variant': 0,
2929
'no_assertion_provided': 0,
3030
'no_assertion_criteria_provided': 0,
3131
'criteria_provided,_single_submitter': 1,
32-
'criteria_provided,_conflicting_interpretations': 1,
32+
'criteria_provided,_conflicting_classifications': 1,
3333
'criteria_provided,_multiple_submitters,_no_conflicts': 2,
3434
'reviewed_by_expert_panel': 3,
3535
'practice_guideline': 4,

v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -335,7 +335,7 @@ def test_update_vat_with_updated_rdc_mito_38(
335335
mitomap='gs://seqr-reference-data/GRCh38/mitochondrial/MITOMAP/mitomap-confirmed-mutations-2022-02-04.ht',
336336
mitimpact='gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.0.7.ht',
337337
clinvar_mito='ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
338-
dbnsfp_mito='gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.ht',
338+
dbnsfp_mito='gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.with_new_scores.ht',
339339
high_constraint_region_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix high constraint intervals Feb-15-2022.tsv',
340340
),
341341
versions=hl.Struct(
@@ -359,10 +359,7 @@ def test_update_vat_with_updated_rdc_mito_38(
359359
assertion=CLINVAR_ASSERTIONS,
360360
),
361361
dbnsfp_mito=hl.Struct(
362-
SIFT_pred=['D', 'T'],
363-
Polyphen2_HVAR_pred=['D', 'P', 'B'],
364362
MutationTaster_pred=['D', 'A', 'N', 'P'],
365-
fathmm_MKL_coding_pred=['D', 'N'],
366363
),
367364
high_constraint_region_mito=hl.Struct(),
368365
sorted_transcript_consequences=hl.Struct(
@@ -390,13 +387,8 @@ def test_update_vat_with_updated_rdc_mito_38(
390387
alleles=['A', 'C'],
391388
clinvar_mito=None,
392389
dbnsfp_mito=hl.Struct(
393-
REVEL_score=None,
394-
VEST4_score=None,
395-
MutPred_score=None,
396-
SIFT_pred_id=None,
397-
Polyphen2_HVAR_pred_id=None,
398-
MutationTaster_pred_id=None,
399-
fathmm_MKL_coding_pred_id=None,
390+
SIFT_score=None,
391+
MutationTaster_pred='N',
400392
),
401393
gnomad_mito=None,
402394
helix_mito=hl.Struct(

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -633,7 +633,7 @@ def test_mito_update_vat(self, mock_update_rdc_task: Mock) -> None:
633633
paths=hl.Struct(
634634
high_constraint_region_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix high constraint intervals Feb-15-2022.tsv',
635635
clinvar_mito='ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
636-
dbnsfp_mito='gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.ht',
636+
dbnsfp_mito='gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.with_new_scores.ht',
637637
gnomad_mito='gs://gcp-public-data--gnomad/release/3.1/ht/genomes/gnomad.genomes.v3.1.sites.chrM.ht',
638638
helix_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix/HelixMTdb_20200327.ht',
639639
hmtvar='gs://seqr-reference-data/GRCh38/mitochondrial/HmtVar/HmtVar%20Jan.%2010%202022.ht',
@@ -657,10 +657,7 @@ def test_mito_update_vat(self, mock_update_rdc_task: Mock) -> None:
657657
pathogenicity=CLINVAR_PATHOGENICITIES,
658658
),
659659
dbnsfp_mito=hl.Struct(
660-
SIFT_pred=['D', 'T'],
661-
Polyphen2_HVAR_pred=['D', 'P', 'B'],
662660
MutationTaster_pred=['D', 'A', 'N', 'P'],
663-
fathmm_MKL_coding_pred=['D', 'N'],
664661
),
665662
gnomad_mito=hl.Struct(),
666663
helix_mito=hl.Struct(),
Binary file not shown.
Binary file not shown.
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
11
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
22
Written with version 0.2.122-be9d88a80695
3-
Created at 2024/01/31 15:18:56
3+
Created at 2024/02/21 15:21:48
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)