Skip to content

Commit 127a074

Browse files
authored
Merge pull request #702 from broadinstitute/benb/reference_genome_in_vep_config
Add reference genome check to VEP.
2 parents 57b5692 + 0a6c42a commit 127a074

File tree

5 files changed

+33
-4
lines changed

5 files changed

+33
-4
lines changed

v03_pipeline/lib/annotations/fields_test.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,16 @@ def setUp(self) -> None:
3131
),
3232
)
3333

34+
@patch('v03_pipeline.lib.vep.validate_vep_config_reference_genome')
3435
@patch('v03_pipeline.lib.vep.hl.vep')
35-
def test_get_formatting_fields(self, mock_vep: Mock) -> None:
36+
def test_get_formatting_fields(self, mock_vep: Mock, mock_validate: Mock) -> None:
3637
ht = hl.read_table(TEST_COMBINED_1)
3738
mock_vep.return_value = ht.annotate(vep=MOCK_VEP_DATA)
39+
mock_validate.return_value = None
3840
ht = run_vep(
3941
ht,
4042
DatasetType.SNV_INDEL,
43+
ReferenceGenome.GRCh38,
4144
None,
4245
)
4346
ht = ht.annotate(rsid='abcd')

v03_pipeline/lib/annotations/shared_test.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@
1010

1111

1212
class SharedAnnotationsTest(unittest.TestCase):
13+
@patch('v03_pipeline.lib.vep.validate_vep_config_reference_genome')
1314
@patch('v03_pipeline.lib.vep.hl.vep')
14-
def test_sorted_transcript_consequences(self, mock_vep: Mock) -> None:
15+
def test_sorted_transcript_consequences(
16+
self,
17+
mock_vep: Mock,
18+
mock_validate: Mock,
19+
) -> None:
1520
ht = hl.Table.parallelize(
1621
[
1722
{
@@ -30,9 +35,11 @@ def test_sorted_transcript_consequences(self, mock_vep: Mock) -> None:
3035
key=['locus', 'alleles'],
3136
)
3237
mock_vep.return_value = ht.annotate(vep=MOCK_VEP_DATA)
38+
mock_validate.return_value = None
3339
ht = run_vep(
3440
ht,
3541
DatasetType.SNV_INDEL,
42+
ReferenceGenome.GRCh38,
3643
None,
3744
)
3845
ht = ht.select(

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ def update_table(self, ht: hl.Table) -> hl.Table:
215215
new_variants_ht = run_vep(
216216
new_variants_ht,
217217
self.dataset_type,
218+
self.reference_genome,
218219
self.vep_config_json_path,
219220
)
220221

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,14 +186,17 @@ def test_missing_interval_reference(self, mock_update_rdc_task) -> None:
186186
)
187187
@patch.object(ReferenceGenome, 'standard_contigs', new_callable=PropertyMock)
188188
@patch('v03_pipeline.lib.vep.hl.vep')
189+
@patch('v03_pipeline.lib.vep.validate_vep_config_reference_genome')
189190
def test_multiple_update_vat(
190191
self,
192+
mock_vep_validate: Mock,
191193
mock_vep: Mock,
192194
mock_standard_contigs: Mock,
193195
mock_update_rdc_task: Mock,
194196
) -> None:
195197
mock_update_rdc_task.return_value = MockCompleteTask()
196198
mock_vep.side_effect = lambda ht, **_: ht.annotate(vep=MOCK_VEP_DATA)
199+
mock_vep_validate.return_value = None
197200
mock_standard_contigs.return_value = {'chr1'}
198201
# This creates a mock validation table with 1 coding and 1 non-coding variant
199202
# explicitly chosen from the VCF.
@@ -504,13 +507,16 @@ def test_multiple_update_vat(
504507
)
505508

506509
@patch('v03_pipeline.lib.vep.hl.vep')
510+
@patch('v03_pipeline.lib.vep.validate_vep_config_reference_genome')
507511
def test_update_vat_grch37(
508512
self,
513+
mock_vep_validate: Mock,
509514
mock_vep: Mock,
510515
mock_update_rdc_task: Mock,
511516
) -> None:
512517
mock_update_rdc_task.return_value = MockCompleteTask()
513518
mock_vep.side_effect = lambda ht, **_: ht.annotate(vep=MOCK_VEP_DATA)
519+
mock_vep_validate.return_value = None
514520
worker = luigi.worker.Worker()
515521
uvatwns_task = UpdateVariantAnnotationsTableWithNewSamplesTask(
516522
reference_genome=ReferenceGenome.GRCh37,
@@ -551,8 +557,10 @@ def test_update_vat_grch37(
551557

552558
@patch('v03_pipeline.lib.model.reference_dataset_collection.Env')
553559
@patch('v03_pipeline.lib.vep.hl.vep')
560+
@patch('v03_pipeline.lib.vep.validate_vep_config_reference_genome')
554561
def test_update_vat_without_accessing_private_datasets(
555562
self,
563+
mock_vep_validate: Mock,
556564
mock_vep: Mock,
557565
mock_rdc_env: Mock,
558566
mock_update_rdc_task: Mock,
@@ -567,6 +575,7 @@ def test_update_vat_without_accessing_private_datasets(
567575
)
568576
mock_rdc_env.ACCESS_PRIVATE_REFERENCE_DATASETS = False
569577
mock_vep.side_effect = lambda ht, **_: ht.annotate(vep=MOCK_VEP_DATA)
578+
mock_vep_validate.return_value = None
570579
worker = luigi.worker.Worker()
571580
uvatwns_task = UpdateVariantAnnotationsTableWithNewSamplesTask(
572581
reference_genome=ReferenceGenome.GRCh38,

v03_pipeline/lib/vep.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,29 @@
11
import hail as hl
22

3-
from v03_pipeline.lib.model import DatasetType
3+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
4+
5+
6+
def validate_vep_config_reference_genome(reference_genome, config: str) -> None:
    """Check that a VEP config file mentions the expected reference genome.

    The check is a plain substring search of the config's text for
    ``reference_genome.value``; the config is not parsed.

    Args:
        reference_genome: Object whose ``.value`` attribute is the string to
            look for in the config (e.g. a ``ReferenceGenome`` member).
        config: Location of the VEP config. May be a plain filesystem path,
            a ``file://`` URI, or another ``scheme://`` URI readable by Hail.

    Raises:
        ValueError: If the config text does not contain
            ``reference_genome.value``.
    """
    local_scheme = 'file://'
    if config.startswith(local_scheme):
        # The builtin open() does not understand URIs, so a value such as
        # 'file:///vep_data/...' must be reduced to its filesystem path first.
        opener, path = open, config[len(local_scheme):]
    elif '://' in config:
        # Any other scheme is delegated to Hail's filesystem layer.
        opener, path = hl.hadoop_open, config
    else:
        opener, path = open, config
    with opener(path) as f:
        contents = f.read()
    if reference_genome.value not in contents:
        msg = f'Vep config does not match supplied reference genome {reference_genome.value}'
        raise ValueError(msg)
411

512

613
def run_vep(
714
ht: hl.Table,
815
dataset_type: DatasetType,
16+
reference_genome: ReferenceGenome,
917
vep_config_json_path: str | None,
1018
) -> hl.Table:
1119
if not dataset_type.veppable:
1220
return ht
1321
config = (
1422
vep_config_json_path
1523
if vep_config_json_path is not None
16-
else 'file:///vep_data/vep-gcloud.json'
24+
else f'file:///vep_data/vep-{reference_genome.value}-gcloud.json'
1725
)
26+
validate_vep_config_reference_genome(reference_genome, config)
1827
return hl.vep(
1928
ht,
2029
config=config,

0 commit comments

Comments
 (0)