Skip to content

Commit 1bfe3b3

Browse files
authored
Fix liftover caching bug in tests (#878)
* env vars * remove liftover from hail * ruff * spelling * move remove_liftover * set value on mock * ruff * move from before tasks to after tasks * move it back * Update base_hail_table.py * Update base_hail_table.py * ws
1 parent d68bdc4 commit 1bfe3b3

File tree

5 files changed

+27
-2
lines changed

5 files changed

+27
-2
lines changed

.github/workflows/unit-tests.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ jobs:
3636
run: ruff . --output-format github
3737
- name: Unit Tests
3838
run: |
39+
export GRCH37_TO_GRCH38_LIFTOVER_REF_PATH=v03_pipeline/var/test/liftover/grch37_to_grch38.over.chain.gz
40+
export GRCH38_TO_GRCH37_LIFTOVER_REF_PATH=v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz
3941
export ACCESS_PRIVATE_REFERENCE_DATASETS=1
4042
export PYSPARK_SUBMIT_ARGS='--driver-memory 8G pyspark-shell'
4143
nosetests --with-coverage --cover-package v03_pipeline/lib v03_pipeline/lib

v03_pipeline/lib/annotations/liftover.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,12 @@ def add_rg37_liftover(grch37_to_grch38_liftover_ref_path: str) -> None:
1515
rg38 = hl.get_reference(ReferenceGenome.GRCh38.value)
1616
if not rg37.has_liftover(rg38):
1717
rg37.add_liftover(grch37_to_grch38_liftover_ref_path, rg38)
18+
19+
20+
def remove_liftover():
21+
rg37 = hl.get_reference(ReferenceGenome.GRCh37.value)
22+
rg38 = hl.get_reference(ReferenceGenome.GRCh38.value)
23+
if rg37.has_liftover(rg38):
24+
rg37.remove_liftover(rg38)
25+
if rg38.has_liftover(rg37):
26+
rg38.remove_liftover(rg37)

v03_pipeline/lib/tasks/base/base_hail_table.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import hail as hl
22
import luigi
33

4+
from v03_pipeline.lib.annotations.liftover import remove_liftover
45
from v03_pipeline.lib.logger import get_logger
56
from v03_pipeline.lib.model import Env
67
from v03_pipeline.lib.tasks.base.base_loading_pipeline_params import (
@@ -27,6 +28,13 @@ def init_hail(self):
2728
# Interval ref data join causes shuffle death, this prevents it
2829
hl._set_flags(use_new_shuffle='1', no_whole_stage_codegen='1') # noqa: SLF001
2930

31+
# Ensure any cached liftover files within Hail are cleared
32+
# to provide a clean context free of hidden state.
33+
# This runs "before" a task to account for situations where
34+
# the Hail write fails and we do not have the chance to
35+
# run this method in the "after".
36+
remove_liftover()
37+
3038

3139
# NB: these are defined over luigi.Task instead of the BaseHailTableTask so that
3240
# they work on file dependencies.

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
4545
from v03_pipeline.var.test.vep.mock_vep_data import MOCK_37_VEP_DATA, MOCK_38_VEP_DATA
4646

47+
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = (
48+
'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz'
49+
)
4750
TEST_MITO_MT = 'v03_pipeline/var/test/callsets/mito_1.mt'
4851
TEST_SNV_INDEL_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf'
4952
TEST_SV_VCF = 'v03_pipeline/var/test/callsets/sv_1.vcf'
@@ -248,6 +251,7 @@ def test_multiple_update_vat(
248251
{'ENST00000327044': 'NM_015658.4'},
249252
)
250253
# make register_alleles return CAIDs for 4 of 30 variants
254+
mock_env.GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
251255
mock_env.SHOULD_REGISTER_ALLELES = True
252256
mock_register_alleles.side_effect = [
253257
iter(

v03_pipeline/lib/tasks/write_new_variants_table.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212
from v03_pipeline.lib.misc.callsets import get_callset_ht
1313
from v03_pipeline.lib.misc.io import remap_pedigree_hash
1414
from v03_pipeline.lib.misc.math import constrain
15-
from v03_pipeline.lib.model import Env, ReferenceDatasetCollection
15+
from v03_pipeline.lib.model import (
16+
Env,
17+
ReferenceDatasetCollection,
18+
)
1619
from v03_pipeline.lib.paths import (
1720
new_variants_table_path,
1821
variant_annotations_table_path,
@@ -214,7 +217,6 @@ def create_table(self) -> hl.Table:
214217
):
215218
ar_ht = ar_ht.union(ar_ht_chunk)
216219
new_variants_ht = new_variants_ht.join(ar_ht, 'left')
217-
218220
return new_variants_ht.select_globals(
219221
updates={
220222
hl.Struct(

0 commit comments

Comments
 (0)