Skip to content

Commit 1cf3f7d

Browse files
authored
Checkpoint no longer necessary with improved caching (#840)
* Checkpoint no longer necessary with improved caching
* move the reset to clinvar
1 parent 13655e1 commit 1cf3f7d

File tree

2 files changed

+2
-7
lines changed

2 files changed

+2
-7
lines changed

v03_pipeline/lib/reference_data/clinvar.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,8 +125,10 @@ def get_clinvar_ht(
125125
ht = hl.read_table(clinvar_ht_path)
126126
else:
127127
logger.info('Cached clinvar ht not found, downloading latest clinvar vcf')
128+
hl._set_flags(use_new_shuffle=None, no_whole_stage_codegen='1') # noqa: SLF001
128129
ht = download_and_import_latest_clinvar_vcf(clinvar_url, reference_genome)
129130
write(ht, clinvar_ht_path, repartition=False)
131+
hl._set_flags(use_new_shuffle='1', no_whole_stage_codegen='1') # noqa: SLF001
130132
return ht
131133

132134

v03_pipeline/lib/reference_data/dataset_table_operations.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import hail as hl
55
import pytz
66

7-
from v03_pipeline.lib.misc.io import checkpoint
87
from v03_pipeline.lib.misc.nested_field import parse_nested_field
98
from v03_pipeline.lib.model import (
109
DatasetType,
@@ -36,10 +35,7 @@ def update_or_create_joined_ht(
3635
continue
3736

3837
# Join the new one!
39-
hl._set_flags(use_new_shuffle=None, no_whole_stage_codegen='1') # noqa: SLF001
4038
dataset_ht = get_dataset_ht(dataset, reference_genome)
41-
dataset_ht, _ = checkpoint(dataset_ht)
42-
hl._set_flags(use_new_shuffle='1', no_whole_stage_codegen='1') # noqa: SLF001
4339
joined_ht = joined_ht.join(dataset_ht, 'outer')
4440
joined_ht = annotate_dataset_globals(joined_ht, dataset, dataset_ht)
4541

@@ -216,10 +212,7 @@ def join_hts(
216212
),
217213
)
218214
for dataset in reference_dataset_collection.datasets(dataset_type):
219-
hl._set_flags(use_new_shuffle=None, no_whole_stage_codegen='1') # noqa: SLF001
220215
dataset_ht = get_dataset_ht(dataset, reference_genome)
221-
dataset_ht, _ = checkpoint(dataset_ht)
222-
hl._set_flags(use_new_shuffle='1', no_whole_stage_codegen='1') # noqa: SLF001
223216
joined_ht = joined_ht.join(dataset_ht, 'outer')
224217
joined_ht = annotate_dataset_globals(joined_ht, dataset, dataset_ht)
225218
return joined_ht

0 commit comments

Comments
 (0)