From 5027eb3bffee342b32183db57575a890cce42efc Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 6 Sep 2024 15:52:30 -0400 Subject: [PATCH 1/9] fix lookup migration bug --- v03_pipeline/lib/tasks/update_lookup_table.py | 1 + v03_pipeline/migrations/lookup/0003_add_sample_type.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/v03_pipeline/lib/tasks/update_lookup_table.py b/v03_pipeline/lib/tasks/update_lookup_table.py index 5828bd141..26f525ecc 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table.py +++ b/v03_pipeline/lib/tasks/update_lookup_table.py @@ -115,5 +115,6 @@ def update_table(self, ht: hl.Table) -> hl.Table: ), ), ), + migrations=ht.migrations, ) return ht diff --git a/v03_pipeline/migrations/lookup/0003_add_sample_type.py b/v03_pipeline/migrations/lookup/0003_add_sample_type.py index 0f02c1f09..db23670c8 100644 --- a/v03_pipeline/migrations/lookup/0003_add_sample_type.py +++ b/v03_pipeline/migrations/lookup/0003_add_sample_type.py @@ -55,9 +55,10 @@ def migrate( remap_pedigree_hash: int32 }> """ - ht = ht.transmute_globals( - project_sample_types=ht.globals.project_guids, - ) + if 'project_sample_types' not in list(ht.globals.keys()): + ht = ht.transmute_globals( + project_sample_types=ht.globals.project_guids, + ) collected_globals = ht.globals.collect()[0] project_sample_types = collected_globals['project_sample_types'] project_families = collected_globals['project_families'] From cb968b1e37b17ab4a88ec37121381999123937be Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 6 Sep 2024 15:55:31 -0400 Subject: [PATCH 2/9] nvm --- v03_pipeline/migrations/lookup/0003_add_sample_type.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/v03_pipeline/migrations/lookup/0003_add_sample_type.py b/v03_pipeline/migrations/lookup/0003_add_sample_type.py index db23670c8..0f02c1f09 100644 --- a/v03_pipeline/migrations/lookup/0003_add_sample_type.py +++ 
b/v03_pipeline/migrations/lookup/0003_add_sample_type.py @@ -55,10 +55,9 @@ def migrate( remap_pedigree_hash: int32 }> """ - if 'project_sample_types' not in list(ht.globals.keys()): - ht = ht.transmute_globals( - project_sample_types=ht.globals.project_guids, - ) + ht = ht.transmute_globals( + project_sample_types=ht.globals.project_guids, + ) collected_globals = ht.globals.collect()[0] project_sample_types = collected_globals['project_sample_types'] project_families = collected_globals['project_families'] From 1fb4eea5f74852afdcc9621f392a76afcfa6da37 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 6 Sep 2024 15:56:48 -0400 Subject: [PATCH 3/9] and the tests --- v03_pipeline/lib/tasks/update_lookup_table_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/v03_pipeline/lib/tasks/update_lookup_table_test.py b/v03_pipeline/lib/tasks/update_lookup_table_test.py index 6bdca1ce4..ebf6b0eef 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_test.py @@ -51,6 +51,7 @@ def test_skip_update_lookup_table_task(self) -> None: ), ), }, + migrations=[], ), ], ) @@ -89,6 +90,7 @@ def test_update_lookup_table_task(self) -> None: ), ), }, + migrations=[], ), ], ) From 65326bcbbe9488d9bc548d0eb4adbbf6e5e0e3e8 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 6 Sep 2024 16:36:43 -0400 Subject: [PATCH 4/9] base task --- v03_pipeline/lib/tasks/base/base_update_lookup_table.py | 1 + 1 file changed, 1 insertion(+) diff --git a/v03_pipeline/lib/tasks/base/base_update_lookup_table.py b/v03_pipeline/lib/tasks/base/base_update_lookup_table.py index 0c9aa719e..1dcb5f58d 100644 --- a/v03_pipeline/lib/tasks/base/base_update_lookup_table.py +++ b/v03_pipeline/lib/tasks/base/base_update_lookup_table.py @@ -46,5 +46,6 @@ def initialize_table(self) -> hl.Table: remap_pedigree_hash=hl.tint32, ), ), + migrations=hl.empty_array(hl.tstr), ), ) From 0ebce9b7d2cdd51768e312bc397d1ff23509341a Mon Sep 17 00:00:00 2001 From: 
Julia Klugherz Date: Fri, 6 Sep 2024 17:12:18 -0400 Subject: [PATCH 5/9] more tests --- .../lib/tasks/update_lookup_table_with_deleted_project_test.py | 3 +++ ...date_variant_annotations_table_with_deleted_project_test.py | 1 + 2 files changed, 4 insertions(+) diff --git a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py index b0cb2db80..1276d724b 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py @@ -32,6 +32,7 @@ def test_delete_project_empty_table( project_sample_types=[], project_families={}, updates=set(), + migrations=[], ), ], ) @@ -136,6 +137,7 @@ def test_delete_project( remap_pedigree_hash=123, ), }, + migrations=[], ), ) worker = luigi.worker.Worker() @@ -162,6 +164,7 @@ def test_delete_project( remap_pedigree_hash=123, ), }, + migrations=[], ), ], ) diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py index e493fbc30..b152b5f07 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py @@ -107,6 +107,7 @@ def setUp(self) -> None: remap_pedigree_hash=123, ), }, + migrations=[], ), ) ht.write( From ac988a1c8826c71ffd89fe1c17f9fbded1f46bad Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Fri, 6 Sep 2024 17:30:31 -0400 Subject: [PATCH 6/9] fix fix fix --- .../lib/tasks/update_lookup_table_with_deleted_project_test.py | 2 +- ...pdate_variant_annotations_table_with_deleted_project_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py 
b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py index 1276d724b..cf0cfb61a 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py @@ -137,7 +137,7 @@ def test_delete_project( remap_pedigree_hash=123, ), }, - migrations=[], + migrations=hl.empty_array(hl.tstr), ), ) worker = luigi.worker.Worker() diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py index b152b5f07..fb5c4ccfd 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py @@ -107,7 +107,7 @@ def setUp(self) -> None: remap_pedigree_hash=123, ), }, - migrations=[], + migrations=hl.empty_array(hl.tstr), ), ) ht.write( From 01c65f4a3cea687bd74fd54d04d4727b657c68eb Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Sun, 8 Sep 2024 21:42:11 -0400 Subject: [PATCH 7/9] update_lookup_table_with_deleted_families_test --- .../tasks/update_lookup_table_with_deleted_families_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py index 098b97ffb..75caab822 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py @@ -33,6 +33,7 @@ def test_delete_project_empty_table( project_sample_types=[], project_families={}, updates=set(), + migrations=[], ), ], ) @@ -137,6 +138,7 @@ def test_delete_project( remap_pedigree_hash=123, ), }, + migrations=hl.empty_array(hl.tstr), ), ) worker = luigi.worker.Worker() @@ -172,6 +174,7 @@ def test_delete_project( remap_pedigree_hash=123, ), }, 
+ migrations=[], ), ], ) From 4d7c07e42e7259fd3bb461d6cbb5172a25132f86 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Tue, 10 Sep 2024 11:17:14 -0700 Subject: [PATCH 8/9] validation (#889) --- v03_pipeline/lib/misc/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/lib/misc/validation.py b/v03_pipeline/lib/misc/validation.py index 6fac170b3..5540533fd 100644 --- a/v03_pipeline/lib/misc/validation.py +++ b/v03_pipeline/lib/misc/validation.py @@ -155,7 +155,7 @@ def validate_sample_type( msg = f'Sample type validation error: dataset contains noncoding variants but is missing common coding variants for {reference_genome.value}. Please verify that the dataset contains coding variants.' raise SeqrValidationError(msg) if has_coding and not has_noncoding and sample_type != SampleType.WES: - msg = 'Sample type validation error: dataset sample-type is specified as WGS but appears to be WES because it contains many common coding variants' + msg = 'Sample type validation error: dataset sample-type is specified as WGS but appears to be WES because it contains many common coding variants but is missing common non-coding variants' raise SeqrValidationError(msg) if has_noncoding and has_coding and sample_type != SampleType.WGS: msg = 'Sample type validation error: dataset sample-type is specified as WES but appears to be WGS because it contains many common non-coding variants' From 552acddc4ceb03507e72071c0ac97a946e0937d6 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 12 Sep 2024 04:55:42 -0700 Subject: [PATCH 9/9] Update environment variables (#893) * Update environment variables * tweak tmp dir * Fix double dir * fix bad _dir --- .github/workflows/unit-tests.yml | 1 + v03_pipeline/lib/misc/allele_registry.py | 4 +-- v03_pipeline/lib/misc/allele_registry_test.py | 2 +- v03_pipeline/lib/misc/io.py | 2 +- v03_pipeline/lib/model/environment.py | 25 +++++++------- v03_pipeline/lib/paths.py | 34 
+++++++++---------- v03_pipeline/lib/paths_test.py | 2 +- v03_pipeline/lib/reference_data/clinvar.py | 4 +-- .../lib/tasks/base/base_hail_table.py | 2 +- ...annotations_table_with_new_samples_test.py | 4 +-- 10 files changed, 41 insertions(+), 39 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 3c20ee35c..c8e0894c1 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -36,6 +36,7 @@ jobs: run: ruff . --output-format github - name: Unit Tests run: | + export HAIL_TMP_DIR=/tmp export GRCH37_TO_GRCH38_LIFTOVER_REF_PATH=v03_pipeline/var/test/liftover/grch37_to_grch38.over.chain.gz export GRCH38_TO_GRCH37_LIFTOVER_REF_PATH=v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz export ACCESS_PRIVATE_REFERENCE_DATASETS=1 diff --git a/v03_pipeline/lib/misc/allele_registry.py b/v03_pipeline/lib/misc/allele_registry.py index 2c12538d5..e1ad9e1a2 100644 --- a/v03_pipeline/lib/misc/allele_registry.py +++ b/v03_pipeline/lib/misc/allele_registry.py @@ -77,8 +77,8 @@ def register_alleles( base_url: str, ) -> hl.Table: uuid4 = uuid.uuid4() - raw_vcf_file_name = f'{Env.HAIL_TMPDIR}/r_{uuid4}.vcf' - formatted_vcf_file_name = f'{Env.HAIL_TMPDIR}/f_{uuid4}.vcf' + raw_vcf_file_name = f'{Env.HAIL_TMP_DIR}/r_{uuid4}.vcf' + formatted_vcf_file_name = f'{Env.HAIL_TMP_DIR}/f_{uuid4}.vcf' # Export the variants to a VCF hl.export_vcf(ht, raw_vcf_file_name) diff --git a/v03_pipeline/lib/misc/allele_registry_test.py b/v03_pipeline/lib/misc/allele_registry_test.py index 83a9ceda6..45eea3061 100644 --- a/v03_pipeline/lib/misc/allele_registry_test.py +++ b/v03_pipeline/lib/misc/allele_registry_test.py @@ -39,7 +39,7 @@ def test_register_alleles_38( mock_put_request: Mock, ): mock_get_credentials.return_value = ('', '') - mock_env.HAIL_TMPDIR = self.temp_dir.name + mock_env.HAIL_TMP_DIR = self.temp_dir.name new_variants_ht = hl.Table.parallelize( [ diff --git a/v03_pipeline/lib/misc/io.py 
b/v03_pipeline/lib/misc/io.py index 599e791b8..ef2b26ecd 100644 --- a/v03_pipeline/lib/misc/io.py +++ b/v03_pipeline/lib/misc/io.py @@ -217,7 +217,7 @@ def checkpoint(t: hl.Table | hl.MatrixTable) -> tuple[hl.Table | hl.MatrixTable, suffix = 'mt' if isinstance(t, hl.MatrixTable) else 'ht' read_fn = hl.read_matrix_table if isinstance(t, hl.MatrixTable) else hl.read_table checkpoint_path = os.path.join( - Env.HAIL_TMPDIR, + Env.HAIL_TMP_DIR, f'{uuid.uuid4()}.{suffix}', ) t.write(checkpoint_path) diff --git a/v03_pipeline/lib/model/environment.py b/v03_pipeline/lib/model/environment.py index 277f6a852..91e69a6a2 100644 --- a/v03_pipeline/lib/model/environment.py +++ b/v03_pipeline/lib/model/environment.py @@ -2,8 +2,6 @@ from dataclasses import dataclass # NB: using os.environ.get inside the dataclass defaults gives a lint error. -HAIL_TMPDIR = os.environ.get('HAIL_TMPDIR', '/tmp') # noqa: S108 -HAIL_SEARCH_DATA = os.environ.get('HAIL_SEARCH_DATA', '/seqr/hail-search-data') GRCH37_TO_GRCH38_LIFTOVER_REF_PATH = os.environ.get( 'GRCH37_TO_GRCH38_LIFTOVER_REF_PATH', 'gs://hail-common/references/grch37_to_grch38.over.chain.gz', @@ -12,15 +10,18 @@ 'GRCH38_TO_GRCH37_LIFTOVER_REF_PATH', 'gs://hail-common/references/grch38_to_grch37.over.chain.gz', ) -LOADING_DATASETS = os.environ.get('LOADING_DATASETS', '/seqr/seqr-loading-temp') -PRIVATE_REFERENCE_DATASETS = os.environ.get( - 'PRIVATE_REFERENCE_DATASETS', +HAIL_TMP_DIR = os.environ.get('HAIL_TMP_DIR', '/seqr/tmp') +HAIL_SEARCH_DATA_DIR = os.environ.get('HAIL_SEARCH_DATA_DIR', '/seqr/hail-search-data') +LOADING_DATASETS_DIR = os.environ.get('LOADING_DATASETS_DIR', '/seqr/seqr-loading-temp') +PRIVATE_REFERENCE_DATASETS_DIR = os.environ.get( + 'PRIVATE_REFERENCE_DATASETS_DIR', '/seqr/seqr-reference-data-private', ) -REFERENCE_DATASETS = os.environ.get( - 'REFERENCE_DATASETS', +REFERENCE_DATASETS_DIR = os.environ.get( + 'REFERENCE_DATASETS_DIR', '/seqr/seqr-reference-data', ) + # Allele registry secrets :/ 
ALLELE_REGISTRY_SECRET_NAME = os.environ.get('ALLELE_REGISTRY_SECRET_NAME', None) PROJECT_ID = os.environ.get('PROJECT_ID', None) @@ -40,12 +41,12 @@ class Env: ALLELE_REGISTRY_SECRET_NAME: str | None = ALLELE_REGISTRY_SECRET_NAME CHECK_SEX_AND_RELATEDNESS: bool = CHECK_SEX_AND_RELATEDNESS EXPECT_WES_FILTERS: bool = EXPECT_WES_FILTERS - HAIL_TMPDIR: str = HAIL_TMPDIR - HAIL_SEARCH_DATA: str = HAIL_SEARCH_DATA + HAIL_TMP_DIR: str = HAIL_TMP_DIR + HAIL_SEARCH_DATA_DIR: str = HAIL_SEARCH_DATA_DIR GRCH37_TO_GRCH38_LIFTOVER_REF_PATH: str = GRCH37_TO_GRCH38_LIFTOVER_REF_PATH GRCH38_TO_GRCH37_LIFTOVER_REF_PATH: str = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH - LOADING_DATASETS: str = LOADING_DATASETS - PRIVATE_REFERENCE_DATASETS: str = PRIVATE_REFERENCE_DATASETS + LOADING_DATASETS_DIR: str = LOADING_DATASETS_DIR + PRIVATE_REFERENCE_DATASETS_DIR: str = PRIVATE_REFERENCE_DATASETS_DIR PROJECT_ID: str | None = PROJECT_ID - REFERENCE_DATASETS: str = REFERENCE_DATASETS + REFERENCE_DATASETS_DIR: str = REFERENCE_DATASETS_DIR SHOULD_REGISTER_ALLELES: bool = SHOULD_REGISTER_ALLELES diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 5512e24dd..44dbb5310 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -32,9 +32,9 @@ def _v03_reference_data_prefix( reference_genome: ReferenceGenome, ) -> str: root = ( - Env.PRIVATE_REFERENCE_DATASETS + Env.PRIVATE_REFERENCE_DATASETS_DIR if access_control == AccessControl.PRIVATE - else Env.REFERENCE_DATASETS + else Env.REFERENCE_DATASETS_DIR ) return os.path.join( root, @@ -67,7 +67,7 @@ def family_table_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.HAIL_SEARCH_DATA, + Env.HAIL_SEARCH_DATA_DIR, reference_genome, dataset_type, ), @@ -84,7 +84,7 @@ def imputed_sex_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.LOADING_DATASETS, + Env.LOADING_DATASETS_DIR, reference_genome, dataset_type, ), @@ -100,7 +100,7 @@ def imported_callset_path( ) -> str: return os.path.join( 
_pipeline_prefix( - Env.LOADING_DATASETS, + Env.LOADING_DATASETS_DIR, reference_genome, dataset_type, ), @@ -132,7 +132,7 @@ def project_table_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.HAIL_SEARCH_DATA, + Env.HAIL_SEARCH_DATA_DIR, reference_genome, dataset_type, ), @@ -149,7 +149,7 @@ def relatedness_check_table_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.LOADING_DATASETS, + Env.LOADING_DATASETS_DIR, reference_genome, dataset_type, ), @@ -166,7 +166,7 @@ def remapped_and_subsetted_callset_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.LOADING_DATASETS, + Env.LOADING_DATASETS_DIR, reference_genome, dataset_type, ), @@ -182,7 +182,7 @@ def lookup_table_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.HAIL_SEARCH_DATA, + Env.HAIL_SEARCH_DATA_DIR, reference_genome, dataset_type, ), @@ -196,7 +196,7 @@ def runs_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.HAIL_SEARCH_DATA, + Env.HAIL_SEARCH_DATA_DIR, reference_genome, dataset_type, ), @@ -211,7 +211,7 @@ def sex_check_table_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.LOADING_DATASETS, + Env.LOADING_DATASETS_DIR, reference_genome, dataset_type, ), @@ -265,7 +265,7 @@ def variant_annotations_table_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.HAIL_SEARCH_DATA, + Env.HAIL_SEARCH_DATA_DIR, reference_genome, dataset_type, ), @@ -279,7 +279,7 @@ def variant_annotations_vcf_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.HAIL_SEARCH_DATA, + Env.HAIL_SEARCH_DATA_DIR, reference_genome, dataset_type, ), @@ -304,7 +304,7 @@ def new_variants_table_path( def clinvar_dataset_path(reference_genome: ReferenceGenome, etag: str) -> str: return os.path.join( - Env.HAIL_TMPDIR, + Env.HAIL_TMP_DIR, f'clinvar-{reference_genome.value}-{etag}.ht', ) @@ -317,7 +317,7 @@ def project_remap_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.LOADING_DATASETS, + Env.LOADING_DATASETS_DIR, reference_genome, dataset_type, 
), @@ -335,7 +335,7 @@ def project_pedigree_path( ) -> str: return os.path.join( _pipeline_prefix( - Env.LOADING_DATASETS, + Env.LOADING_DATASETS_DIR, reference_genome, dataset_type, ), @@ -347,7 +347,7 @@ def project_pedigree_path( def loading_pipeline_queue_path() -> str: return os.path.join( - Env.LOADING_DATASETS, + Env.LOADING_DATASETS_DIR, 'loading_pipeline_queue', 'request.json', ) diff --git a/v03_pipeline/lib/paths_test.py b/v03_pipeline/lib/paths_test.py index 4b595e7e1..59081e647 100644 --- a/v03_pipeline/lib/paths_test.py +++ b/v03_pipeline/lib/paths_test.py @@ -50,7 +50,7 @@ def test_family_table_path(self) -> None: '/seqr/hail-search-data/v3.1/GRCh37/SNV_INDEL/families/WES/franklin.ht', ) with patch('v03_pipeline.lib.paths.Env') as mock_env: - mock_env.HAIL_SEARCH_DATA = 'gs://seqr-datasets/' + mock_env.HAIL_SEARCH_DATA_DIR = 'gs://seqr-datasets/' self.assertEqual( family_table_path( ReferenceGenome.GRCh37, diff --git a/v03_pipeline/lib/reference_data/clinvar.py b/v03_pipeline/lib/reference_data/clinvar.py index b9b59f89d..5e1980e32 100644 --- a/v03_pipeline/lib/reference_data/clinvar.py +++ b/v03_pipeline/lib/reference_data/clinvar.py @@ -139,7 +139,7 @@ def download_and_import_latest_clinvar_vcf( with tempfile.NamedTemporaryFile(suffix='.vcf.gz', delete=False) as tmp_file: urllib.request.urlretrieve(clinvar_url, tmp_file.name) # noqa: S310 gcs_tmp_file_name = os.path.join( - Env.HAIL_TMPDIR, + Env.HAIL_TMP_DIR, os.path.basename(tmp_file.name), ) safely_move_to_gcs(tmp_file.name, gcs_tmp_file_name) @@ -203,7 +203,7 @@ def download_and_import_clinvar_submission_summary() -> hl.Table: shutil.copyfileobj(f_in, f_out) gcs_tmp_file_name = os.path.join( - Env.HAIL_TMPDIR, + Env.HAIL_TMP_DIR, os.path.basename(unzipped_tmp_file.name), ) safely_move_to_gcs(unzipped_tmp_file.name, gcs_tmp_file_name) diff --git a/v03_pipeline/lib/tasks/base/base_hail_table.py b/v03_pipeline/lib/tasks/base/base_hail_table.py index 7f8a84a95..b5f7a0262 100644 --- 
a/v03_pipeline/lib/tasks/base/base_hail_table.py +++ b/v03_pipeline/lib/tasks/base/base_hail_table.py @@ -23,7 +23,7 @@ def complete(self) -> bool: def init_hail(self): # Need to use the GCP bucket as temp storage for very large callset joins - hl.init(tmp_dir=Env.HAIL_TMPDIR, idempotent=True) + hl.init(tmp_dir=Env.HAIL_TMP_DIR, idempotent=True) # Interval ref data join causes shuffle death, this prevents it hl._set_flags(use_new_shuffle='1', no_whole_stage_codegen='1') # noqa: SLF001 diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index e1730e25d..9a678786b 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -1221,7 +1221,7 @@ def test_sv_update_vat( self.assertTrue(update_variant_annotations_task.complete()) self.assertFalse( GCSorLocalFolderTarget( - f'{self.mock_env.REFERENCE_DATASETS}/v03/GRCh38/SV/lookup.ht', + f'{self.mock_env.REFERENCE_DATASETS_DIR}/v03/GRCh38/SV/lookup.ht', ).exists(), ) ht = hl.read_table(update_variant_annotations_task.output().path) @@ -1800,7 +1800,7 @@ def test_gcnv_update_vat( self.assertTrue(update_variant_annotations_task.complete()) self.assertFalse( GCSorLocalFolderTarget( - f'{self.mock_env.REFERENCE_DATASETS}/v03/GRCh38/GCNV/lookup.ht', + f'{self.mock_env.REFERENCE_DATASETS_DIR}/v03/GRCh38/GCNV/lookup.ht', ).exists(), ) ht = hl.read_table(update_variant_annotations_task.output().path)