From 907eee4edfa946bfc0d96b557fcee7c3c95c6f5e Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 12:45:42 -0400 Subject: [PATCH 01/17] use predetermined filters/imputed_sex paths --- v03_pipeline/lib/model/environment.py | 18 ++++++--- v03_pipeline/lib/paths.py | 37 +++++++++++++++++++ v03_pipeline/lib/paths_test.py | 31 ++++++++++++++++ .../lib/tasks/base/base_loading_run_params.py | 10 ----- .../lib/tasks/write_imported_callset.py | 22 ++++++++--- .../lib/tasks/write_sex_check_table.py | 11 ++++-- 6 files changed, 105 insertions(+), 24 deletions(-) diff --git a/v03_pipeline/lib/model/environment.py b/v03_pipeline/lib/model/environment.py index d89567d8b..831f430c6 100644 --- a/v03_pipeline/lib/model/environment.py +++ b/v03_pipeline/lib/model/environment.py @@ -2,10 +2,6 @@ from dataclasses import dataclass # NB: using os.environ.get inside the dataclass defaults gives a lint error. -ACCESS_PRIVATE_REFERENCE_DATASETS = ( - os.environ.get('ACCESS_PRIVATE_REFERENCE_DATASETS') == '1' -) -REFERENCE_DATA_AUTO_UPDATE = os.environ.get('REFERENCE_DATA_AUTO_UPDATE') == '1' HAIL_TMPDIR = os.environ.get('HAIL_TMPDIR', '/tmp') # noqa: S108 HAIL_SEARCH_DATA = os.environ.get('HAIL_SEARCH_DATA', '/hail-search-data') LOADING_DATASETS = os.environ.get('LOADING_DATASETS', '/seqr-loading-temp') @@ -19,22 +15,32 @@ ) VEP_CONFIG_PATH = os.environ.get('VEP_CONFIG_PATH', None) VEP_CONFIG_URI = os.environ.get('VEP_CONFIG_URI', None) -SHOULD_REGISTER_ALLELES = os.environ.get('SHOULD_REGISTER_ALLELES') == '1' + +# Allele registry secrets :/ ALLELE_REGISTRY_SECRET_NAME = os.environ.get('ALLELE_REGISTRY_SECRET_NAME', None) PROJECT_ID = os.environ.get('PROJECT_ID', None) +# Feature Flags +ACCESS_PRIVATE_REFERENCE_DATASETS = ( + os.environ.get('ACCESS_PRIVATE_REFERENCE_DATASETS') == '1' +) +EXPECT_WES_FILTERS = os.environ.get('EXPECT_WES_FILTERS') == '1' +REFERENCE_DATA_AUTO_UPDATE = os.environ.get('REFERENCE_DATA_AUTO_UPDATE') == '1' +SHOULD_REGISTER_ALLELES = os.environ.get('SHOULD_REGISTER_ALLELES') == '1' + @dataclass class Env: ACCESS_PRIVATE_REFERENCE_DATASETS: bool = ACCESS_PRIVATE_REFERENCE_DATASETS ALLELE_REGISTRY_SECRET_NAME: str | None = ALLELE_REGISTRY_SECRET_NAME - REFERENCE_DATA_AUTO_UPDATE: bool = REFERENCE_DATA_AUTO_UPDATE + EXPECT_WES_FILTERS: bool = EXPECT_WES_FILTERS HAIL_TMPDIR: str = HAIL_TMPDIR HAIL_SEARCH_DATA: str = HAIL_SEARCH_DATA LOADING_DATASETS: str = LOADING_DATASETS PRIVATE_REFERENCE_DATASETS: str = PRIVATE_REFERENCE_DATASETS PROJECT_ID: str | None = PROJECT_ID REFERENCE_DATASETS: str = REFERENCE_DATASETS + REFERENCE_DATA_AUTO_UPDATE: bool = REFERENCE_DATA_AUTO_UPDATE SHOULD_REGISTER_ALLELES: bool = SHOULD_REGISTER_ALLELES VEP_CONFIG_PATH: str | None = VEP_CONFIG_PATH VEP_CONFIG_URI: str | None = VEP_CONFIG_URI diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 14482d831..c6f987a77 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -1,5 +1,6 @@ import hashlib import os +import re from v03_pipeline.lib.model import ( AccessControl, @@ -9,6 +10,7 @@ PipelineVersion, ReferenceDatasetCollection, ReferenceGenome, + SampleType, ) @@ -73,6 +75,22 @@ def family_table_path( ) +def imputed_sex_path( + reference_genome: ReferenceGenome, + dataset_type: DatasetType, + callset_path: str, +) -> str: + return os.path.join( + _v03_pipeline_prefix( + Env.LOADING_DATASETS, + reference_genome, + dataset_type, + ), + 'imputed_sex', + f'{hashlib.sha256(callset_path.encode("utf8")).hexdigest()}.tsv', + ) + + def imported_callset_path( reference_genome: ReferenceGenome, dataset_type: DatasetType, @@ -198,6 +216,25 @@ def sex_check_table_path( ) +def valid_filters_path( + dataset_type: DatasetType, + sample_type: SampleType, + callset_path: str, +) -> str | None: + if ( + not Env.EXPECT_WES_FILTERS + or dataset_type != DatasetType.SNV_INDEL + or sample_type != SampleType.WES + or 'part_one_outputs' not in callset_path + ): + return None + return re.sub( + 'part_one_outputs/.*$', + 'part_two_outputs/*.filtered.*.vcf.gz', + callset_path, + ) + + def valid_reference_dataset_collection_path( reference_genome: ReferenceGenome, dataset_type: DatasetType, diff --git a/v03_pipeline/lib/paths_test.py b/v03_pipeline/lib/paths_test.py index d6f0b10ba..ccb0c47c7 100644 --- a/v03_pipeline/lib/paths_test.py +++ b/v03_pipeline/lib/paths_test.py @@ -6,11 +6,13 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, + SampleType, ) from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, family_table_path, imported_callset_path, + imputed_sex_path, lookup_table_path, metadata_for_run_path, new_variants_table_path, @@ -18,6 +20,7 @@ relatedness_check_table_path, remapped_and_subsetted_callset_path, sex_check_table_path, + valid_filters_path, valid_reference_dataset_collection_path, variant_annotations_table_path, ) @@ -54,6 +57,24 @@ def test_family_table_path(self) -> None: 'gs://seqr-datasets/v03/GRCh37/SNV_INDEL/families/franklin.ht', ) + def test_valid_filters_path(self) -> None: + self.assertEqual( + valid_filters_path( + SampleType.WES, + 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', + ), + None, + ) + with patch('v03_pipeline.lib.paths.Env') as mock_env: + mock_env.EXPECT_WES_FILTERS = True + self.assertEqual( + valid_filters_path( + SampleType.WES, + 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', + ), + 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_two_outputs/*.filtered.*.vcf.gz', + ) + def test_project_table_path(self) -> None: self.assertEqual( project_table_path( @@ -162,6 +183,16 @@ def test_imported_callset_path(self) -> None: '/seqr-loading-temp/v03/GRCh38/SNV_INDEL/imported_callsets/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.mt', ) + def test_imputed_sex_path(self) -> None: + self.assertEqual( + imputed_sex_path( + ReferenceGenome.GRCh38, + DatasetType.SNV_INDEL, + 'gs://abc.efg/callset.vcf.gz', + ), + '/seqr-loading-temp/v03/GRCh38/SNV_INDEL/imputed_sex/ead56bb177a5de24178e1e622ce1d8beb3f8892bdae1c925d22ca0af4013d6dd.tsv', + ) + def test_new_variants_table_path(self) -> None: self.assertEqual( new_variants_table_path( diff --git a/v03_pipeline/lib/tasks/base/base_loading_run_params.py b/v03_pipeline/lib/tasks/base/base_loading_run_params.py index 1bfa204be..f24a0efbf 100644 --- a/v03_pipeline/lib/tasks/base/base_loading_run_params.py +++ b/v03_pipeline/lib/tasks/base/base_loading_run_params.py @@ -10,16 +10,6 @@ class BaseLoadingRunParams(luigi.Task): # but nothing else. sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() - # HINT: OptionalParameter vs Parameter is significant here. - # The default Parameter will case `None` to the string "None". - imputed_sex_path = luigi.OptionalParameter( - default=None, - description='Optional path to a tsv of imputed sex values from the DRAGEN GVS pipeline.', - ) - filters_path = luigi.OptionalParameter( - default=None, - description='Optional path to part two outputs from callset (VCF shards containing filter information)', - ) ignore_missing_samples_when_remapping = luigi.BoolParameter( default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 9cd146af4..b2ff5c356 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -16,12 +16,13 @@ validate_sample_type, ) from v03_pipeline.lib.misc.vets import annotate_vets -from v03_pipeline.lib.model import CachedReferenceDatasetQuery +from v03_pipeline.lib.model import CachedReferenceDatasetQuery, DatasetType, SampleType from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, imported_callset_path, sex_check_table_path, + valid_filters_path, ) from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask @@ -53,10 +54,18 @@ def output(self) -> luigi.Target: def requires(self) -> list[luigi.Task]: requirements = [] - if self.filters_path: + if ( + Env.EXPECT_WES_FILTERS + and self.dataset_type == DatasetType.SNV_INDEL + and self.sample_type == SampleType.WES + ): requirements = [ *requirements, - CallsetTask(self.filters_path), + CallsetTask( + valid_filters_path( + self.dataset_type, self.sample_type, self.callset_path, + ), + ), ] if self.validate and self.dataset_type.can_run_validation: requirements = [ @@ -108,11 +117,14 @@ def additional_row_fields(self, mt): } def create_table(self) -> hl.MatrixTable: + filters_path = valid_filters_path( + self.dataset_type, self.sample_type, self.callset_path, + ) mt = import_callset( self.callset_path, self.reference_genome, self.dataset_type, - self.filters_path, + filters_path, ) mt = select_relevant_fields( mt, @@ -174,6 +186,6 @@ def create_table(self) -> hl.MatrixTable: ) return mt.annotate_globals( callset_path=self.callset_path, - filters_path=self.filters_path or hl.missing(hl.tstr), + filters_path=filters_path or hl.missing(hl.tstr), sample_type=self.sample_type.value, ) diff --git a/v03_pipeline/lib/tasks/write_sex_check_table.py b/v03_pipeline/lib/tasks/write_sex_check_table.py index 801330689..b8b4bb9e7 100644 --- a/v03_pipeline/lib/tasks/write_sex_check_table.py +++ b/v03_pipeline/lib/tasks/write_sex_check_table.py @@ -2,14 +2,13 @@ import luigi from v03_pipeline.lib.misc.io import import_imputed_sex -from v03_pipeline.lib.paths import sex_check_table_path +from v03_pipeline.lib.paths import imputed_sex_path, sex_check_table_path from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget, RawFileTask class WriteSexCheckTableTask(BaseWriteTask): callset_path = luigi.Parameter() - imputed_sex_path = luigi.Parameter() def output(self) -> luigi.Target: return GCSorLocalTarget( @@ -21,7 +20,13 @@ def output(self) -> luigi.Target: ) def requires(self) -> luigi.Task: - return RawFileTask(self.imputed_sex_path) + return RawFileTask( + imputed_sex_path( + self.reference_genome, + self.dataset_type, + self.callset_path, + ), + ) def create_table(self) -> hl.Table: return import_imputed_sex(self.input().path) From f1a8e63481372ff31581baf7288901aa895ff1be Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 12:51:13 -0400 Subject: [PATCH 02/17] ruff --- v03_pipeline/lib/tasks/write_imported_callset.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index b2ff5c356..2b6f59e1a 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -63,7 +63,9 @@ def requires(self) -> list[luigi.Task]: *requirements, CallsetTask( valid_filters_path( - self.dataset_type, self.sample_type, self.callset_path, + self.dataset_type, + self.sample_type, + self.callset_path, ), ), ] @@ -118,7 +120,9 @@ def additional_row_fields(self, mt): def create_table(self) -> hl.MatrixTable: filters_path = valid_filters_path( - self.dataset_type, self.sample_type, self.callset_path, + self.dataset_type, + self.sample_type, + self.callset_path, ) mt = import_callset( self.callset_path, From 0270a88a506613be1d3cc8257583d8492b95a6df Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 13:03:13 -0400 Subject: [PATCH 03/17] formalize --- v03_pipeline/lib/model/dataset_type.py | 8 +++++++- v03_pipeline/lib/paths.py | 3 +-- v03_pipeline/lib/tasks/write_imported_callset.py | 5 ++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/v03_pipeline/lib/model/dataset_type.py b/v03_pipeline/lib/model/dataset_type.py index b376a6ae8..e7af6983f 100644 --- a/v03_pipeline/lib/model/dataset_type.py +++ b/v03_pipeline/lib/model/dataset_type.py @@ -4,7 +4,7 @@ import hail as hl from v03_pipeline.lib.annotations import gcnv, mito, shared, snv_indel, sv -from v03_pipeline.lib.model.definitions import ReferenceGenome +from v03_pipeline.lib.model.definitions import ReferenceGenome, SampleType MITO_MIN_HOM_THRESHOLD = 0.95 ZERO = 0.0 @@ -155,6 +155,12 @@ def has_gencode_ensembl_to_refseq_id_mapping( self == DatasetType.SNV_INDEL and reference_genome == ReferenceGenome.GRCh38 ) + def expect_filters( + self, + sample_type: SampleType, + ) -> bool: + return self == DatasetType.SNV_INDEL and sample_type == SampleType.WES + @property def has_gencode_gene_symbol_to_gene_id_mapping(self) -> bool: return self == DatasetType.SV diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index c6f987a77..3ab830e5f 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -223,8 +223,7 @@ def valid_filters_path( ) -> str | None: if ( not Env.EXPECT_WES_FILTERS - or dataset_type != DatasetType.SNV_INDEL - or sample_type != SampleType.WES + or not dataset_type.expect_filters(sample_type) or 'part_one_outputs' not in callset_path ): return None diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 2b6f59e1a..9961f5edf 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -16,7 +16,7 @@ validate_sample_type, ) from v03_pipeline.lib.misc.vets import annotate_vets -from v03_pipeline.lib.model import CachedReferenceDatasetQuery, DatasetType, SampleType +from v03_pipeline.lib.model import CachedReferenceDatasetQuery from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, @@ -56,8 +56,7 @@ def requires(self) -> list[luigi.Task]: requirements = [] if ( Env.EXPECT_WES_FILTERS - and self.dataset_type == DatasetType.SNV_INDEL - and self.sample_type == SampleType.WES + and self.dataset_type.expect_filters(self.sample_type) ): requirements = [ *requirements, From e33306a2409a23698747457c6093818d1b2484d9 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 13:13:53 -0400 Subject: [PATCH 04/17] lint --- v03_pipeline/lib/tasks/write_imported_callset.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 9961f5edf..e049ad0e7 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -54,9 +54,8 @@ def output(self) -> luigi.Target: def requires(self) -> list[luigi.Task]: requirements = [] - if ( - Env.EXPECT_WES_FILTERS - and self.dataset_type.expect_filters(self.sample_type) + if Env.EXPECT_WES_FILTERS and self.dataset_type.expect_filters( + self.sample_type ): requirements = [ *requirements, From a9c485b62ba52154508a1c958ebad325e6ca301b Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 13:16:42 -0400 Subject: [PATCH 05/17] lint --- v03_pipeline/lib/tasks/write_imported_callset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index e049ad0e7..87b42ac9e 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -55,7 +55,7 @@ def output(self) -> luigi.Target: def requires(self) -> list[luigi.Task]: requirements = [] if Env.EXPECT_WES_FILTERS and self.dataset_type.expect_filters( - self.sample_type + self.sample_type, ): requirements = [ *requirements, From ac83cd0d3900f20f670632be9fae309dae7ae03b Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 14:30:19 -0400 Subject: [PATCH 06/17] Fix arg --- v03_pipeline/lib/paths_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/v03_pipeline/lib/paths_test.py b/v03_pipeline/lib/paths_test.py index ccb0c47c7..b09b8f387 100644 --- a/v03_pipeline/lib/paths_test.py +++ b/v03_pipeline/lib/paths_test.py @@ -60,6 +60,7 @@ def test_family_table_path(self) -> None: def test_valid_filters_path(self) -> None: self.assertEqual( valid_filters_path( + DatasetType.SNV_INDEL, SampleType.WES, 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', ), @@ -69,6 +70,7 @@ def test_valid_filters_path(self) -> None: mock_env.EXPECT_WES_FILTERS = True self.assertEqual( valid_filters_path( + DatasetType.SNV_INDEL, SampleType.WES, 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', ), From 27c6bd8acfc4c638b77cbc3cc2e376db335821f7 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 14:52:56 -0400 Subject: [PATCH 07/17] Change parameters again --- v03_pipeline/lib/model/environment.py | 2 -- .../lib/tasks/base/base_loading_run_params.py | 14 +++++++++----- v03_pipeline/lib/tasks/update_lookup_table_test.py | 2 -- .../lib/tasks/update_project_table_test.py | 1 - ...iant_annotations_table_with_new_samples_test.py | 11 ++--------- v03_pipeline/lib/tasks/write_family_table_test.py | 3 --- v03_pipeline/lib/tasks/write_imported_callset.py | 12 ++++++------ .../lib/tasks/write_metadata_for_run_test.py | 2 -- .../lib/tasks/write_project_family_tables_test.py | 1 - .../tasks/write_remapped_and_subsetted_callset.py | 4 ++-- .../write_remapped_and_subsetted_callset_test.py | 6 ++---- 11 files changed, 21 insertions(+), 37 deletions(-) diff --git a/v03_pipeline/lib/model/environment.py b/v03_pipeline/lib/model/environment.py index 831f430c6..e6695828a 100644 --- a/v03_pipeline/lib/model/environment.py +++ b/v03_pipeline/lib/model/environment.py @@ -24,7 +24,6 @@ ACCESS_PRIVATE_REFERENCE_DATASETS = ( os.environ.get('ACCESS_PRIVATE_REFERENCE_DATASETS') == '1' ) -EXPECT_WES_FILTERS = os.environ.get('EXPECT_WES_FILTERS') == '1' REFERENCE_DATA_AUTO_UPDATE = os.environ.get('REFERENCE_DATA_AUTO_UPDATE') == '1' SHOULD_REGISTER_ALLELES = os.environ.get('SHOULD_REGISTER_ALLELES') == '1' @@ -33,7 +32,6 @@ class Env: ACCESS_PRIVATE_REFERENCE_DATASETS: bool = ACCESS_PRIVATE_REFERENCE_DATASETS ALLELE_REGISTRY_SECRET_NAME: str | None = ALLELE_REGISTRY_SECRET_NAME - EXPECT_WES_FILTERS: bool = EXPECT_WES_FILTERS HAIL_TMPDIR: str = HAIL_TMPDIR HAIL_SEARCH_DATA: str = HAIL_SEARCH_DATA LOADING_DATASETS: str = LOADING_DATASETS diff --git a/v03_pipeline/lib/tasks/base/base_loading_run_params.py b/v03_pipeline/lib/tasks/base/base_loading_run_params.py index f24a0efbf..08cc8e065 100644 --- a/v03_pipeline/lib/tasks/base/base_loading_run_params.py +++ b/v03_pipeline/lib/tasks/base/base_loading_run_params.py @@ -14,16 +14,20 @@ class BaseLoadingRunParams(luigi.Task): default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) - validate = luigi.BoolParameter( + force = luigi.BoolParameter( + default=False, + parsing=luigi.BoolParameter.EXPLICIT_PARSING, + ) + skip_check_sex_and_relatedness = luigi.BoolParameter( default=True, parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) - force = luigi.BoolParameter( - default=False, + skip_expect_filters = luigi.BoolParameter( + default=True, parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) - check_sex_and_relatedness = luigi.BoolParameter( - default=False, + skip_validation = luigi.BoolParameter( + default=True, parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) is_new_gcnv_joint_call = luigi.BoolParameter( diff --git a/v03_pipeline/lib/tasks/update_lookup_table_test.py b/v03_pipeline/lib/tasks/update_lookup_table_test.py index dc5f22059..c6ee96df7 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_test.py @@ -26,7 +26,6 @@ def test_skip_update_lookup_table_task(self) -> None: ], # a project excluded from the lookup table project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - validate=False, liftover_ref_path=TEST_LIFTOVER, ) worker.add(uslt_task) @@ -58,7 +57,6 @@ def test_update_lookup_table_task(self) -> None: project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - validate=False, liftover_ref_path=TEST_LIFTOVER, ) worker.add(uslt_task) diff --git a/v03_pipeline/lib/tasks/update_project_table_test.py b/v03_pipeline/lib/tasks/update_project_table_test.py index 07278a2d3..0ab93f469 100644 --- a/v03_pipeline/lib/tasks/update_project_table_test.py +++ b/v03_pipeline/lib/tasks/update_project_table_test.py @@ -22,7 +22,6 @@ def test_update_project_table_task(self) -> None: project_guid='R0113_test_project', project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, - validate=False, liftover_ref_path=TEST_LIFTOVER, ) worker.add(upt_task) diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index a00d5e3ab..94aff28ed 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -163,7 +163,6 @@ def test_missing_pedigree( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=['bad_pedigree'], - validate=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) @@ -197,7 +196,6 @@ def test_missing_interval_reference( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - validate=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) @@ -366,7 +364,7 @@ def test_multiple_update_vat( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - validate=True, + skip_validation=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) @@ -418,7 +416,7 @@ def test_multiple_update_vat( project_guids=['R0114_project4'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_4], - validate=True, + skip_validation=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) @@ -689,7 +687,6 @@ def test_update_vat_grch37( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - validate=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) @@ -769,7 +766,6 @@ def test_update_vat_without_accessing_private_datasets( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - validate=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) @@ -827,7 +823,6 @@ def test_mito_update_vat( project_guids=['R0115_test_project2'], project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], - validate=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) @@ -1092,7 +1087,6 @@ def test_sv_update_vat( project_guids=['R0115_test_project2'], project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], - validate=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) @@ -1654,7 +1648,6 @@ def test_gcnv_update_vat( project_guids=['R0115_test_project2'], project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], - validate=False, liftover_ref_path=TEST_LIFTOVER, run_id=TEST_RUN_ID, ) diff --git a/v03_pipeline/lib/tasks/write_family_table_test.py b/v03_pipeline/lib/tasks/write_family_table_test.py index 554735c59..637104cc3 100644 --- a/v03_pipeline/lib/tasks/write_family_table_test.py +++ b/v03_pipeline/lib/tasks/write_family_table_test.py @@ -26,7 +26,6 @@ def test_snv_write_family_table_task(self) -> None: project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, family_guid='abc_1', - validate=False, liftover_ref_path=TEST_LIFTOVER, ) worker.add(wft_task) @@ -164,7 +163,6 @@ def test_sv_write_family_table_task(self) -> None: project_remap_path='not_a_real_file', project_pedigree_path=TEST_PEDIGREE_5, family_guid='family_2_1', - validate=False, liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_family_table_task) @@ -417,7 +415,6 @@ def test_gcnv_write_family_table_task(self) -> None: project_remap_path='not_a_real_file', project_pedigree_path=TEST_PEDIGREE_5, family_guid='family_2_1', - validate=False, liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_family_table_task) diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 87b42ac9e..666114658 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -54,7 +54,7 @@ def output(self) -> luigi.Target: def requires(self) -> list[luigi.Task]: requirements = [] - if Env.EXPECT_WES_FILTERS and self.dataset_type.expect_filters( + if not self.skip_expect_filters and self.dataset_type.expect_filters( self.sample_type, ): requirements = [ @@ -67,7 +67,7 @@ def requires(self) -> list[luigi.Task]: ), ), ] - if self.validate and self.dataset_type.can_run_validation: + if not self.skip_validation and self.dataset_type.can_run_validation: requirements = [ *requirements, ( @@ -86,7 +86,7 @@ def requires(self) -> list[luigi.Task]: ), ] if ( - self.check_sex_and_relatedness + not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): requirements = [ @@ -102,7 +102,7 @@ def additional_row_fields(self, mt): return { **( {'info.AF': hl.tarray(hl.tfloat64)} - if self.check_sex_and_relatedness + if not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness else {} ), @@ -154,7 +154,7 @@ def create_table(self) -> hl.MatrixTable: mt.locus.contig, ), ) - if self.validate and self.dataset_type.can_run_validation: + if not self.skip_validation and self.dataset_type.can_run_validation: validate_allele_type(mt) validate_no_duplicate_variants(mt) validate_expected_contig_frequency(mt, self.reference_genome) @@ -172,7 +172,7 @@ def create_table(self) -> hl.MatrixTable: self.sample_type, ) if ( - self.check_sex_and_relatedness + not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): sex_check_ht = hl.read_table( diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run_test.py b/v03_pipeline/lib/tasks/write_metadata_for_run_test.py index cf61fcc4f..6c16782ed 100644 --- a/v03_pipeline/lib/tasks/write_metadata_for_run_test.py +++ b/v03_pipeline/lib/tasks/write_metadata_for_run_test.py @@ -23,8 +23,6 @@ def test_write_metadata_for_run_task(self) -> None: project_guids=['R0113_test_project', 'R0114_project4'], project_remap_paths=[TEST_REMAP_2, TEST_REMAP_2], project_pedigree_paths=[TEST_PEDIGREE_3, TEST_PEDIGREE_4], - validate=False, - check_sex_and_relatedness=False, run_id='run_123456', ) worker.add(write_metadata_for_run_task) diff --git a/v03_pipeline/lib/tasks/write_project_family_tables_test.py b/v03_pipeline/lib/tasks/write_project_family_tables_test.py index 9943771d2..18d818656 100644 --- a/v03_pipeline/lib/tasks/write_project_family_tables_test.py +++ b/v03_pipeline/lib/tasks/write_project_family_tables_test.py @@ -24,7 +24,6 @@ def test_snv_write_project_family_tables_task(self) -> None: project_guid='R0113_test_project', project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_4, - validate=False, liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_project_family_tables) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index 81e5adf93..ec468eb2c 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -53,7 +53,7 @@ def requires(self) -> list[luigi.Task]: RawFileTask(self.project_pedigree_path), ] if ( - self.check_sex_and_relatedness + not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): requirements = [ @@ -88,7 +88,7 @@ def create_table(self) -> hl.MatrixTable: families_failed_relatedness_check = {} families_failed_sex_check = {} if ( - self.check_sex_and_relatedness + not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): relatedness_check_ht = hl.read_table(self.input()[2].path) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py index 6cfd95098..a8e277dbb 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py @@ -82,8 +82,7 @@ def test_write_remapped_and_subsetted_callset_task( project_guid='R0113_test_project', project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, - validate=False, - check_sex_and_relatedness=True, + skip_check_sex_and_relatedness=False, ) worker.add(wrsc_task) worker.run() @@ -116,8 +115,7 @@ def test_write_remapped_and_subsetted_callset_task_failed_sex_check_family( project_guid='R0114_project4', project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_4, - validate=False, - check_sex_and_relatedness=True, + skip_check_sex_and_relatedness=False, ) worker.add(wrsc_task) worker.run() From 1840e1ea72e880b2ec8bb29c8bf46877a0fd4434 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 15:29:21 -0400 Subject: [PATCH 08/17] Update a bunch of args --- v03_pipeline/lib/paths.py | 3 +-- v03_pipeline/lib/paths_test.py | 20 +++++++++---------- .../lib/tasks/base/base_loading_run_params.py | 6 +++--- .../lib/tasks/update_lookup_table_test.py | 2 ++ .../lib/tasks/update_project_table_test.py | 1 + ...annotations_table_with_new_samples_test.py | 20 +++++++++++++++++-- .../lib/tasks/write_family_table_test.py | 6 ++++++ .../lib/tasks/write_metadata_for_run_test.py | 2 ++ .../tasks/write_project_family_tables_test.py | 2 ++ ...ite_remapped_and_subsetted_callset_test.py | 2 ++ 10 files changed, 46 insertions(+), 18 deletions(-) diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 3ab830e5f..8d5ff6335 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -222,8 +222,7 @@ def valid_filters_path( callset_path: str, ) -> str | None: if ( - not Env.EXPECT_WES_FILTERS - or not dataset_type.expect_filters(sample_type) + not dataset_type.expect_filters(sample_type) or 'part_one_outputs' not in callset_path ): return None diff --git a/v03_pipeline/lib/paths_test.py b/v03_pipeline/lib/paths_test.py index b09b8f387..50a080dc0 100644 --- a/v03_pipeline/lib/paths_test.py +++ b/v03_pipeline/lib/paths_test.py @@ -60,22 +60,20 @@ def test_family_table_path(self) -> None: def test_valid_filters_path(self) -> None: self.assertEqual( valid_filters_path( - DatasetType.SNV_INDEL, + DatasetType.MITO, SampleType.WES, 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', ), None, ) - with patch('v03_pipeline.lib.paths.Env') as mock_env: - mock_env.EXPECT_WES_FILTERS = True - self.assertEqual( - valid_filters_path( - DatasetType.SNV_INDEL, - SampleType.WES, - 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', - ), - 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_two_outputs/*.filtered.*.vcf.gz', - ) + self.assertEqual( + valid_filters_path( + DatasetType.SNV_INDEL, + SampleType.WES, + 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', + ), + 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_two_outputs/*.filtered.*.vcf.gz', + ) def test_project_table_path(self) -> None: self.assertEqual( diff --git a/v03_pipeline/lib/tasks/base/base_loading_run_params.py b/v03_pipeline/lib/tasks/base/base_loading_run_params.py index 08cc8e065..06fdcc967 100644 --- a/v03_pipeline/lib/tasks/base/base_loading_run_params.py +++ b/v03_pipeline/lib/tasks/base/base_loading_run_params.py @@ -19,15 +19,15 @@ class BaseLoadingRunParams(luigi.Task): parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) skip_check_sex_and_relatedness = luigi.BoolParameter( - default=True, + default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) skip_expect_filters = luigi.BoolParameter( - default=True, + default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) skip_validation = luigi.BoolParameter( - default=True, + default=False, parsing=luigi.BoolParameter.EXPLICIT_PARSING, ) is_new_gcnv_joint_call = luigi.BoolParameter( diff --git a/v03_pipeline/lib/tasks/update_lookup_table_test.py b/v03_pipeline/lib/tasks/update_lookup_table_test.py index c6ee96df7..6dac008e7 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_test.py @@ -26,6 +26,7 @@ def test_skip_update_lookup_table_task(self) -> None: ], # a project excluded from the lookup table project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], + skip_validation=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(uslt_task) @@ -57,6 +58,7 @@ def test_update_lookup_table_task(self) -> None: project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], + skip_validation=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(uslt_task) diff --git a/v03_pipeline/lib/tasks/update_project_table_test.py b/v03_pipeline/lib/tasks/update_project_table_test.py index 0ab93f469..ff3f93c1f 100644 --- a/v03_pipeline/lib/tasks/update_project_table_test.py +++ b/v03_pipeline/lib/tasks/update_project_table_test.py @@ -22,6 +22,7 @@ def test_update_project_table_task(self) -> None: project_guid='R0113_test_project', project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, + skip_validation=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(upt_task) diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index 94aff28ed..9991524a7 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -164,6 +164,8 @@ def test_missing_pedigree( project_remap_paths=[TEST_REMAP], project_pedigree_paths=['bad_pedigree'], liftover_ref_path=TEST_LIFTOVER, + skip_validation=True, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker = luigi.worker.Worker() @@ -197,6 +199,8 @@ def test_missing_interval_reference( project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], liftover_ref_path=TEST_LIFTOVER, + skip_validation=True, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker = luigi.worker.Worker() @@ -364,8 +368,9 @@ def test_multiple_update_vat( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - skip_validation=False, liftover_ref_path=TEST_LIFTOVER, + skip_validation=False, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker.add(uvatwns_task_3) @@ -416,8 +421,9 @@ def test_multiple_update_vat( project_guids=['R0114_project4'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_4], - skip_validation=False, liftover_ref_path=TEST_LIFTOVER, + skip_validation=False, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker.add(uvatwns_task_4) @@ -688,6 +694,8 @@ def test_update_vat_grch37( project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], liftover_ref_path=TEST_LIFTOVER, + skip_validation=True, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker.add(uvatwns_task) @@ -767,6 +775,8 @@ def test_update_vat_without_accessing_private_datasets( project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], liftover_ref_path=TEST_LIFTOVER, + skip_validation=True, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker.add(uvatwns_task) @@ -824,6 +834,8 @@ def test_mito_update_vat( project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], liftover_ref_path=TEST_LIFTOVER, + skip_validation=True, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) ) @@ -1088,6 +1100,8 @@ def test_sv_update_vat( project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], liftover_ref_path=TEST_LIFTOVER, + skip_validation=True, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) ) @@ -1649,6 +1663,8 @@ def test_gcnv_update_vat( project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], liftover_ref_path=TEST_LIFTOVER, + skip_validation=True, + skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) ) diff --git a/v03_pipeline/lib/tasks/write_family_table_test.py b/v03_pipeline/lib/tasks/write_family_table_test.py index 637104cc3..2b455349d 100644 --- a/v03_pipeline/lib/tasks/write_family_table_test.py +++ b/v03_pipeline/lib/tasks/write_family_table_test.py @@ -26,6 +26,8 @@ def test_snv_write_family_table_task(self) -> None: project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, family_guid='abc_1', + skip_validation=True, + skip_check_sex_and_relatedness=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(wft_task) @@ -163,6 +165,8 @@ def test_sv_write_family_table_task(self) -> None: project_remap_path='not_a_real_file', project_pedigree_path=TEST_PEDIGREE_5, family_guid='family_2_1', + skip_validation=True, + skip_check_sex_and_relatedness=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_family_table_task) @@ -415,6 +419,8 @@ def test_gcnv_write_family_table_task(self) -> None: project_remap_path='not_a_real_file', project_pedigree_path=TEST_PEDIGREE_5, family_guid='family_2_1', + skip_validation=True, + skip_check_sex_and_relatedness=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_family_table_task) diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run_test.py b/v03_pipeline/lib/tasks/write_metadata_for_run_test.py index 6c16782ed..1feca2434 100644 --- a/v03_pipeline/lib/tasks/write_metadata_for_run_test.py +++ b/v03_pipeline/lib/tasks/write_metadata_for_run_test.py @@ -23,6 +23,8 @@ def test_write_metadata_for_run_task(self) -> None: project_guids=['R0113_test_project', 'R0114_project4'], project_remap_paths=[TEST_REMAP_2, TEST_REMAP_2], project_pedigree_paths=[TEST_PEDIGREE_3, TEST_PEDIGREE_4], + skip_check_sex_and_relatedness=True, + skip_validation=True, run_id='run_123456', ) worker.add(write_metadata_for_run_task) diff --git a/v03_pipeline/lib/tasks/write_project_family_tables_test.py b/v03_pipeline/lib/tasks/write_project_family_tables_test.py index 18d818656..2642fe486 100644 --- a/v03_pipeline/lib/tasks/write_project_family_tables_test.py +++ b/v03_pipeline/lib/tasks/write_project_family_tables_test.py @@ -24,6 +24,8 @@ def test_snv_write_project_family_tables_task(self) -> None: project_guid='R0113_test_project', project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_4, + skip_validation=True, + skip_check_sex_and_relatedness=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_project_family_tables) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py index a8e277dbb..c6576ba8a 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py @@ -82,6 +82,7 @@ def test_write_remapped_and_subsetted_callset_task( project_guid='R0113_test_project', project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, + skip_validation=True, skip_check_sex_and_relatedness=False, ) worker.add(wrsc_task) @@ -115,6 +116,7 @@ def test_write_remapped_and_subsetted_callset_task_failed_sex_check_family( project_guid='R0114_project4', project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_4, + skip_validation=True, skip_check_sex_and_relatedness=False, ) worker.add(wrsc_task) From a686ce8e5afef1726aa539e6f3dc413882da14b9 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 18:10:13 -0400 Subject: [PATCH 09/17] missed a few --- v03_pipeline/lib/tasks/update_lookup_table_test.py | 2 ++ v03_pipeline/lib/tasks/update_project_table_test.py | 1 + 2 files changed, 3 insertions(+) diff --git a/v03_pipeline/lib/tasks/update_lookup_table_test.py b/v03_pipeline/lib/tasks/update_lookup_table_test.py index 6dac008e7..ae28f33da 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_test.py @@ -27,6 +27,7 @@ def test_skip_update_lookup_table_task(self) -> None: project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, + skip_check_sex_and_relatedness=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(uslt_task) @@ -59,6 +60,7 @@ def test_update_lookup_table_task(self) -> None: project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, + skip_check_sex_and_relatedness=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(uslt_task) diff --git a/v03_pipeline/lib/tasks/update_project_table_test.py b/v03_pipeline/lib/tasks/update_project_table_test.py index ff3f93c1f..e45c98b1b 100644 --- a/v03_pipeline/lib/tasks/update_project_table_test.py +++ b/v03_pipeline/lib/tasks/update_project_table_test.py @@ -23,6 +23,7 @@ def test_update_project_table_task(self) -> None: project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, skip_validation=True, + skip_check_sex_and_relatedness=True, liftover_ref_path=TEST_LIFTOVER, ) worker.add(upt_task) From 47c581ecd91718a2b5b66f06a3f3e246c3fc0819 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 18:37:38 -0400 Subject: [PATCH 10/17] fix liftover --- v03_pipeline/lib/model/environment.py | 4 ++++ v03_pipeline/lib/tasks/base/base_loading_run_params.py | 4 ---- v03_pipeline/lib/tasks/update_lookup_table_test.py | 3 --- v03_pipeline/lib/tasks/update_project_table_test.py | 2 -- ..._variant_annotations_table_with_new_samples_test.py | 10 ---------- v03_pipeline/lib/tasks/write_family_table_test.py | 4 ---- v03_pipeline/lib/tasks/write_new_variants_table.py | 1 + .../lib/tasks/write_project_family_tables_test.py | 2 -- 8 files changed, 5 insertions(+), 25 deletions(-) diff --git a/v03_pipeline/lib/model/environment.py b/v03_pipeline/lib/model/environment.py index e6695828a..dd4e3bb3a 100644 --- a/v03_pipeline/lib/model/environment.py +++ b/v03_pipeline/lib/model/environment.py @@ -4,6 +4,9 @@ # NB: using os.environ.get inside the dataclass defaults gives a lint error. HAIL_TMPDIR = os.environ.get('HAIL_TMPDIR', '/tmp') # noqa: S108 HAIL_SEARCH_DATA = os.environ.get('HAIL_SEARCH_DATA', '/hail-search-data') +LIFTOVER_REF_PATH = os.environ.get( + 'LIFTOVER_REF_PATH', 'gs://hail-common/references/grch38_to_grch37.over.chain.gz' +) LOADING_DATASETS = os.environ.get('LOADING_DATASETS', '/seqr-loading-temp') PRIVATE_REFERENCE_DATASETS = os.environ.get( 'PRIVATE_REFERENCE_DATASETS', @@ -34,6 +37,7 @@ class Env: ALLELE_REGISTRY_SECRET_NAME: str | None = ALLELE_REGISTRY_SECRET_NAME HAIL_TMPDIR: str = HAIL_TMPDIR HAIL_SEARCH_DATA: str = HAIL_SEARCH_DATA + LIFTOVER_REF_PATH: str = LIFTOVER_REF_PATH LOADING_DATASETS: str = LOADING_DATASETS PRIVATE_REFERENCE_DATASETS: str = PRIVATE_REFERENCE_DATASETS PROJECT_ID: str | None = PROJECT_ID diff --git a/v03_pipeline/lib/tasks/base/base_loading_run_params.py b/v03_pipeline/lib/tasks/base/base_loading_run_params.py index 06fdcc967..f5ce3b3e4 100644 --- a/v03_pipeline/lib/tasks/base/base_loading_run_params.py +++ b/v03_pipeline/lib/tasks/base/base_loading_run_params.py @@ -34,7 +34,3 @@ class BaseLoadingRunParams(luigi.Task): default=False, description='Is this a fully joint-called callset.', ) - liftover_ref_path = luigi.OptionalParameter( - default='gs://hail-common/references/grch38_to_grch37.over.chain.gz', - description='Path to GRCh38 to GRCh37 coordinates file', - ) diff --git a/v03_pipeline/lib/tasks/update_lookup_table_test.py b/v03_pipeline/lib/tasks/update_lookup_table_test.py index ae28f33da..758b6392c 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_test.py @@ -7,7 +7,6 @@ ) from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase -TEST_LIFTOVER = 'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz' TEST_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf' TEST_REMAP = 'v03_pipeline/var/test/remaps/test_remap_1.tsv' TEST_PEDIGREE_3 = 'v03_pipeline/var/test/pedigrees/test_pedigree_3.tsv' @@ -28,7 +27,6 @@ def test_skip_update_lookup_table_task(self) -> None: project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, skip_check_sex_and_relatedness=True, - liftover_ref_path=TEST_LIFTOVER, ) worker.add(uslt_task) worker.run() @@ -61,7 +59,6 @@ def test_update_lookup_table_task(self) -> None: project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, skip_check_sex_and_relatedness=True, - liftover_ref_path=TEST_LIFTOVER, ) worker.add(uslt_task) worker.run() diff --git a/v03_pipeline/lib/tasks/update_project_table_test.py b/v03_pipeline/lib/tasks/update_project_table_test.py index e45c98b1b..053be9cc1 100644 --- a/v03_pipeline/lib/tasks/update_project_table_test.py +++ b/v03_pipeline/lib/tasks/update_project_table_test.py @@ -5,7 +5,6 @@ from v03_pipeline.lib.tasks.update_project_table import UpdateProjectTableTask from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase -TEST_LIFTOVER = 'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz' TEST_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf' TEST_REMAP = 'v03_pipeline/var/test/remaps/test_remap_1.tsv' TEST_PEDIGREE_3 = 'v03_pipeline/var/test/pedigrees/test_pedigree_3.tsv' @@ -24,7 +23,6 @@ def test_update_project_table_task(self) -> None: project_pedigree_path=TEST_PEDIGREE_3, skip_validation=True, skip_check_sex_and_relatedness=True, - liftover_ref_path=TEST_LIFTOVER, ) worker.add(upt_task) worker.run() diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index 9991524a7..90d067043 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -43,7 +43,6 @@ from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase from v03_pipeline.var.test.vep.mock_vep_data import MOCK_37_VEP_DATA, MOCK_38_VEP_DATA -TEST_LIFTOVER = 'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz' TEST_MITO_MT = 'v03_pipeline/var/test/callsets/mito_1.mt' TEST_SNV_INDEL_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf' TEST_SV_VCF = 'v03_pipeline/var/test/callsets/sv_1.vcf' @@ -163,7 +162,6 @@ def test_missing_pedigree( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=['bad_pedigree'], - liftover_ref_path=TEST_LIFTOVER, skip_validation=True, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, @@ -198,7 +196,6 @@ def test_missing_interval_reference( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - liftover_ref_path=TEST_LIFTOVER, skip_validation=True, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, @@ -368,7 +365,6 @@ def test_multiple_update_vat( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - liftover_ref_path=TEST_LIFTOVER, skip_validation=False, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, @@ -421,7 +417,6 @@ def test_multiple_update_vat( project_guids=['R0114_project4'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_4], - liftover_ref_path=TEST_LIFTOVER, skip_validation=False, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, @@ -693,7 +688,6 @@ def test_update_vat_grch37( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - liftover_ref_path=TEST_LIFTOVER, skip_validation=True, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, @@ -774,7 +768,6 @@ def test_update_vat_without_accessing_private_datasets( project_guids=['R0113_test_project'], project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], - liftover_ref_path=TEST_LIFTOVER, skip_validation=True, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, @@ -833,7 +826,6 @@ def test_mito_update_vat( project_guids=['R0115_test_project2'], project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], - liftover_ref_path=TEST_LIFTOVER, skip_validation=True, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, @@ -1099,7 +1091,6 @@ def test_sv_update_vat( project_guids=['R0115_test_project2'], project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], - liftover_ref_path=TEST_LIFTOVER, skip_validation=True, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, @@ -1662,7 +1653,6 @@ def test_gcnv_update_vat( project_guids=['R0115_test_project2'], project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], - liftover_ref_path=TEST_LIFTOVER, skip_validation=True, skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, diff --git a/v03_pipeline/lib/tasks/write_family_table_test.py b/v03_pipeline/lib/tasks/write_family_table_test.py index 2b455349d..516578348 100644 --- a/v03_pipeline/lib/tasks/write_family_table_test.py +++ b/v03_pipeline/lib/tasks/write_family_table_test.py @@ -5,7 +5,6 @@ from v03_pipeline.lib.tasks.write_family_table import WriteFamilyTableTask from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase -TEST_LIFTOVER = 'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz' TEST_GCNV_BED_FILE = 'v03_pipeline/var/test/callsets/gcnv_1.tsv' TEST_SNV_INDEL_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf' TEST_SV_VCF = 'v03_pipeline/var/test/callsets/sv_1.vcf' @@ -28,7 +27,6 @@ def test_snv_write_family_table_task(self) -> None: family_guid='abc_1', skip_validation=True, skip_check_sex_and_relatedness=True, - liftover_ref_path=TEST_LIFTOVER, ) worker.add(wft_task) worker.run() @@ -167,7 +165,6 @@ def test_sv_write_family_table_task(self) -> None: family_guid='family_2_1', skip_validation=True, skip_check_sex_and_relatedness=True, - liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_family_table_task) worker.run() @@ -421,7 +418,6 @@ def test_gcnv_write_family_table_task(self) -> None: family_guid='family_2_1', skip_validation=True, skip_check_sex_and_relatedness=True, - liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_family_table_task) worker.run() diff --git a/v03_pipeline/lib/tasks/write_new_variants_table.py b/v03_pipeline/lib/tasks/write_new_variants_table.py index d35dccc71..b07a7785f 100644 --- a/v03_pipeline/lib/tasks/write_new_variants_table.py +++ b/v03_pipeline/lib/tasks/write_new_variants_table.py @@ -62,6 +62,7 @@ def annotation_dependencies(self) -> dict[str, hl.Table]: deps['gencode_gene_symbol_to_gene_id_mapping'] = hl.literal( load_gencode_gene_symbol_to_gene_id(GENCODE_RELEASE, ''), ) + deps['liftover_ref_path'] = Env.LIFTOVER_REF_PATH return deps def output(self) -> luigi.Target: diff --git a/v03_pipeline/lib/tasks/write_project_family_tables_test.py b/v03_pipeline/lib/tasks/write_project_family_tables_test.py index 2642fe486..37bb9a556 100644 --- a/v03_pipeline/lib/tasks/write_project_family_tables_test.py +++ b/v03_pipeline/lib/tasks/write_project_family_tables_test.py @@ -7,7 +7,6 @@ ) from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase -TEST_LIFTOVER = 'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz' TEST_SNV_INDEL_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf' TEST_REMAP = 'v03_pipeline/var/test/remaps/test_remap_1.tsv' TEST_PEDIGREE_4 = 'v03_pipeline/var/test/pedigrees/test_pedigree_4.tsv' @@ -26,7 +25,6 @@ def test_snv_write_project_family_tables_task(self) -> None: project_pedigree_path=TEST_PEDIGREE_4, skip_validation=True, skip_check_sex_and_relatedness=True, - liftover_ref_path=TEST_LIFTOVER, ) worker.add(write_project_family_tables) worker.run() From 41a23a579ba1b4b986fe5b44b0355f48f5b0f6dd Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 18:52:32 -0400 Subject: [PATCH 11/17] ruff --- v03_pipeline/lib/model/environment.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/v03_pipeline/lib/model/environment.py b/v03_pipeline/lib/model/environment.py index dd4e3bb3a..6a6a5c70e 100644 --- a/v03_pipeline/lib/model/environment.py +++ b/v03_pipeline/lib/model/environment.py @@ -5,7 +5,8 @@ HAIL_TMPDIR = os.environ.get('HAIL_TMPDIR', '/tmp') # noqa: S108 HAIL_SEARCH_DATA = os.environ.get('HAIL_SEARCH_DATA', '/hail-search-data') LIFTOVER_REF_PATH = os.environ.get( - 'LIFTOVER_REF_PATH', 'gs://hail-common/references/grch38_to_grch37.over.chain.gz' + 'LIFTOVER_REF_PATH', + 'gs://hail-common/references/grch38_to_grch37.over.chain.gz', ) LOADING_DATASETS = os.environ.get('LOADING_DATASETS', '/seqr-loading-temp') PRIVATE_REFERENCE_DATASETS = os.environ.get( From e5b446dfdc439469044713e52f07ff43b70a5364 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 19:10:22 -0400 Subject: [PATCH 12/17] reformat filters annotation --- v03_pipeline/lib/misc/io.py | 4 ---- .../lib/tasks/write_imported_callset.py | 18 ++++++++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/v03_pipeline/lib/misc/io.py b/v03_pipeline/lib/misc/io.py index 91d07e851..992e2585c 100644 --- a/v03_pipeline/lib/misc/io.py +++ b/v03_pipeline/lib/misc/io.py @@ -121,7 +121,6 @@ def import_callset( callset_path: str, reference_genome: ReferenceGenome, dataset_type: DatasetType, - filters_path: str | None = None, ) -> hl.MatrixTable: if dataset_type == DatasetType.GCNV: mt = import_gcnv_bed_file(callset_path) @@ -131,9 +130,6 @@ def import_callset( mt = hl.read_matrix_table(callset_path) if dataset_type == DatasetType.SV: mt = mt.annotate_rows(variant_id=mt.rsid) - if filters_path: - filters_ht = import_vcf(filters_path, reference_genome).rows() - mt = mt.annotate_rows(filters=filters_ht[mt.row_key].filters) return mt.key_rows_by(*dataset_type.table_key_type(reference_genome).fields) diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 666114658..d995a16c7 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -4,6 +4,7 @@ from v03_pipeline.lib.misc.io import ( import_callset, + import_vcf, select_relevant_fields, split_multi_hts, ) @@ -117,17 +118,22 @@ def additional_row_fields(self, mt): } def create_table(self) -> hl.MatrixTable: - filters_path = valid_filters_path( - self.dataset_type, - self.sample_type, - self.callset_path, - ) mt = import_callset( self.callset_path, self.reference_genome, self.dataset_type, - filters_path, ) + filters_path = None + if not self.skip_expect_filters and self.dataset_type.expect_filters( + self.sample_type, + ): + filters_path = valid_filters_path( + self.dataset_type, + self.sample_type, + self.callset_path, + ) + filters_ht = import_vcf(filters_path, self.reference_genome).rows() + mt = mt.annotate_rows(filters=filters_ht[mt.row_key].filters) mt = select_relevant_fields( mt, self.dataset_type, From 3cd656b5733e6f5d4c9f7d1b7688a67051f52dd7 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 21:26:11 -0400 Subject: [PATCH 13/17] add env vars too! --- v03_pipeline/lib/model/environment.py | 4 ++++ v03_pipeline/lib/paths.py | 3 ++- v03_pipeline/lib/paths_test.py | 18 ++++++++------- .../lib/tasks/update_lookup_table_test.py | 2 -- .../lib/tasks/update_project_table_test.py | 1 - ...annotations_table_with_new_samples_test.py | 9 -------- .../lib/tasks/write_family_table_test.py | 3 --- .../lib/tasks/write_imported_callset.py | 22 ++++++++++++++----- .../lib/tasks/write_metadata_for_run_test.py | 1 - .../write_remapped_and_subsetted_callset.py | 4 +++- 10 files changed, 35 insertions(+), 32 deletions(-) diff --git a/v03_pipeline/lib/model/environment.py b/v03_pipeline/lib/model/environment.py index 6a6a5c70e..d12201bef 100644 --- a/v03_pipeline/lib/model/environment.py +++ b/v03_pipeline/lib/model/environment.py @@ -28,6 +28,8 @@ ACCESS_PRIVATE_REFERENCE_DATASETS = ( os.environ.get('ACCESS_PRIVATE_REFERENCE_DATASETS') == '1' ) +CHECK_SEX_AND_RELATEDNESS = os.environ.get('CHECK_SEX_AND_RELATEDNESS') == '1' +EXPECT_WES_FILTERS = os.environ.get('EXPECT_WES_FILTERS') == '1' REFERENCE_DATA_AUTO_UPDATE = os.environ.get('REFERENCE_DATA_AUTO_UPDATE') == '1' SHOULD_REGISTER_ALLELES = os.environ.get('SHOULD_REGISTER_ALLELES') == '1' @@ -36,6 +38,8 @@ class Env: ACCESS_PRIVATE_REFERENCE_DATASETS: bool = ACCESS_PRIVATE_REFERENCE_DATASETS ALLELE_REGISTRY_SECRET_NAME: str | None = ALLELE_REGISTRY_SECRET_NAME + CHECK_SEX_AND_RELATEDNESS: bool = CHECK_SEX_AND_RELATEDNESS + EXPECT_WES_FILTERS: bool = EXPECT_WES_FILTERS HAIL_TMPDIR: str = HAIL_TMPDIR HAIL_SEARCH_DATA: str = HAIL_SEARCH_DATA LIFTOVER_REF_PATH: str = LIFTOVER_REF_PATH diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 8d5ff6335..3caffa956 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -222,6 +222,7 @@ def valid_filters_path( callset_path: str, ) -> str | None: if ( + not Env.EXPECT_WES_FILTERS or not dataset_type.expect_filters(sample_type) or 'part_one_outputs' not in callset_path ): @@ -240,7 +241,7 @@ def valid_reference_dataset_collection_path( ) -> str | None: if ( not Env.ACCESS_PRIVATE_REFERENCE_DATASETS - and reference_dataset_collection.access_control == AccessControl.PRIVATE + or reference_dataset_collection.access_control == AccessControl.PUBLIC ): return None return os.path.join( diff --git a/v03_pipeline/lib/paths_test.py b/v03_pipeline/lib/paths_test.py index 50a080dc0..ff437cf45 100644 --- a/v03_pipeline/lib/paths_test.py +++ b/v03_pipeline/lib/paths_test.py @@ -66,14 +66,16 @@ def test_valid_filters_path(self) -> None: ), None, ) - self.assertEqual( - valid_filters_path( - DatasetType.SNV_INDEL, - SampleType.WES, - 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', - ), - 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_two_outputs/*.filtered.*.vcf.gz', - ) + with patch('v03_pipeline.lib.paths.Env') as mock_env: + mock_env.EXPECT_WES_FILTERS = True + self.assertEqual( + valid_filters_path( + DatasetType.SNV_INDEL, + SampleType.WES, + 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_one_outputs/chr*/*.vcf.gz', + ), + 'gs://bucket/RDG_Broad_WES_Internal_Oct2023/part_two_outputs/*.filtered.*.vcf.gz', + ) def test_project_table_path(self) -> None: self.assertEqual( diff --git a/v03_pipeline/lib/tasks/update_lookup_table_test.py b/v03_pipeline/lib/tasks/update_lookup_table_test.py index 758b6392c..8551d873e 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_test.py @@ -26,7 +26,6 @@ def test_skip_update_lookup_table_task(self) -> None: project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, - skip_check_sex_and_relatedness=True, ) worker.add(uslt_task) worker.run() @@ -58,7 +57,6 @@ def test_update_lookup_table_task(self) -> None: project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, - skip_check_sex_and_relatedness=True, ) worker.add(uslt_task) worker.run() diff --git a/v03_pipeline/lib/tasks/update_project_table_test.py b/v03_pipeline/lib/tasks/update_project_table_test.py index 053be9cc1..c0a5a4e57 100644 --- a/v03_pipeline/lib/tasks/update_project_table_test.py +++ b/v03_pipeline/lib/tasks/update_project_table_test.py @@ -22,7 +22,6 @@ def test_update_project_table_task(self) -> None: project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, skip_validation=True, - skip_check_sex_and_relatedness=True, ) worker.add(upt_task) worker.run() diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index 90d067043..3d723cb3a 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -163,7 +163,6 @@ def test_missing_pedigree( project_remap_paths=[TEST_REMAP], project_pedigree_paths=['bad_pedigree'], skip_validation=True, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker = luigi.worker.Worker() @@ -197,7 +196,6 @@ def test_missing_interval_reference( project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker = luigi.worker.Worker() @@ -366,7 +364,6 @@ def test_multiple_update_vat( project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=False, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker.add(uvatwns_task_3) @@ -418,7 +415,6 @@ def test_multiple_update_vat( project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_4], skip_validation=False, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker.add(uvatwns_task_4) @@ -689,7 +685,6 @@ def test_update_vat_grch37( project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker.add(uvatwns_task) @@ -769,7 +764,6 @@ def test_update_vat_without_accessing_private_datasets( project_remap_paths=[TEST_REMAP], project_pedigree_paths=[TEST_PEDIGREE_3], skip_validation=True, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) worker.add(uvatwns_task) @@ -827,7 +821,6 @@ def test_mito_update_vat( project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], skip_validation=True, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) ) @@ -1092,7 +1085,6 @@ def test_sv_update_vat( project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], skip_validation=True, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) ) @@ -1654,7 +1646,6 @@ def test_gcnv_update_vat( project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], skip_validation=True, - skip_check_sex_and_relatedness=True, run_id=TEST_RUN_ID, ) ) diff --git a/v03_pipeline/lib/tasks/write_family_table_test.py b/v03_pipeline/lib/tasks/write_family_table_test.py index 516578348..3adc96901 100644 --- a/v03_pipeline/lib/tasks/write_family_table_test.py +++ b/v03_pipeline/lib/tasks/write_family_table_test.py @@ -26,7 +26,6 @@ def test_snv_write_family_table_task(self) -> None: project_pedigree_path=TEST_PEDIGREE_3, family_guid='abc_1', skip_validation=True, - skip_check_sex_and_relatedness=True, ) worker.add(wft_task) worker.run() @@ -164,7 +163,6 @@ def test_sv_write_family_table_task(self) -> None: project_pedigree_path=TEST_PEDIGREE_5, family_guid='family_2_1', skip_validation=True, - skip_check_sex_and_relatedness=True, ) worker.add(write_family_table_task) worker.run() @@ -417,7 +415,6 @@ def test_gcnv_write_family_table_task(self) -> None: project_pedigree_path=TEST_PEDIGREE_5, family_guid='family_2_1', skip_validation=True, - skip_check_sex_and_relatedness=True, ) worker.add(write_family_table_task) worker.run() diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index d995a16c7..be42fb750 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -55,8 +55,12 @@ def output(self) -> luigi.Target: def requires(self) -> list[luigi.Task]: requirements = [] - if not self.skip_expect_filters and self.dataset_type.expect_filters( - self.sample_type, + if ( + Env.EXPECT_WES_FILTERS + and not self.skip_expect_filters + and self.dataset_type.expect_filters( + self.sample_type, + ) ): requirements = [ *requirements, @@ -87,7 +91,8 @@ def requires(self) -> list[luigi.Task]: ), ] if ( - not self.skip_check_sex_and_relatedness + Env.CHECK_SEX_AND_RELATEDNESS + and not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): requirements = [ @@ -124,8 +129,12 @@ def create_table(self) -> hl.MatrixTable: self.dataset_type, ) filters_path = None - if not self.skip_expect_filters and self.dataset_type.expect_filters( - self.sample_type, + if ( + Env.EXPECT_WES_FILTERS + and not self.skip_expect_filters + and self.dataset_type.expect_filters( + self.sample_type, + ) ): filters_path = valid_filters_path( self.dataset_type, @@ -178,7 +187,8 @@ def create_table(self) -> hl.MatrixTable: self.sample_type, ) if ( - not self.skip_check_sex_and_relatedness + Env.CHECK_SEX_AND_RELATEDNESS + and not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): sex_check_ht = hl.read_table( diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run_test.py b/v03_pipeline/lib/tasks/write_metadata_for_run_test.py index 1feca2434..f5d733a79 100644 --- a/v03_pipeline/lib/tasks/write_metadata_for_run_test.py +++ b/v03_pipeline/lib/tasks/write_metadata_for_run_test.py @@ -23,7 +23,6 @@ def test_write_metadata_for_run_task(self) -> None: project_guids=['R0113_test_project', 'R0114_project4'], project_remap_paths=[TEST_REMAP_2, TEST_REMAP_2], project_pedigree_paths=[TEST_PEDIGREE_3, TEST_PEDIGREE_4], - skip_check_sex_and_relatedness=True, skip_validation=True, run_id='run_123456', ) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index ec468eb2c..6d4753680 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -15,6 +15,7 @@ ) from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families from v03_pipeline.lib.misc.sample_ids import remap_sample_ids, subset_samples +from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask @@ -88,7 +89,8 @@ def create_table(self) -> hl.MatrixTable: families_failed_relatedness_check = {} families_failed_sex_check = {} if ( - not self.skip_check_sex_and_relatedness + Env.CHECK_SEX_AND_RELATEDNESS + and not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): relatedness_check_ht = hl.read_table(self.input()[2].path) From bab5b5e51cffc45407ef9d26485685344e38d05d Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 21:34:48 -0400 Subject: [PATCH 14/17] ruff --- v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index 6d4753680..fbd3c6c4a 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -54,6 +54,7 @@ def requires(self) -> list[luigi.Task]: RawFileTask(self.project_pedigree_path), ] if ( + Env.CHECK_SEX_AND_RELATEDNESS and not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): From c6816afdc7460b26274e53bfd729ce672fe92415 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 21:37:04 -0400 Subject: [PATCH 15/17] ruff --- v03_pipeline/lib/paths.py | 4 ++-- .../lib/tasks/write_remapped_and_subsetted_callset.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 3caffa956..629898af2 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -222,8 +222,8 @@ def valid_filters_path( callset_path: str, ) -> str | None: if ( - not Env.EXPECT_WES_FILTERS or - not dataset_type.expect_filters(sample_type) + not Env.EXPECT_WES_FILTERS + or not dataset_type.expect_filters(sample_type) or 'part_one_outputs' not in callset_path ): return None diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index fbd3c6c4a..f5e9eb48e 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -54,8 +54,8 @@ def requires(self) -> list[luigi.Task]: RawFileTask(self.project_pedigree_path), ] if ( - Env.CHECK_SEX_AND_RELATEDNESS and - not self.skip_check_sex_and_relatedness + Env.CHECK_SEX_AND_RELATEDNESS + and not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness ): requirements = [ From 56c71251ec5c59219063cace82087e160919e781 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 21:49:45 -0400 Subject: [PATCH 16/17] Fix logic --- v03_pipeline/lib/paths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 629898af2..3ab830e5f 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -241,7 +241,7 @@ def valid_reference_dataset_collection_path( ) -> str | None: if ( not Env.ACCESS_PRIVATE_REFERENCE_DATASETS - or reference_dataset_collection.access_control == AccessControl.PUBLIC + and reference_dataset_collection.access_control == AccessControl.PRIVATE ): return None return os.path.join( From 8bfb13b2611f59477a4ca0ea485d69d5cd718eb8 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Jun 2024 22:18:52 -0400 Subject: [PATCH 17/17] add env mock --- .../lib/tasks/write_remapped_and_subsetted_callset_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py index c6576ba8a..48f2b481a 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py @@ -1,4 +1,5 @@ import shutil +from unittest.mock import Mock, patch import hail as hl import luigi.worker @@ -83,7 +84,6 @@ def test_write_remapped_and_subsetted_callset_task( project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_3, skip_validation=True, - skip_check_sex_and_relatedness=False, ) worker.add(wrsc_task) worker.run() @@ -104,9 +104,12 @@ def test_write_remapped_and_subsetted_callset_task( ], ) + @patch('v03_pipeline.lib.tasks.write_remapped_and_subsetted_callset.Env') def test_write_remapped_and_subsetted_callset_task_failed_sex_check_family( self, + mock_env: Mock, ) -> None: + mock_env.CHECK_SEX_AND_RELATEDNESS = True worker = luigi.worker.Worker() wrsc_task = WriteRemappedAndSubsettedCallsetTask( reference_genome=ReferenceGenome.GRCh38, @@ -117,7 +120,6 @@ def test_write_remapped_and_subsetted_callset_task_failed_sex_check_family( project_remap_path=TEST_REMAP, project_pedigree_path=TEST_PEDIGREE_4, skip_validation=True, - skip_check_sex_and_relatedness=False, ) worker.add(wrsc_task) worker.run()