From 4e05a0d4380497222a54e891e595811204f96070 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 10:45:09 -0400 Subject: [PATCH 01/10] Remove concept of private crdqs --- .../model/cached_reference_dataset_query.py | 13 ++----- v03_pipeline/lib/paths.py | 36 ++++++++----------- v03_pipeline/lib/paths_test.py | 4 +-- .../updated_cached_reference_dataset_query.py | 4 +-- ...ted_cached_reference_dataset_query_test.py | 4 +-- ...annotations_table_with_new_samples_test.py | 4 +-- .../lib/tasks/write_imported_callset.py | 6 ++-- .../tasks/write_relatedness_check_table.py | 4 +-- .../write_relatedness_check_table_test.py | 4 +-- 9 files changed, 32 insertions(+), 47 deletions(-) diff --git a/v03_pipeline/lib/model/cached_reference_dataset_query.py b/v03_pipeline/lib/model/cached_reference_dataset_query.py index 42b9f6abd..e26fa9fbd 100644 --- a/v03_pipeline/lib/model/cached_reference_dataset_query.py +++ b/v03_pipeline/lib/model/cached_reference_dataset_query.py @@ -4,7 +4,7 @@ import hail as hl from v03_pipeline.lib.model.dataset_type import DatasetType -from v03_pipeline.lib.model.definitions import AccessControl, ReferenceGenome +from v03_pipeline.lib.model.definitions import ReferenceGenome from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.reference_data.queries import ( clinvar_path_variants, @@ -20,10 +20,6 @@ class CachedReferenceDatasetQuery(Enum): GNOMAD_QC = 'gnomad_qc' HIGH_AF_VARIANTS = 'high_af_variants' - @property - def access_control(self) -> AccessControl: - return AccessControl.PUBLIC - def dataset(self, dataset_type: DatasetType) -> str | None: return { CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS: 'clinvar_mito' @@ -56,15 +52,10 @@ def for_reference_genome_dataset_type( reference_genome: ReferenceGenome, dataset_type: DatasetType, ) -> list['CachedReferenceDatasetQuery']: - crdqs = { + return { (ReferenceGenome.GRCh38, DatasetType.SNV_INDEL): list(cls), (ReferenceGenome.GRCh38, DatasetType.MITO): [ CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ], (ReferenceGenome.GRCh37, DatasetType.SNV_INDEL): list(cls), }.get((reference_genome, dataset_type), []) - if not Env.ACCESS_PRIVATE_REFERENCE_DATASETS: - return [ - crdq for crdq in crdqs if crdq.access_control == AccessControl.PUBLIC - ] - return crdqs diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 93669d43a..67f7234a5 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -40,6 +40,21 @@ def _v03_reference_data_prefix( reference_genome.value, ) +def cached_reference_dataset_query_path( + reference_genome: ReferenceGenome, + dataset_type: DatasetType, + cached_reference_dataset_query: CachedReferenceDatasetQuery, +) -> str: + return os.path.join( + _v03_reference_data_prefix( + cached_reference_dataset_query.access_control, + reference_genome, + ), + dataset_type.value, + 'cached_reference_dataset_queries', + f'{cached_reference_dataset_query.value}.ht', + ) + def family_table_path( reference_genome: ReferenceGenome, @@ -182,27 +197,6 @@ def sex_check_table_path( ) -def valid_cached_reference_dataset_query_path( - reference_genome: ReferenceGenome, - dataset_type: DatasetType, - cached_reference_dataset_query: CachedReferenceDatasetQuery, -) -> str | None: - if ( - not Env.ACCESS_PRIVATE_REFERENCE_DATASETS - and cached_reference_dataset_query.access_control == AccessControl.PRIVATE - ): - return None - return os.path.join( - _v03_reference_data_prefix( - cached_reference_dataset_query.access_control, - reference_genome, - ), - dataset_type.value, - 'cached_reference_dataset_queries', - f'{cached_reference_dataset_query.value}.ht', - ) - - def valid_reference_dataset_collection_path( reference_genome: ReferenceGenome, dataset_type: DatasetType, diff --git a/v03_pipeline/lib/paths_test.py b/v03_pipeline/lib/paths_test.py index fabb920e1..f31b8f8ef 100644 --- a/v03_pipeline/lib/paths_test.py +++ b/v03_pipeline/lib/paths_test.py @@ -17,7 +17,7 @@ relatedness_check_table_path, remapped_and_subsetted_callset_path, sex_check_table_path, - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, valid_reference_dataset_collection_path, variant_annotations_table_path, ) @@ -26,7 +26,7 @@ class TestPaths(unittest.TestCase): def test_cached_reference_dataset_query_path(self) -> None: self.assertEqual( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py index 9177aa21f..92fc7718b 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py @@ -8,7 +8,7 @@ ReferenceDatasetCollection, ) from v03_pipeline.lib.paths import ( - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, valid_reference_dataset_collection_path, ) from v03_pipeline.lib.reference_data.compare_globals import ( @@ -56,7 +56,7 @@ def complete(self) -> bool: def output(self) -> luigi.Target: return GCSorLocalTarget( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( self.reference_genome, self.dataset_type, self.crdq, diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py index 6541f8672..8dd9558c8 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py @@ -14,7 +14,7 @@ SampleType, ) from v03_pipeline.lib.paths import ( - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, valid_reference_dataset_collection_path, ) from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS @@ -167,7 +167,7 @@ def test_clinvar( # clinvar has version '2022-01-01' shutil.copytree( CLINVAR_CRDQ_PATH, - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index 21fe5f532..80a8fe2b4 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -24,7 +24,7 @@ SampleType, ) from v03_pipeline.lib.paths import ( - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, valid_reference_dataset_collection_path, ) from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS @@ -341,7 +341,7 @@ def test_multiple_update_vat( ), ) coding_and_noncoding_variants_ht.write( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 345af90e2..04af5bfc9 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -20,7 +20,7 @@ from v03_pipeline.lib.paths import ( imported_callset_path, sex_check_table_path, - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, ) from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import CallsetTask, GCSorLocalTarget, HailTableTask @@ -86,7 +86,7 @@ def requires(self) -> list[luigi.Task]: ) if Env.REFERENCE_DATA_AUTO_UPDATE else HailTableTask( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( self.reference_genome, self.dataset_type, CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, @@ -169,7 +169,7 @@ def create_table(self) -> hl.MatrixTable: validate_no_duplicate_variants(mt) validate_expected_contig_frequency(mt, self.reference_genome) coding_and_noncoding_ht = hl.read_table( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( self.reference_genome, self.dataset_type, CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table.py b/v03_pipeline/lib/tasks/write_relatedness_check_table.py index be7b92e6e..a53f69430 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table.py @@ -5,7 +5,7 @@ from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env from v03_pipeline.lib.paths import ( relatedness_check_table_path, - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, ) from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask @@ -48,7 +48,7 @@ def requires(self) -> luigi.Task: ) if Env.REFERENCE_DATA_AUTO_UPDATE else HailTableTask( - valid_cached_reference_dataset_query_path( + cached_reference_dataset_query_path( self.reference_genome, self.dataset_type, CachedReferenceDatasetQuery.GNOMAD_QC, diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py b/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py index dd33bb5e2..f3302b399 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py @@ -14,7 +14,7 @@ from v03_pipeline.lib.paths import ( imported_callset_path, relatedness_check_table_path, - valid_cached_reference_dataset_query_path, + cached_reference_dataset_query_path, ) from v03_pipeline.lib.tasks.write_relatedness_check_table import ( WriteRelatednessCheckTableTask, @@ -45,7 +45,7 @@ class WriteRelatednessCheckTableTaskTest(MockedDatarootTestCase): def setUp(self) -> None: super().setUp() - self.gnomad_qc_path = valid_cached_reference_dataset_query_path( + self.gnomad_qc_path = cached_reference_dataset_query_path( ReferenceGenome.GRCh38, DatasetType.SNV_INDEL, CachedReferenceDatasetQuery.GNOMAD_QC, From 65da04f07ecaeef3113d029444a7fa78772d59b4 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 10:48:37 -0400 Subject: [PATCH 02/10] lint --- v03_pipeline/lib/model/cached_reference_dataset_query.py | 1 - v03_pipeline/lib/paths.py | 1 + v03_pipeline/lib/paths_test.py | 2 +- v03_pipeline/lib/tasks/write_imported_callset.py | 2 +- v03_pipeline/lib/tasks/write_relatedness_check_table.py | 2 +- v03_pipeline/lib/tasks/write_relatedness_check_table_test.py | 2 +- 6 files changed, 5 insertions(+), 5 deletions(-) diff --git a/v03_pipeline/lib/model/cached_reference_dataset_query.py b/v03_pipeline/lib/model/cached_reference_dataset_query.py index e26fa9fbd..02ff1c807 100644 --- a/v03_pipeline/lib/model/cached_reference_dataset_query.py +++ b/v03_pipeline/lib/model/cached_reference_dataset_query.py @@ -5,7 +5,6 @@ from v03_pipeline.lib.model.dataset_type import DatasetType from v03_pipeline.lib.model.definitions import ReferenceGenome -from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.reference_data.queries import ( clinvar_path_variants, gnomad_coding_and_noncoding_variants, diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 67f7234a5..5d35c3e43 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -40,6 +40,7 @@ def _v03_reference_data_prefix( reference_genome.value, ) + def cached_reference_dataset_query_path( reference_genome: ReferenceGenome, dataset_type: DatasetType, diff --git a/v03_pipeline/lib/paths_test.py b/v03_pipeline/lib/paths_test.py index f31b8f8ef..d6f0b10ba 100644 --- a/v03_pipeline/lib/paths_test.py +++ b/v03_pipeline/lib/paths_test.py @@ -8,6 +8,7 @@ ReferenceGenome, ) from v03_pipeline.lib.paths import ( + cached_reference_dataset_query_path, family_table_path, imported_callset_path, lookup_table_path, @@ -17,7 +18,6 @@ relatedness_check_table_path, remapped_and_subsetted_callset_path, sex_check_table_path, - cached_reference_dataset_query_path, valid_reference_dataset_collection_path, variant_annotations_table_path, ) diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 04af5bfc9..e5847380b 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -18,9 +18,9 @@ from v03_pipeline.lib.model import CachedReferenceDatasetQuery from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import ( + cached_reference_dataset_query_path, imported_callset_path, sex_check_table_path, - cached_reference_dataset_query_path, ) from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import CallsetTask, GCSorLocalTarget, HailTableTask diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table.py b/v03_pipeline/lib/tasks/write_relatedness_check_table.py index a53f69430..1ba75446c 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table.py @@ -4,8 +4,8 @@ from v03_pipeline.lib.methods.relatedness import call_relatedness from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env from v03_pipeline.lib.paths import ( - relatedness_check_table_path, cached_reference_dataset_query_path, + relatedness_check_table_path, ) from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py b/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py index f3302b399..239c6ea11 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py @@ -12,9 +12,9 @@ SampleType, ) from v03_pipeline.lib.paths import ( + cached_reference_dataset_query_path, imported_callset_path, relatedness_check_table_path, - cached_reference_dataset_query_path, ) from v03_pipeline.lib.tasks.write_relatedness_check_table import ( WriteRelatednessCheckTableTask, From f90baef2995b24a804897321f993209d79e536dd Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 11:19:51 -0400 Subject: [PATCH 03/10] fix logic --- v03_pipeline/lib/paths.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py index 5d35c3e43..14482d831 100644 --- a/v03_pipeline/lib/paths.py +++ b/v03_pipeline/lib/paths.py @@ -48,7 +48,7 @@ def cached_reference_dataset_query_path( ) -> str: return os.path.join( _v03_reference_data_prefix( - cached_reference_dataset_query.access_control, + AccessControl.PUBLIC, reference_genome, ), dataset_type.value, From 7c8cbb2c0b8260a22179aa8312a920ab13891d40 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 13:12:38 -0400 Subject: [PATCH 04/10] Move SampleType out of BaseHailTableTask --- v03_pipeline/lib/tasks/base/base_hail_table.py | 3 +-- .../base/base_update_variant_annotations_table.py | 1 - .../base_update_variant_annotations_table_test.py | 2 -- v03_pipeline/lib/tasks/delete_family_table_test.py | 3 +-- v03_pipeline/lib/tasks/delete_family_tables.py | 1 - .../lib/tasks/delete_family_tables_test.py | 3 +-- .../lib/tasks/delete_project_family_tables.py | 1 - .../lib/tasks/delete_project_family_tables_test.py | 3 +-- v03_pipeline/lib/tasks/delete_project_table.py | 1 - .../update_cached_reference_dataset_queries.py | 2 -- ...update_cached_reference_dataset_queries_test.py | 14 -------------- ...ns_table_with_updated_reference_dataset_test.py | 4 ---- .../updated_cached_reference_dataset_query.py | 1 - .../updated_cached_reference_dataset_query_test.py | 3 --- .../updated_reference_dataset_collection_test.py | 3 --- v03_pipeline/lib/tasks/update_lookup_table.py | 2 ++ ...date_lookup_table_with_deleted_families_test.py | 4 +--- ...pdate_lookup_table_with_deleted_project_test.py | 4 +--- v03_pipeline/lib/tasks/update_project_table.py | 2 ++ ...ate_project_table_with_deleted_families_test.py | 3 +-- ...iant_annotations_table_with_deleted_families.py | 1 - ...annotations_table_with_deleted_families_test.py | 3 +-- ...riant_annotations_table_with_deleted_project.py | 1 - ..._annotations_table_with_deleted_project_test.py | 3 +-- ...e_variant_annotations_table_with_new_samples.py | 2 ++ v03_pipeline/lib/tasks/write_family_table.py | 2 ++ v03_pipeline/lib/tasks/write_imported_callset.py | 5 ++--- v03_pipeline/lib/tasks/write_metadata_for_run.py | 2 ++ v03_pipeline/lib/tasks/write_new_variants_table.py | 3 ++- .../lib/tasks/write_project_family_tables.py | 2 ++ .../lib/tasks/write_relatedness_check_table.py | 3 ++- .../tasks/write_remapped_and_subsetted_callset.py | 2 ++ 32 files changed, 29 insertions(+), 60 deletions(-) diff --git a/v03_pipeline/lib/tasks/base/base_hail_table.py b/v03_pipeline/lib/tasks/base/base_hail_table.py index e8c2b47e5..23480ffb9 100644 --- a/v03_pipeline/lib/tasks/base/base_hail_table.py +++ b/v03_pipeline/lib/tasks/base/base_hail_table.py @@ -2,7 +2,7 @@ import luigi from v03_pipeline.lib.logger import get_logger -from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome from v03_pipeline.lib.tasks.files import GCSorLocalFolderTarget logger = get_logger(__name__) @@ -11,7 +11,6 @@ class BaseHailTableTask(luigi.Task): reference_genome = luigi.EnumParameter(enum=ReferenceGenome) dataset_type = luigi.EnumParameter(enum=DatasetType) - sample_type = luigi.EnumParameter(enum=SampleType) def output(self) -> luigi.Target: raise NotImplementedError diff --git a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py index c04e4f060..32f5f8205 100644 --- a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py +++ b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py @@ -39,7 +39,6 @@ def requires(self) -> list[luigi.Task]: UpdatedReferenceDatasetCollectionTask( self.reference_genome, self.dataset_type, - self.sample_type, rdc, ) if Env.REFERENCE_DATA_AUTO_UPDATE diff --git a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py index 5ee33d4cd..05f90e097 100644 --- a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py +++ b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py @@ -8,7 +8,6 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.paths import valid_reference_dataset_collection_path from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import ( @@ -59,7 +58,6 @@ def test_should_create_initialized_table(self, mock_update_rdc_task) -> None: vat_task = BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) self.assertTrue('annotations.ht' in vat_task.output().path) self.assertTrue(DatasetType.SNV_INDEL.value in vat_task.output().path) diff --git a/v03_pipeline/lib/tasks/delete_family_table_test.py b/v03_pipeline/lib/tasks/delete_family_table_test.py index d4f07c146..43e92bb6b 100644 --- a/v03_pipeline/lib/tasks/delete_family_table_test.py +++ b/v03_pipeline/lib/tasks/delete_family_table_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import family_table_path from v03_pipeline.lib.tasks.delete_family_table import DeleteFamilyTableTask from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase @@ -50,7 +50,6 @@ def test_delete_family_table_task(self) -> None: task = DeleteFamilyTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, family_guid='abc_1', ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/delete_family_tables.py b/v03_pipeline/lib/tasks/delete_family_tables.py index f8ff365a2..a68f4dc28 100644 --- a/v03_pipeline/lib/tasks/delete_family_tables.py +++ b/v03_pipeline/lib/tasks/delete_family_tables.py @@ -23,7 +23,6 @@ def run(self): DeleteFamilyTableTask( reference_genome=self.reference_genome, dataset_type=self.dataset_type, - sample_type=self.sample_type, family_guid=family_guid, ), ) diff --git a/v03_pipeline/lib/tasks/delete_family_tables_test.py b/v03_pipeline/lib/tasks/delete_family_tables_test.py index be8d99eea..535299602 100644 --- a/v03_pipeline/lib/tasks/delete_family_tables_test.py +++ b/v03_pipeline/lib/tasks/delete_family_tables_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import family_table_path from v03_pipeline.lib.tasks.delete_family_tables import ( DeleteFamilyTablesTask, @@ -38,7 +38,6 @@ def test_delete_project_family_tables_task(self) -> None: task = DeleteFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, family_guids=['family_a', 'family_b'], ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/delete_project_family_tables.py b/v03_pipeline/lib/tasks/delete_project_family_tables.py index e366edb99..befca9a45 100644 --- a/v03_pipeline/lib/tasks/delete_project_family_tables.py +++ b/v03_pipeline/lib/tasks/delete_project_family_tables.py @@ -35,7 +35,6 @@ def run(self): DeleteFamilyTableTask( reference_genome=self.reference_genome, dataset_type=self.dataset_type, - sample_type=self.sample_type, family_guid=family_guid, ), ) diff --git a/v03_pipeline/lib/tasks/delete_project_family_tables_test.py b/v03_pipeline/lib/tasks/delete_project_family_tables_test.py index e86005f9a..3cb56f1c8 100644 --- a/v03_pipeline/lib/tasks/delete_project_family_tables_test.py +++ b/v03_pipeline/lib/tasks/delete_project_family_tables_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import family_table_path, project_table_path from v03_pipeline.lib.tasks.delete_project_family_tables import ( DeleteProjectFamilyTablesTask, @@ -149,7 +149,6 @@ def test_delete_project_family_tables_task(self) -> None: task = DeleteProjectFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, project_guid='project_a', ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/delete_project_table.py b/v03_pipeline/lib/tasks/delete_project_table.py index 283d7f137..0a403ea3b 100644 --- a/v03_pipeline/lib/tasks/delete_project_table.py +++ b/v03_pipeline/lib/tasks/delete_project_table.py @@ -15,7 +15,6 @@ def requires(self) -> luigi.Task: return DeleteProjectFamilyTablesTask( self.reference_genome, self.dataset_type, - self.sample_type, self.project_guid, ) diff --git a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py index 5efbad471..bcae534ba 100644 --- a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py +++ b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py @@ -4,7 +4,6 @@ CachedReferenceDatasetQuery, DatasetType, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import ( UpdatedCachedReferenceDatasetQuery, @@ -14,7 +13,6 @@ class UpdateCachedReferenceDatasetQueries(luigi.Task): reference_genome = luigi.EnumParameter(enum=ReferenceGenome) dataset_type = luigi.EnumParameter(enum=DatasetType) - sample_type = luigi.EnumParameter(enum=SampleType) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py index 845ccfc45..794a77897 100644 --- a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py +++ b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py @@ -7,7 +7,6 @@ CachedReferenceDatasetQuery, DatasetType, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.tasks.reference_data.update_cached_reference_dataset_queries import ( UpdateCachedReferenceDatasetQueries, @@ -25,7 +24,6 @@ def test_37_snv_indel(self, mock_crdq_task): task = UpdateCachedReferenceDatasetQueries( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) worker.add(task) worker.run() @@ -35,25 +33,21 @@ def test_37_snv_indel(self, mock_crdq_task): mock.call( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ), mock.call( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, ), mock.call( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_QC, ), mock.call( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS, ), ], @@ -65,7 +59,6 @@ def test_38_snv_indel(self, mock_crdq_task): task = UpdateCachedReferenceDatasetQueries( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) worker.add(task) worker.run() @@ -75,25 +68,21 @@ def test_38_snv_indel(self, mock_crdq_task): mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ), mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, ), mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_QC, ), mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS, ), ], @@ -105,7 +94,6 @@ def test_38_mito(self, mock_crdq_task): task = UpdateCachedReferenceDatasetQueries( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.MITO, - sample_type=SampleType.WGS, ) worker.add(task) worker.run() @@ -115,7 +103,6 @@ def test_38_mito(self, mock_crdq_task): mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.MITO, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ), ], @@ -127,7 +114,6 @@ def test_38_sv(self, mock_crdq_task): task = UpdateCachedReferenceDatasetQueries( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SV, - sample_type=SampleType.WGS, ) worker.add(task) worker.run() diff --git a/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py b/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py index 67a5492bf..dc6304cf5 100644 --- a/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py +++ b/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py @@ -15,7 +15,6 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.paths import valid_reference_dataset_collection_path from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS @@ -730,7 +729,6 @@ def test_update_vat_with_updated_rdc_snv_indel_38( task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) worker = luigi.worker.Worker() worker.add(task) @@ -925,7 +923,6 @@ def test_update_vat_with_updated_rdc_mito_38( task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.MITO, - sample_type=SampleType.WGS, ) worker = luigi.worker.Worker() worker.add(task) @@ -1068,7 +1065,6 @@ def test_update_vat_with_updated_rdc_snv_indel_37( task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) worker = luigi.worker.Worker() worker.add(task) diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py index 92fc7718b..ef54d471d 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py @@ -68,7 +68,6 @@ def requires(self) -> luigi.Task: return UpdatedReferenceDatasetCollectionTask( self.reference_genome, self.dataset_type, - self.sample_type, ReferenceDatasetCollection.COMBINED, ) if self.crdq.query_raw_dataset: diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py index 8dd9558c8..210a8cc8a 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py @@ -11,7 +11,6 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, @@ -109,7 +108,6 @@ def test_gnomad_qc( task = UpdatedCachedReferenceDatasetQuery( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_QC, ) worker.add(task) @@ -199,7 +197,6 @@ def _clinvar_path_variants(table, **_: Any): task = UpdatedCachedReferenceDatasetQuery( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py b/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py index 06c73559b..9995225c0 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py @@ -10,7 +10,6 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.paths import valid_reference_dataset_collection_path from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS @@ -170,7 +169,6 @@ def test_update_task_with_empty_reference_data_table( task = UpdatedReferenceDatasetCollectionTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_dataset_collection=ReferenceDatasetCollection.COMBINED, ) worker.add(task) @@ -280,7 +278,6 @@ def test_update_task_with_existing_reference_dataset_collection_table( task = UpdatedReferenceDatasetCollectionTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_dataset_collection=ReferenceDatasetCollection.COMBINED, ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/update_lookup_table.py b/v03_pipeline/lib/tasks/update_lookup_table.py index eb6068e76..0c389b713 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table.py +++ b/v03_pipeline/lib/tasks/update_lookup_table.py @@ -7,6 +7,7 @@ join_lookup_hts, remove_family_guids, ) +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.model.constants import PROJECTS_EXCLUDED_FROM_LOOKUP from v03_pipeline.lib.tasks.base.base_update_lookup_table import ( BaseUpdateLookupTableTask, @@ -17,6 +18,7 @@ class UpdateLookupTableTask(BaseUpdateLookupTableTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_paths = luigi.ListParameter() project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() diff --git a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py index 283dd5003..70915ef9d 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.tasks.update_lookup_table_with_deleted_families import ( UpdateLookupTableWithDeletedFamiliesTask, ) @@ -17,7 +17,6 @@ def test_delete_project_empty_table( worker = luigi.worker.Worker() task = UpdateLookupTableWithDeletedFamiliesTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='R0555_seqr_demo', family_guids=['abc'], @@ -132,7 +131,6 @@ def test_delete_project( worker = luigi.worker.Worker() task = UpdateLookupTableWithDeletedFamiliesTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_a', family_guids=['1', '3'], diff --git a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py index 5375d4c32..e40e034ec 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.tasks.update_lookup_table_with_deleted_project import ( UpdateLookupTableWithDeletedProjectTask, ) @@ -17,7 +17,6 @@ def test_delete_project_empty_table( worker = luigi.worker.Worker() task = UpdateLookupTableWithDeletedProjectTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='R0555_seqr_demo', ) @@ -131,7 +130,6 @@ def test_delete_project( worker = luigi.worker.Worker() task = UpdateLookupTableWithDeletedProjectTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_a', ) diff --git a/v03_pipeline/lib/tasks/update_project_table.py b/v03_pipeline/lib/tasks/update_project_table.py index 508c51c30..288b6b20c 100644 --- a/v03_pipeline/lib/tasks/update_project_table.py +++ b/v03_pipeline/lib/tasks/update_project_table.py @@ -7,6 +7,7 @@ join_family_entries_hts, remove_family_guids, ) +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.tasks.base.base_update_project_table import ( BaseUpdateProjectTableTask, ) @@ -16,6 +17,7 @@ class UpdateProjectTableTask(BaseUpdateProjectTableTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() project_remap_path = luigi.Parameter() project_pedigree_path = luigi.Parameter() diff --git a/v03_pipeline/lib/tasks/update_project_table_with_deleted_families_test.py b/v03_pipeline/lib/tasks/update_project_table_with_deleted_families_test.py index 77936595c..b1f5cc5af 100644 --- a/v03_pipeline/lib/tasks/update_project_table_with_deleted_families_test.py +++ b/v03_pipeline/lib/tasks/update_project_table_with_deleted_families_test.py @@ -1,7 +1,7 @@ import hail as hl import luigi -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import project_table_path from v03_pipeline.lib.tasks.update_project_table_with_deleted_families import ( UpdateProjectTableWithDeletedFamiliesTask, @@ -128,7 +128,6 @@ def test_update_project_with_deleted_families(self): worker = luigi.worker.Worker() task = UpdateProjectTableWithDeletedFamiliesTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_a', family_guids=['family_b'], diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families.py index 4db8ca582..a77f5280c 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families.py @@ -24,7 +24,6 @@ def requires(self) -> luigi.Task | None: if self.dataset_type.has_lookup_table: return UpdateLookupTableWithDeletedFamiliesTask( dataset_type=self.dataset_type, - sample_type=self.sample_type, reference_genome=self.reference_genome, project_guid=self.project_guid, family_guids=self.family_guids, diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families_test.py index 266ddf19e..67410ef18 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families_test.py @@ -1,7 +1,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import ( lookup_table_path, variant_annotations_table_path, @@ -139,7 +139,6 @@ def test_update_annotations_with_deleted_project(self) -> None: worker = luigi.worker.Worker() task = UpdateVariantAnnotationsTableWithDeletedFamiliesTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_a', family_guids=['2', '3'], diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project.py index 447c16e04..a0a97637b 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project.py @@ -19,7 +19,6 @@ def requires(self) -> luigi.Task | None: if self.dataset_type.has_lookup_table: return UpdateLookupTableWithDeletedProjectTask( dataset_type=self.dataset_type, - sample_type=self.sample_type, reference_genome=self.reference_genome, project_guid=self.project_guid, ) diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py index a77bc28b9..295a9577b 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py @@ -1,7 +1,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import ( lookup_table_path, variant_annotations_table_path, @@ -147,7 +147,6 @@ def test_update_annotations_with_deleted_project(self) -> None: worker = luigi.worker.Worker() task = UpdateVariantAnnotationsTableWithDeletedProjectTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_b', ) diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py index 99e6c49df..a38bd2bc7 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py @@ -3,6 +3,7 @@ from v03_pipeline.lib.annotations.fields import get_fields from v03_pipeline.lib.misc.callsets import callset_project_pairs, get_callset_ht +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.paths import ( lookup_table_path, new_variants_table_path, @@ -16,6 +17,7 @@ class UpdateVariantAnnotationsTableWithNewSamplesTask( BaseUpdateVariantAnnotationsTableTask, ): + sample_type = luigi.EnumParameter(enum=SampleType) callset_paths = luigi.ListParameter() project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() diff --git a/v03_pipeline/lib/tasks/write_family_table.py b/v03_pipeline/lib/tasks/write_family_table.py index ef3025842..3ea156fdf 100644 --- a/v03_pipeline/lib/tasks/write_family_table.py +++ b/v03_pipeline/lib/tasks/write_family_table.py @@ -1,6 +1,7 @@ import hail as hl import luigi +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.paths import family_table_path from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget @@ -10,6 +11,7 @@ class WriteFamilyTableTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() project_guid = luigi.Parameter() project_remap_path = luigi.Parameter() diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index e5847380b..07b0bb37e 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -15,7 +15,7 @@ validate_sample_type, ) from v03_pipeline.lib.misc.vets import annotate_vets -from v03_pipeline.lib.model import CachedReferenceDatasetQuery +from v03_pipeline.lib.model import CachedReferenceDatasetQuery, SampleType from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, @@ -31,6 +31,7 @@ class WriteImportedCallsetTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() imputed_sex_path = luigi.Parameter(default=None) filters_path = luigi.OptionalParameter( @@ -81,7 +82,6 @@ def requires(self) -> list[luigi.Task]: UpdatedCachedReferenceDatasetQuery( reference_genome=self.reference_genome, dataset_type=self.dataset_type, - sample_type=self.sample_type, crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, ) if Env.REFERENCE_DATA_AUTO_UPDATE @@ -103,7 +103,6 @@ def requires(self) -> list[luigi.Task]: WriteSexCheckTableTask( self.reference_genome, self.dataset_type, - self.sample_type, self.callset_path, self.imputed_sex_path, ), diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run.py b/v03_pipeline/lib/tasks/write_metadata_for_run.py index 3ec7d4f64..80b39caca 100644 --- a/v03_pipeline/lib/tasks/write_metadata_for_run.py +++ b/v03_pipeline/lib/tasks/write_metadata_for_run.py @@ -4,6 +4,7 @@ import luigi from v03_pipeline.lib.misc.callsets import callset_project_pairs +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.paths import metadata_for_run_path from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget @@ -13,6 +14,7 @@ class WriteMetadataForRunTask(BaseHailTableTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_paths = luigi.ListParameter() project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() diff --git a/v03_pipeline/lib/tasks/write_new_variants_table.py b/v03_pipeline/lib/tasks/write_new_variants_table.py index 734b07d73..abcfc5458 100644 --- a/v03_pipeline/lib/tasks/write_new_variants_table.py +++ b/v03_pipeline/lib/tasks/write_new_variants_table.py @@ -10,7 +10,7 @@ from v03_pipeline.lib.misc.allele_registry import register_alleles_in_chunks from v03_pipeline.lib.misc.callsets import callset_project_pairs, get_callset_ht from v03_pipeline.lib.misc.math import constrain -from v03_pipeline.lib.model import Env, ReferenceDatasetCollection +from v03_pipeline.lib.model import Env, ReferenceDatasetCollection, SampleType from v03_pipeline.lib.paths import ( new_variants_table_path, variant_annotations_table_path, @@ -37,6 +37,7 @@ class WriteNewVariantsTableTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_paths = luigi.ListParameter() project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() diff --git a/v03_pipeline/lib/tasks/write_project_family_tables.py b/v03_pipeline/lib/tasks/write_project_family_tables.py index fe96f441b..7bd59005b 100644 --- a/v03_pipeline/lib/tasks/write_project_family_tables.py +++ b/v03_pipeline/lib/tasks/write_project_family_tables.py @@ -1,12 +1,14 @@ import hail as hl import luigi +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask from v03_pipeline.lib.tasks.update_project_table import UpdateProjectTableTask from v03_pipeline.lib.tasks.write_family_table import WriteFamilyTableTask class WriteProjectFamilyTablesTask(BaseHailTableTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() project_guid = luigi.Parameter() project_remap_path = luigi.Parameter() diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table.py b/v03_pipeline/lib/tasks/write_relatedness_check_table.py index 1ba75446c..3893f5760 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table.py @@ -2,7 +2,7 @@ import luigi from v03_pipeline.lib.methods.relatedness import call_relatedness -from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env +from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env, SampleType from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, relatedness_check_table_path, @@ -16,6 +16,7 @@ class WriteRelatednessCheckTableTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() def output(self) -> luigi.Target: diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index 7998fb689..e72a2d576 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -14,6 +14,7 @@ ) from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families from v03_pipeline.lib.misc.sample_ids import remap_sample_ids, subset_samples +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget, RawFileTask @@ -27,6 +28,7 @@ class WriteRemappedAndSubsettedCallsetTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() project_guid = luigi.Parameter() project_remap_path = luigi.Parameter() From 4a2f978c5795aba393ec11ec9a5a3b4e11cbe31c Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 13:31:50 -0400 Subject: [PATCH 05/10] cleanup --- v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py | 1 - 1 file changed, 1 deletion(-) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index e72a2d576..079996d35 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -97,7 +97,6 @@ def requires(self) -> list[luigi.Task]: WriteSexCheckTableTask( self.reference_genome, self.dataset_type, - self.sample_type, self.callset_path, self.imputed_sex_path, ), From fb737f879d155006e21ba6e84ed6fc709888d173 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 13:46:20 -0400 Subject: [PATCH 06/10] fix --- v03_pipeline/lib/tasks/write_relatedness_check_table.py | 1 - 1 file changed, 1 deletion(-) diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table.py b/v03_pipeline/lib/tasks/write_relatedness_check_table.py index 3893f5760..dc8bf17d6 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table.py @@ -44,7 +44,6 @@ def requires(self) -> luigi.Task: UpdatedCachedReferenceDatasetQuery( reference_genome=self.reference_genome, dataset_type=self.dataset_type, - sample_type=self.sample_type, crdq=CachedReferenceDatasetQuery.GNOMAD_QC, ) if Env.REFERENCE_DATA_AUTO_UPDATE From 2e2676b374b3de0970be67dc3863b4a92454c238 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 14:05:04 -0400 Subject: [PATCH 07/10] missed a few! --- v03_pipeline/lib/tasks/write_new_variants_table.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/v03_pipeline/lib/tasks/write_new_variants_table.py b/v03_pipeline/lib/tasks/write_new_variants_table.py index abcfc5458..4779b5c42 100644 --- a/v03_pipeline/lib/tasks/write_new_variants_table.py +++ b/v03_pipeline/lib/tasks/write_new_variants_table.py @@ -85,7 +85,6 @@ def requires(self) -> list[luigi.Task]: UpdateVariantAnnotationsTableWithUpdatedReferenceDataset( self.reference_genome, self.dataset_type, - self.sample_type, ), ] else: @@ -93,7 +92,6 @@ def requires(self) -> list[luigi.Task]: BaseUpdateVariantAnnotationsTableTask( self.reference_genome, self.dataset_type, - self.sample_type, ), ] if self.dataset_type.has_lookup_table: From 89754a415bc4316dcbddae897836f9736746d9c4 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 14:31:41 -0400 Subject: [PATCH 08/10] shitshow --- .../update_variant_annotations_table_with_new_samples_test.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index 80a8fe2b4..d80d6cb87 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -234,7 +234,6 @@ def test_multiple_update_vat( BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) ) mock_vep.side_effect = lambda ht, **_: ht.annotate(vep=MOCK_VEP_DATA) @@ -653,7 +652,6 @@ def test_update_vat_grch37( BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) ) mock_vep.side_effect = lambda ht, **_: ht.annotate(vep=MOCK_VEP_DATA) @@ -719,7 +717,6 @@ def test_update_vat_without_accessing_private_datasets( BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) ) shutil.rmtree( @@ -787,7 +784,6 @@ def test_mito_update_vat( BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.MITO, - sample_type=SampleType.WGS, ) ) mock_register_alleles.side_effect = None From 2e1620f181bb46f794893034d1d98d6cbac7b1c8 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 14:52:52 -0400 Subject: [PATCH 09/10] flip order here --- v03_pipeline/lib/tasks/write_project_family_tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/lib/tasks/write_project_family_tables.py b/v03_pipeline/lib/tasks/write_project_family_tables.py index 7bd59005b..b7d83cd49 100644 --- a/v03_pipeline/lib/tasks/write_project_family_tables.py +++ b/v03_pipeline/lib/tasks/write_project_family_tables.py @@ -50,8 +50,8 @@ def run(self): update_project_table_task: luigi.Target = yield UpdateProjectTableTask( self.reference_genome, self.dataset_type, - self.sample_type, self.project_guid, + self.sample_type, self.callset_path, self.project_remap_path, self.project_pedigree_path, From d730d8c927702ec6156c4b33fd33bd8120c0b08f Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 12 Jun 2024 15:12:49 -0400 Subject: [PATCH 10/10] flip order --- v03_pipeline/lib/tasks/write_family_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/lib/tasks/write_family_table.py b/v03_pipeline/lib/tasks/write_family_table.py index 3ea156fdf..73400983f 100644 --- a/v03_pipeline/lib/tasks/write_family_table.py +++ b/v03_pipeline/lib/tasks/write_family_table.py @@ -54,8 +54,8 @@ def requires(self) -> luigi.Task: return UpdateProjectTableTask( self.reference_genome, self.dataset_type, - self.sample_type, self.project_guid, + self.sample_type, self.callset_path, self.project_remap_path, self.project_pedigree_path,