diff --git a/v03_pipeline/lib/tasks/base/base_hail_table.py b/v03_pipeline/lib/tasks/base/base_hail_table.py index e8c2b47e5..23480ffb9 100644 --- a/v03_pipeline/lib/tasks/base/base_hail_table.py +++ b/v03_pipeline/lib/tasks/base/base_hail_table.py @@ -2,7 +2,7 @@ import luigi from v03_pipeline.lib.logger import get_logger -from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome from v03_pipeline.lib.tasks.files import GCSorLocalFolderTarget logger = get_logger(__name__) @@ -11,7 +11,6 @@ class BaseHailTableTask(luigi.Task): reference_genome = luigi.EnumParameter(enum=ReferenceGenome) dataset_type = luigi.EnumParameter(enum=DatasetType) - sample_type = luigi.EnumParameter(enum=SampleType) def output(self) -> luigi.Target: raise NotImplementedError diff --git a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py index c04e4f060..32f5f8205 100644 --- a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py +++ b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py @@ -39,7 +39,6 @@ def requires(self) -> list[luigi.Task]: UpdatedReferenceDatasetCollectionTask( self.reference_genome, self.dataset_type, - self.sample_type, rdc, ) if Env.REFERENCE_DATA_AUTO_UPDATE diff --git a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py index 5ee33d4cd..05f90e097 100644 --- a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py +++ b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py @@ -8,7 +8,6 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.paths import valid_reference_dataset_collection_path from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import ( @@ -59,7 +58,6 @@ def test_should_create_initialized_table(self, mock_update_rdc_task) -> None: vat_task = BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) self.assertTrue('annotations.ht' in vat_task.output().path) self.assertTrue(DatasetType.SNV_INDEL.value in vat_task.output().path) diff --git a/v03_pipeline/lib/tasks/delete_family_table_test.py b/v03_pipeline/lib/tasks/delete_family_table_test.py index d4f07c146..43e92bb6b 100644 --- a/v03_pipeline/lib/tasks/delete_family_table_test.py +++ b/v03_pipeline/lib/tasks/delete_family_table_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import family_table_path from v03_pipeline.lib.tasks.delete_family_table import DeleteFamilyTableTask from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase @@ -50,7 +50,6 @@ def test_delete_family_table_task(self) -> None: task = DeleteFamilyTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, family_guid='abc_1', ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/delete_family_tables.py b/v03_pipeline/lib/tasks/delete_family_tables.py index f8ff365a2..a68f4dc28 100644 --- a/v03_pipeline/lib/tasks/delete_family_tables.py +++ b/v03_pipeline/lib/tasks/delete_family_tables.py @@ -23,7 +23,6 @@ def run(self): DeleteFamilyTableTask( reference_genome=self.reference_genome, dataset_type=self.dataset_type, - sample_type=self.sample_type, family_guid=family_guid, ), ) diff --git a/v03_pipeline/lib/tasks/delete_family_tables_test.py b/v03_pipeline/lib/tasks/delete_family_tables_test.py index be8d99eea..535299602 100644 --- a/v03_pipeline/lib/tasks/delete_family_tables_test.py +++ b/v03_pipeline/lib/tasks/delete_family_tables_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import family_table_path from v03_pipeline.lib.tasks.delete_family_tables import ( DeleteFamilyTablesTask, @@ -38,7 +38,6 @@ def test_delete_project_family_tables_task(self) -> None: task = DeleteFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, family_guids=['family_a', 'family_b'], ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/delete_project_family_tables.py b/v03_pipeline/lib/tasks/delete_project_family_tables.py index e366edb99..befca9a45 100644 --- a/v03_pipeline/lib/tasks/delete_project_family_tables.py +++ b/v03_pipeline/lib/tasks/delete_project_family_tables.py @@ -35,7 +35,6 @@ def run(self): DeleteFamilyTableTask( reference_genome=self.reference_genome, dataset_type=self.dataset_type, - sample_type=self.sample_type, family_guid=family_guid, ), ) diff --git a/v03_pipeline/lib/tasks/delete_project_family_tables_test.py b/v03_pipeline/lib/tasks/delete_project_family_tables_test.py index e86005f9a..3cb56f1c8 100644 --- a/v03_pipeline/lib/tasks/delete_project_family_tables_test.py +++ b/v03_pipeline/lib/tasks/delete_project_family_tables_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import family_table_path, project_table_path from v03_pipeline.lib.tasks.delete_project_family_tables import ( DeleteProjectFamilyTablesTask, @@ -149,7 +149,6 @@ def test_delete_project_family_tables_task(self) -> None: task = DeleteProjectFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, project_guid='project_a', ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/delete_project_table.py b/v03_pipeline/lib/tasks/delete_project_table.py index 283d7f137..0a403ea3b 100644 --- a/v03_pipeline/lib/tasks/delete_project_table.py +++ b/v03_pipeline/lib/tasks/delete_project_table.py @@ -15,7 +15,6 @@ def requires(self) -> luigi.Task: return DeleteProjectFamilyTablesTask( self.reference_genome, self.dataset_type, - self.sample_type, self.project_guid, ) diff --git a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py index 5efbad471..bcae534ba 100644 --- a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py +++ b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py @@ -4,7 +4,6 @@ CachedReferenceDatasetQuery, DatasetType, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import ( UpdatedCachedReferenceDatasetQuery, @@ -14,7 +13,6 @@ class UpdateCachedReferenceDatasetQueries(luigi.Task): reference_genome = luigi.EnumParameter(enum=ReferenceGenome) dataset_type = luigi.EnumParameter(enum=DatasetType) - sample_type = luigi.EnumParameter(enum=SampleType) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py index 845ccfc45..794a77897 100644 --- a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py +++ b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py @@ -7,7 +7,6 @@ CachedReferenceDatasetQuery, DatasetType, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.tasks.reference_data.update_cached_reference_dataset_queries import ( UpdateCachedReferenceDatasetQueries, @@ -25,7 +24,6 @@ def test_37_snv_indel(self, mock_crdq_task): task = UpdateCachedReferenceDatasetQueries( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) worker.add(task) worker.run() @@ -35,25 +33,21 @@ def test_37_snv_indel(self, mock_crdq_task): mock.call( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ), mock.call( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, ), mock.call( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_QC, ), mock.call( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS, ), ], @@ -65,7 +59,6 @@ def test_38_snv_indel(self, mock_crdq_task): task = UpdateCachedReferenceDatasetQueries( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) worker.add(task) worker.run() @@ -75,25 +68,21 @@ def test_38_snv_indel(self, mock_crdq_task): mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ), mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, ), mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_QC, ), mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS, ), ], @@ -105,7 +94,6 @@ def test_38_mito(self, mock_crdq_task): task = UpdateCachedReferenceDatasetQueries( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.MITO, - sample_type=SampleType.WGS, ) worker.add(task) worker.run() @@ -115,7 +103,6 @@ def test_38_mito(self, mock_crdq_task): mock.call( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.MITO, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ), ], @@ -127,7 +114,6 @@ def test_38_sv(self, mock_crdq_task): task = UpdateCachedReferenceDatasetQueries( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SV, - sample_type=SampleType.WGS, ) worker.add(task) worker.run() diff --git a/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py b/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py index 67a5492bf..dc6304cf5 100644 --- a/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py +++ b/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py @@ -15,7 +15,6 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.paths import valid_reference_dataset_collection_path from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS @@ -730,7 +729,6 @@ def test_update_vat_with_updated_rdc_snv_indel_38( task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) worker = luigi.worker.Worker() worker.add(task) @@ -925,7 +923,6 @@ def test_update_vat_with_updated_rdc_mito_38( task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.MITO, - sample_type=SampleType.WGS, ) worker = luigi.worker.Worker() worker.add(task) @@ -1068,7 +1065,6 @@ def test_update_vat_with_updated_rdc_snv_indel_37( task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) worker = luigi.worker.Worker() worker.add(task) diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py index 92fc7718b..ef54d471d 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py @@ -68,7 +68,6 @@ def requires(self) -> luigi.Task: return UpdatedReferenceDatasetCollectionTask( self.reference_genome, self.dataset_type, - self.sample_type, ReferenceDatasetCollection.COMBINED, ) if self.crdq.query_raw_dataset: diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py index 8dd9558c8..210a8cc8a 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py @@ -11,7 +11,6 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, @@ -109,7 +108,6 @@ def test_gnomad_qc( task = UpdatedCachedReferenceDatasetQuery( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.GNOMAD_QC, ) worker.add(task) @@ -199,7 +197,6 @@ def _clinvar_path_variants(table, **_: Any): task = UpdatedCachedReferenceDatasetQuery( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS, ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py b/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py index 06c73559b..9995225c0 100644 --- a/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py +++ b/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py @@ -10,7 +10,6 @@ DatasetType, ReferenceDatasetCollection, ReferenceGenome, - SampleType, ) from v03_pipeline.lib.paths import valid_reference_dataset_collection_path from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS @@ -170,7 +169,6 @@ def test_update_task_with_empty_reference_data_table( task = UpdatedReferenceDatasetCollectionTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_dataset_collection=ReferenceDatasetCollection.COMBINED, ) worker.add(task) @@ -280,7 +278,6 @@ def test_update_task_with_existing_reference_dataset_collection_table( task = UpdatedReferenceDatasetCollectionTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_dataset_collection=ReferenceDatasetCollection.COMBINED, ) worker.add(task) diff --git a/v03_pipeline/lib/tasks/update_lookup_table.py b/v03_pipeline/lib/tasks/update_lookup_table.py index eb6068e76..0c389b713 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table.py +++ b/v03_pipeline/lib/tasks/update_lookup_table.py @@ -7,6 +7,7 @@ join_lookup_hts, remove_family_guids, ) +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.model.constants import PROJECTS_EXCLUDED_FROM_LOOKUP from v03_pipeline.lib.tasks.base.base_update_lookup_table import ( BaseUpdateLookupTableTask, @@ -17,6 +18,7 @@ class UpdateLookupTableTask(BaseUpdateLookupTableTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_paths = luigi.ListParameter() project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() diff --git a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py index 283dd5003..70915ef9d 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.tasks.update_lookup_table_with_deleted_families import ( UpdateLookupTableWithDeletedFamiliesTask, ) @@ -17,7 +17,6 @@ def test_delete_project_empty_table( worker = luigi.worker.Worker() task = UpdateLookupTableWithDeletedFamiliesTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='R0555_seqr_demo', family_guids=['abc'], @@ -132,7 +131,6 @@ def test_delete_project( worker = luigi.worker.Worker() task = UpdateLookupTableWithDeletedFamiliesTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_a', family_guids=['1', '3'], diff --git a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py index 5375d4c32..e40e034ec 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py +++ b/v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py @@ -3,7 +3,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.tasks.update_lookup_table_with_deleted_project import ( UpdateLookupTableWithDeletedProjectTask, ) @@ -17,7 +17,6 @@ def test_delete_project_empty_table( worker = luigi.worker.Worker() task = UpdateLookupTableWithDeletedProjectTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='R0555_seqr_demo', ) @@ -131,7 +130,6 @@ def test_delete_project( worker = luigi.worker.Worker() task = UpdateLookupTableWithDeletedProjectTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_a', ) diff --git a/v03_pipeline/lib/tasks/update_project_table.py b/v03_pipeline/lib/tasks/update_project_table.py index 508c51c30..288b6b20c 100644 --- a/v03_pipeline/lib/tasks/update_project_table.py +++ b/v03_pipeline/lib/tasks/update_project_table.py @@ -7,6 +7,7 @@ join_family_entries_hts, remove_family_guids, ) +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.tasks.base.base_update_project_table import ( BaseUpdateProjectTableTask, ) @@ -16,6 +17,7 @@ class UpdateProjectTableTask(BaseUpdateProjectTableTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() project_remap_path = luigi.Parameter() project_pedigree_path = luigi.Parameter() diff --git a/v03_pipeline/lib/tasks/update_project_table_with_deleted_families_test.py b/v03_pipeline/lib/tasks/update_project_table_with_deleted_families_test.py index 77936595c..b1f5cc5af 100644 --- a/v03_pipeline/lib/tasks/update_project_table_with_deleted_families_test.py +++ b/v03_pipeline/lib/tasks/update_project_table_with_deleted_families_test.py @@ -1,7 +1,7 @@ import hail as hl import luigi -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import project_table_path from v03_pipeline.lib.tasks.update_project_table_with_deleted_families import ( UpdateProjectTableWithDeletedFamiliesTask, @@ -128,7 +128,6 @@ def test_update_project_with_deleted_families(self): worker = luigi.worker.Worker() task = UpdateProjectTableWithDeletedFamiliesTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_a', family_guids=['family_b'], diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families.py index 4db8ca582..a77f5280c 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families.py @@ -24,7 +24,6 @@ def requires(self) -> luigi.Task | None: if self.dataset_type.has_lookup_table: return UpdateLookupTableWithDeletedFamiliesTask( dataset_type=self.dataset_type, - sample_type=self.sample_type, reference_genome=self.reference_genome, project_guid=self.project_guid, family_guids=self.family_guids, diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families_test.py index 266ddf19e..67410ef18 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_families_test.py @@ -1,7 +1,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import ( lookup_table_path, variant_annotations_table_path, @@ -139,7 +139,6 @@ def test_update_annotations_with_deleted_project(self) -> None: worker = luigi.worker.Worker() task = UpdateVariantAnnotationsTableWithDeletedFamiliesTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_a', family_guids=['2', '3'], diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project.py index 447c16e04..a0a97637b 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project.py @@ -19,7 +19,6 @@ def requires(self) -> luigi.Task | None: if self.dataset_type.has_lookup_table: return UpdateLookupTableWithDeletedProjectTask( dataset_type=self.dataset_type, - sample_type=self.sample_type, reference_genome=self.reference_genome, project_guid=self.project_guid, ) diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py index a77bc28b9..295a9577b 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_deleted_project_test.py @@ -1,7 +1,7 @@ import hail as hl import luigi.worker -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType +from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.paths import ( lookup_table_path, variant_annotations_table_path, @@ -147,7 +147,6 @@ def test_update_annotations_with_deleted_project(self) -> None: worker = luigi.worker.Worker() task = UpdateVariantAnnotationsTableWithDeletedProjectTask( dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, reference_genome=ReferenceGenome.GRCh38, project_guid='project_b', ) diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py index 99e6c49df..a38bd2bc7 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py @@ -3,6 +3,7 @@ from v03_pipeline.lib.annotations.fields import get_fields from v03_pipeline.lib.misc.callsets import callset_project_pairs, get_callset_ht +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.paths import ( lookup_table_path, new_variants_table_path, @@ -16,6 +17,7 @@ class UpdateVariantAnnotationsTableWithNewSamplesTask( BaseUpdateVariantAnnotationsTableTask, ): + sample_type = luigi.EnumParameter(enum=SampleType) callset_paths = luigi.ListParameter() project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py index 80a8fe2b4..d80d6cb87 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py @@ -234,7 +234,6 @@ def test_multiple_update_vat( BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) ) mock_vep.side_effect = lambda ht, **_: ht.annotate(vep=MOCK_VEP_DATA) @@ -653,7 +652,6 @@ def test_update_vat_grch37( BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh37, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) ) mock_vep.side_effect = lambda ht, **_: ht.annotate(vep=MOCK_VEP_DATA) @@ -719,7 +717,6 @@ def test_update_vat_without_accessing_private_datasets( BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, - sample_type=SampleType.WGS, ) ) shutil.rmtree( @@ -787,7 +784,6 @@ def test_mito_update_vat( BaseUpdateVariantAnnotationsTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.MITO, - sample_type=SampleType.WGS, ) ) mock_register_alleles.side_effect = None diff --git a/v03_pipeline/lib/tasks/write_family_table.py b/v03_pipeline/lib/tasks/write_family_table.py index ef3025842..73400983f 100644 --- a/v03_pipeline/lib/tasks/write_family_table.py +++ b/v03_pipeline/lib/tasks/write_family_table.py @@ -1,6 +1,7 @@ import hail as hl import luigi +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.paths import family_table_path from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget @@ -10,6 +11,7 @@ class WriteFamilyTableTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() project_guid = luigi.Parameter() project_remap_path = luigi.Parameter() @@ -52,8 +54,8 @@ def requires(self) -> luigi.Task: return UpdateProjectTableTask( self.reference_genome, self.dataset_type, - self.sample_type, self.project_guid, + self.sample_type, self.callset_path, self.project_remap_path, self.project_pedigree_path, diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index e5847380b..07b0bb37e 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -15,7 +15,7 @@ validate_sample_type, ) from v03_pipeline.lib.misc.vets import annotate_vets -from v03_pipeline.lib.model import CachedReferenceDatasetQuery +from v03_pipeline.lib.model import CachedReferenceDatasetQuery, SampleType from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, @@ -31,6 +31,7 @@ class WriteImportedCallsetTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() imputed_sex_path = luigi.Parameter(default=None) filters_path = luigi.OptionalParameter( @@ -81,7 +82,6 @@ def requires(self) -> list[luigi.Task]: UpdatedCachedReferenceDatasetQuery( reference_genome=self.reference_genome, dataset_type=self.dataset_type, - sample_type=self.sample_type, crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, ) if Env.REFERENCE_DATA_AUTO_UPDATE @@ -103,7 +103,6 @@ def requires(self) -> list[luigi.Task]: WriteSexCheckTableTask( self.reference_genome, self.dataset_type, - self.sample_type, self.callset_path, self.imputed_sex_path, ), diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run.py b/v03_pipeline/lib/tasks/write_metadata_for_run.py index 3ec7d4f64..80b39caca 100644 --- a/v03_pipeline/lib/tasks/write_metadata_for_run.py +++ b/v03_pipeline/lib/tasks/write_metadata_for_run.py @@ -4,6 +4,7 @@ import luigi from v03_pipeline.lib.misc.callsets import callset_project_pairs +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.paths import metadata_for_run_path from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget @@ -13,6 +14,7 @@ class WriteMetadataForRunTask(BaseHailTableTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_paths = luigi.ListParameter() project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() diff --git a/v03_pipeline/lib/tasks/write_new_variants_table.py b/v03_pipeline/lib/tasks/write_new_variants_table.py index 734b07d73..4779b5c42 100644 --- a/v03_pipeline/lib/tasks/write_new_variants_table.py +++ b/v03_pipeline/lib/tasks/write_new_variants_table.py @@ -10,7 +10,7 @@ from v03_pipeline.lib.misc.allele_registry import register_alleles_in_chunks from v03_pipeline.lib.misc.callsets import callset_project_pairs, get_callset_ht from v03_pipeline.lib.misc.math import constrain -from v03_pipeline.lib.model import Env, ReferenceDatasetCollection +from v03_pipeline.lib.model import Env, ReferenceDatasetCollection, SampleType from v03_pipeline.lib.paths import ( new_variants_table_path, variant_annotations_table_path, @@ -37,6 +37,7 @@ class WriteNewVariantsTableTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_paths = luigi.ListParameter() project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() @@ -84,7 +85,6 @@ def requires(self) -> list[luigi.Task]: UpdateVariantAnnotationsTableWithUpdatedReferenceDataset( self.reference_genome, self.dataset_type, - self.sample_type, ), ] else: @@ -92,7 +92,6 @@ def requires(self) -> list[luigi.Task]: BaseUpdateVariantAnnotationsTableTask( self.reference_genome, self.dataset_type, - self.sample_type, ), ] if self.dataset_type.has_lookup_table: diff --git a/v03_pipeline/lib/tasks/write_project_family_tables.py b/v03_pipeline/lib/tasks/write_project_family_tables.py index fe96f441b..b7d83cd49 100644 --- a/v03_pipeline/lib/tasks/write_project_family_tables.py +++ b/v03_pipeline/lib/tasks/write_project_family_tables.py @@ -1,12 +1,14 @@ import hail as hl import luigi +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask from v03_pipeline.lib.tasks.update_project_table import UpdateProjectTableTask from v03_pipeline.lib.tasks.write_family_table import WriteFamilyTableTask class WriteProjectFamilyTablesTask(BaseHailTableTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() project_guid = luigi.Parameter() project_remap_path = luigi.Parameter() @@ -48,8 +50,8 @@ def run(self): update_project_table_task: luigi.Target = yield UpdateProjectTableTask( self.reference_genome, self.dataset_type, - self.sample_type, self.project_guid, + self.sample_type, self.callset_path, self.project_remap_path, self.project_pedigree_path, diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table.py b/v03_pipeline/lib/tasks/write_relatedness_check_table.py index 1ba75446c..dc8bf17d6 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table.py @@ -2,7 +2,7 @@ import luigi from v03_pipeline.lib.methods.relatedness import call_relatedness -from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env +from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env, SampleType from v03_pipeline.lib.paths import ( cached_reference_dataset_query_path, relatedness_check_table_path, @@ -16,6 +16,7 @@ class WriteRelatednessCheckTableTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() def output(self) -> luigi.Target: @@ -43,7 +44,6 @@ def requires(self) -> luigi.Task: UpdatedCachedReferenceDatasetQuery( reference_genome=self.reference_genome, dataset_type=self.dataset_type, - sample_type=self.sample_type, crdq=CachedReferenceDatasetQuery.GNOMAD_QC, ) if Env.REFERENCE_DATA_AUTO_UPDATE diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index 7998fb689..079996d35 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -14,6 +14,7 @@ ) from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families from v03_pipeline.lib.misc.sample_ids import remap_sample_ids, subset_samples +from v03_pipeline.lib.model import SampleType from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask from v03_pipeline.lib.tasks.files import GCSorLocalTarget, RawFileTask @@ -27,6 +28,7 @@ class WriteRemappedAndSubsettedCallsetTask(BaseWriteTask): + sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() project_guid = luigi.Parameter() project_remap_path = luigi.Parameter() @@ -95,7 +97,6 @@ def requires(self) -> list[luigi.Task]: WriteSexCheckTableTask( self.reference_genome, self.dataset_type, - self.sample_type, self.callset_path, self.imputed_sex_path, ),