diff --git a/v03_pipeline/lib/tasks/delete_project_family_tables.py b/v03_pipeline/lib/tasks/delete_project_family_tables.py index 2dd8e2841..28cf6cfad 100644 --- a/v03_pipeline/lib/tasks/delete_project_family_tables.py +++ b/v03_pipeline/lib/tasks/delete_project_family_tables.py @@ -1,8 +1,12 @@ +import os + import hail as hl +import hailtop.fs as hfs import luigi import luigi.util -from v03_pipeline.lib.misc.project_tables import get_valid_project_tables +from v03_pipeline.lib.model import SampleType +from v03_pipeline.lib.paths import project_table_path from v03_pipeline.lib.tasks.base.base_loading_pipeline_params import ( BaseLoadingPipelineParams, ) @@ -11,6 +15,7 @@ @luigi.util.inherits(BaseLoadingPipelineParams) class DeleteProjectFamilyTablesTask(luigi.Task): + sample_type = luigi.EnumParameter(enum=SampleType) project_guid = luigi.Parameter() def __init__(self, *args, **kwargs): @@ -18,26 +23,47 @@ def __init__(self, *args, **kwargs): self.dynamic_delete_family_table_tasks = set() def complete(self) -> bool: + project_ht_path = project_table_path( + self.reference_genome, + self.dataset_type, + self.sample_type, + self.project_guid, + ) + if not ( + hfs.exists(project_ht_path) + and hfs.exists( + os.path.join(project_ht_path, '_SUCCESS'), + ) + ): + # Task should succeed even if there are no + # project tables. We rely on the premise + # that a family table is generated from a + # project table to assert that if a project + # table does not exist, the family tables must + # also not exist. + return True return len(self.dynamic_delete_family_table_tasks) >= 1 and all( delete_family_table_task.complete() for delete_family_table_task in self.dynamic_delete_family_table_tasks ) def run(self): - project_tables = get_valid_project_tables( - self.reference_genome, - self.dataset_type, - self.project_guid, + project_ht = hl.read_table( + project_table_path( + self.reference_genome, + self.dataset_type, + self.sample_type, + self.project_guid, + ), ) - for project_ht, sample_type in project_tables: - family_guids = hl.eval(project_ht.globals.family_guids) - for family_guid in family_guids: - self.dynamic_delete_family_table_tasks.add( - DeleteFamilyTableTask( - reference_genome=self.reference_genome, - dataset_type=self.dataset_type, - sample_type=sample_type, - family_guid=family_guid, - ), - ) + family_guids = hl.eval(project_ht.globals.family_guids) + for family_guid in family_guids: + self.dynamic_delete_family_table_tasks.add( + DeleteFamilyTableTask( + reference_genome=self.reference_genome, + dataset_type=self.dataset_type, + sample_type=self.sample_type, + family_guid=family_guid, + ), + ) yield self.dynamic_delete_family_table_tasks diff --git a/v03_pipeline/lib/tasks/delete_project_family_tables_test.py b/v03_pipeline/lib/tasks/delete_project_family_tables_test.py index e6f10edb2..585aef160 100644 --- a/v03_pipeline/lib/tasks/delete_project_family_tables_test.py +++ b/v03_pipeline/lib/tasks/delete_project_family_tables_test.py @@ -147,11 +147,12 @@ def test_no_project_tables_for_project(self) -> None: task = DeleteProjectFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + sample_type=SampleType.WGS, project_guid='project_b', ) worker.add(task) worker.run() - self.assertFalse(task.complete()) + self.assertTrue(task.complete()) def test_delete_project_family_tables_task(self) -> None: self.assertTrue( @@ -198,6 +199,7 @@ def test_delete_project_family_tables_task(self) -> None: task = DeleteProjectFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + sample_type=SampleType.WGS, project_guid='project_a', ) worker.add(task)