|
| 1 | +import os |
| 2 | + |
1 | 3 | import hail as hl
|
| 4 | +import hailtop.fs as hfs |
2 | 5 | import luigi
|
3 | 6 | import luigi.util
|
4 | 7 |
|
5 |
| -from v03_pipeline.lib.misc.project_tables import get_valid_project_tables |
| 8 | +from v03_pipeline.lib.model import SampleType |
| 9 | +from v03_pipeline.lib.paths import project_table_path |
6 | 10 | from v03_pipeline.lib.tasks.base.base_loading_pipeline_params import (
|
7 | 11 | BaseLoadingPipelineParams,
|
8 | 12 | )
|
|
11 | 15 |
|
12 | 16 | @luigi.util.inherits(BaseLoadingPipelineParams)
|
13 | 17 | class DeleteProjectFamilyTablesTask(luigi.Task):
|
| 18 | + sample_type = luigi.EnumParameter(enum=SampleType) |
14 | 19 | project_guid = luigi.Parameter()
|
15 | 20 |
|
16 | 21 | def __init__(self, *args, **kwargs):
|
17 | 22 | super().__init__(*args, **kwargs)
|
18 | 23 | self.dynamic_delete_family_table_tasks = set()
|
19 | 24 |
|
20 | 25 | def complete(self) -> bool:
|
| 26 | + project_ht_path = project_table_path( |
| 27 | + self.reference_genome, |
| 28 | + self.dataset_type, |
| 29 | + self.sample_type, |
| 30 | + self.project_guid, |
| 31 | + ) |
| 32 | + if not ( |
| 33 | + hfs.exists(project_ht_path) |
| 34 | + and hfs.exists( |
| 35 | + os.path.join(project_ht_path, '_SUCCESS'), |
| 36 | + ) |
| 37 | + ): |
| 38 | + # Task should succeed even if there are no |
| 39 | + # project tables. We rely on the premise |
| 40 | + # that a family table is generated from a |
| 41 | + # project table to assert that if a project |
| 42 | + # table does not exist, the family tables must |
| 43 | + # also not exist. |
| 44 | + return True |
21 | 45 | return len(self.dynamic_delete_family_table_tasks) >= 1 and all(
|
22 | 46 | delete_family_table_task.complete()
|
23 | 47 | for delete_family_table_task in self.dynamic_delete_family_table_tasks
|
24 | 48 | )
|
25 | 49 |
|
26 | 50 | def run(self):
|
27 |
| - project_tables = get_valid_project_tables( |
28 |
| - self.reference_genome, |
29 |
| - self.dataset_type, |
30 |
| - self.project_guid, |
| 51 | + project_ht = hl.read_table( |
| 52 | + project_table_path( |
| 53 | + self.reference_genome, |
| 54 | + self.dataset_type, |
| 55 | + self.sample_type, |
| 56 | + self.project_guid, |
| 57 | + ), |
31 | 58 | )
|
32 |
| - for project_ht, sample_type in project_tables: |
33 |
| - family_guids = hl.eval(project_ht.globals.family_guids) |
34 |
| - for family_guid in family_guids: |
35 |
| - self.dynamic_delete_family_table_tasks.add( |
36 |
| - DeleteFamilyTableTask( |
37 |
| - reference_genome=self.reference_genome, |
38 |
| - dataset_type=self.dataset_type, |
39 |
| - sample_type=sample_type, |
40 |
| - family_guid=family_guid, |
41 |
| - ), |
42 |
| - ) |
| 59 | + family_guids = hl.eval(project_ht.globals.family_guids) |
| 60 | + for family_guid in family_guids: |
| 61 | + self.dynamic_delete_family_table_tasks.add( |
| 62 | + DeleteFamilyTableTask( |
| 63 | + reference_genome=self.reference_genome, |
| 64 | + dataset_type=self.dataset_type, |
| 65 | + sample_type=self.sample_type, |
| 66 | + family_guid=family_guid, |
| 67 | + ), |
| 68 | + ) |
43 | 69 | yield self.dynamic_delete_family_table_tasks
|
0 commit comments