Skip to content

Commit 8ee36c6

Browse files
authored
Delete Project & Family Table Tasks. (#767)
* Delete project tasks * cleanup * ruff format * well * rename * hacking away * almost there! * ruff * Fix missing updates change * ruff * Remove debug code * remove bad merge * more precision in test * project table * allow for missing project * remove some unnecessary checks * test already deleted family * Lots of renames * More updates * Sketch * Flesh out test * fix paths * Rename base hail table * a bunch more renames * delete project table * Add delete project families * add comment * test it! * Fix * add dep
1 parent 057cb4d commit 8ee36c6

36 files changed

+517
-72
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import hailtop.fs as hfs
2+
3+
from v03_pipeline.lib.logger import get_logger
4+
from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask
5+
from v03_pipeline.lib.tasks.files import GCSorLocalFolderTarget, GCSorLocalTarget
6+
7+
logger = get_logger(__name__)
8+
9+
10+
class BaseDeleteTableTask(BaseHailTableTask):
    """Base task that removes whatever lives at ``self.output().path``.

    Subclasses supply ``output()``; this class provides the completion
    check and the deletion itself.
    """

    def complete(self) -> bool:
        """Return True when nothing exists at the output path.

        The path is probed with both ``GCSorLocalTarget`` and
        ``GCSorLocalFolderTarget``; the task counts as complete only when
        neither reports that the path exists.
        """
        logger.info(f'DeleteTableTask: checking if {self.output().path} exists')
        # Stop early if the first probe finds the path; the folder probe
        # is only consulted when the first one comes back empty.
        if GCSorLocalTarget(self.output().path).exists():
            return False
        return not GCSorLocalFolderTarget(self.output().path).exists()

    def run(self) -> None:
        """Recursively remove the output path via ``hailtop.fs.rmtree``."""
        doomed_path = self.output().path
        hfs.rmtree(doomed_path)

v03_pipeline/lib/tasks/base/base_update_task.py renamed to v03_pipeline/lib/tasks/base/base_update.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import hail as hl
22

33
from v03_pipeline.lib.misc.io import write
4-
from v03_pipeline.lib.tasks.base.base_hail_table_task import BaseHailTableTask
4+
from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask
55

66

77
class BaseUpdateTask(BaseHailTableTask):

v03_pipeline/lib/tasks/base/base_lookup_table_task.py renamed to v03_pipeline/lib/tasks/base/base_update_lookup_table.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
import luigi
33

44
from v03_pipeline.lib.paths import lookup_table_path
5-
from v03_pipeline.lib.tasks.base.base_update_task import BaseUpdateTask
5+
from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
66
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
77

88

9-
class BaseLookupTableTask(BaseUpdateTask):
9+
class BaseUpdateLookupTableTask(BaseUpdateTask):
1010
def output(self) -> luigi.Target:
1111
return GCSorLocalTarget(
1212
lookup_table_path(

v03_pipeline/lib/tasks/base/base_project_table_task.py renamed to v03_pipeline/lib/tasks/base/base_update_project_table.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
import luigi
33

44
from v03_pipeline.lib.paths import project_table_path
5-
from v03_pipeline.lib.tasks.base.base_update_task import BaseUpdateTask
5+
from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
66
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
77

88

9-
class BaseProjectTableTask(BaseUpdateTask):
9+
class BaseUpdateProjectTableTask(BaseUpdateTask):
1010
project_guid = luigi.Parameter()
1111

1212
def output(self) -> luigi.Target:

v03_pipeline/lib/tasks/base/base_variant_annotations_table.py renamed to v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@
1313
valid_reference_dataset_collection_path,
1414
variant_annotations_table_path,
1515
)
16-
from v03_pipeline.lib.tasks.base.base_update_task import BaseUpdateTask
16+
from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
1717
from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask
1818
from v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection import (
1919
UpdatedReferenceDatasetCollectionTask,
2020
)
2121

2222

23-
class BaseVariantAnnotationsTableTask(BaseUpdateTask):
23+
class BaseUpdateVariantAnnotationsTableTask(BaseUpdateTask):
2424
@property
2525
def rdc_annotation_dependencies(self) -> dict[str, hl.Table]:
2626
return get_rdc_annotation_dependencies(self.dataset_type, self.reference_genome)

v03_pipeline/lib/tasks/base/base_variant_annotations_table_test.py renamed to v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
SampleType,
1212
)
1313
from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
14-
from v03_pipeline.lib.tasks.base.base_variant_annotations_table import (
15-
BaseVariantAnnotationsTableTask,
14+
from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import (
15+
BaseUpdateVariantAnnotationsTableTask,
1616
)
1717
from v03_pipeline.lib.tasks.files import GCSorLocalFolderTarget
1818
from v03_pipeline.lib.test.mock_complete_task import MockCompleteTask
@@ -52,11 +52,11 @@ def setUp(self) -> None:
5252
)
5353

5454
@patch(
55-
'v03_pipeline.lib.tasks.base.base_variant_annotations_table.UpdatedReferenceDatasetCollectionTask',
55+
'v03_pipeline.lib.tasks.base.base_update_variant_annotations_table.UpdatedReferenceDatasetCollectionTask',
5656
)
5757
def test_should_create_initialized_table(self, mock_update_rdc_task) -> None:
5858
mock_update_rdc_task.return_value = MockCompleteTask()
59-
vat_task = BaseVariantAnnotationsTableTask(
59+
vat_task = BaseUpdateVariantAnnotationsTableTask(
6060
reference_genome=ReferenceGenome.GRCh38,
6161
dataset_type=DatasetType.SNV_INDEL,
6262
sample_type=SampleType.WGS,

v03_pipeline/lib/tasks/base/base_write_task.py renamed to v03_pipeline/lib/tasks/base/base_write.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import hail as hl
22

33
from v03_pipeline.lib.misc.io import write
4-
from v03_pipeline.lib.tasks.base.base_hail_table_task import BaseHailTableTask
4+
from v03_pipeline.lib.tasks.base.base_hail_table import BaseHailTableTask
55

66

77
class BaseWriteTask(BaseHailTableTask):
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import luigi
2+
3+
from v03_pipeline.lib.paths import family_table_path
4+
from v03_pipeline.lib.tasks.base.base_delete_table import BaseDeleteTableTask
5+
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
6+
7+
8+
class DeleteFamilyTableTask(BaseDeleteTableTask):
    """Delete task whose output is the table for a single family."""

    # Identifier of the family whose table this task targets.
    family_guid = luigi.Parameter()

    def output(self) -> luigi.Target:
        """Return the target wrapping this family's table path.

        The path is derived from the task's reference genome, dataset type
        and ``family_guid`` by ``family_table_path``.
        """
        table_path = family_table_path(
            self.reference_genome,
            self.dataset_type,
            self.family_guid,
        )
        return GCSorLocalTarget(table_path)
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
import pathlib
2+
3+
import hail as hl
4+
import luigi.worker
5+
6+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
7+
from v03_pipeline.lib.paths import family_table_path
8+
from v03_pipeline.lib.tasks.delete_family_table import DeleteFamilyTableTask
9+
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
10+
11+
12+
class DeleteTableTaskTest(MockedDatarootTestCase):
    """Exercises ``DeleteFamilyTableTask`` against a table written in setUp."""

    def setUp(self) -> None:
        """Write a two-row GRCh38 table at the family table path for 'abc_1'."""
        super().setUp()
        # (position, alleles) pairs for the fixture rows, all on chr1.
        variants = (
            (876499, ['A', 'G']),
            (878314, ['G', 'C']),
        )
        rows = [
            {
                'locus': hl.Locus(
                    contig='chr1',
                    position=position,
                    reference_genome='GRCh38',
                ),
                'alleles': alleles,
            }
            for position, alleles in variants
        ]
        schema = hl.tstruct(
            locus=hl.tlocus('GRCh38'),
            alleles=hl.tarray(hl.tstr),
        )
        family_table = hl.Table.parallelize(
            rows,
            schema,
            key=['locus', 'alleles'],
        )
        destination = family_table_path(
            ReferenceGenome.GRCh38,
            DatasetType.SNV_INDEL,
            'abc_1',
        )
        family_table.write(destination)

    def test_delete_family_table_task(self) -> None:
        """Run the delete task and check the table path no longer exists."""
        luigi_worker = luigi.worker.Worker()
        delete_task = DeleteFamilyTableTask(
            reference_genome=ReferenceGenome.GRCh38,
            dataset_type=DatasetType.SNV_INDEL,
            sample_type=SampleType.WGS,
            family_guid='abc_1',
        )
        luigi_worker.add(delete_task)
        luigi_worker.run()
        self.assertTrue(delete_task.complete())
        # Check the filesystem directly as well as the task's own
        # completion check.
        deleted_path = pathlib.Path(
            family_table_path(
                ReferenceGenome.GRCh38,
                DatasetType.SNV_INDEL,
                'abc_1',
            ),
        )
        self.assertFalse(deleted_path.exists())

0 commit comments

Comments
 (0)