Skip to content

Dev #1039

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 13, 2025
Merged

Dev #1039

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 42 additions & 16 deletions v03_pipeline/lib/tasks/delete_project_family_tables.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os

import hail as hl
import hailtop.fs as hfs
import luigi
import luigi.util

from v03_pipeline.lib.misc.project_tables import get_valid_project_tables
from v03_pipeline.lib.model import SampleType
from v03_pipeline.lib.paths import project_table_path
from v03_pipeline.lib.tasks.base.base_loading_pipeline_params import (
BaseLoadingPipelineParams,
)
Expand All @@ -11,33 +15,55 @@

@luigi.util.inherits(BaseLoadingPipelineParams)
class DeleteProjectFamilyTablesTask(luigi.Task):
sample_type = luigi.EnumParameter(enum=SampleType)
project_guid = luigi.Parameter()

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.dynamic_delete_family_table_tasks = set()

def complete(self) -> bool:
project_ht_path = project_table_path(
self.reference_genome,
self.dataset_type,
self.sample_type,
self.project_guid,
)
if not (
hfs.exists(project_ht_path)
and hfs.exists(
os.path.join(project_ht_path, '_SUCCESS'),
)
):
# Task should succeed even if there are no
# project tables. We rely on the premise
# that a family table is generated from a
# project table to assert that if a project
# table does not exist, the family tables must
# also not exist.
return True
return len(self.dynamic_delete_family_table_tasks) >= 1 and all(
delete_family_table_task.complete()
for delete_family_table_task in self.dynamic_delete_family_table_tasks
)

def run(self):
project_tables = get_valid_project_tables(
self.reference_genome,
self.dataset_type,
self.project_guid,
project_ht = hl.read_table(
project_table_path(
self.reference_genome,
self.dataset_type,
self.sample_type,
self.project_guid,
),
)
for project_ht, sample_type in project_tables:
family_guids = hl.eval(project_ht.globals.family_guids)
for family_guid in family_guids:
self.dynamic_delete_family_table_tasks.add(
DeleteFamilyTableTask(
reference_genome=self.reference_genome,
dataset_type=self.dataset_type,
sample_type=sample_type,
family_guid=family_guid,
),
)
family_guids = hl.eval(project_ht.globals.family_guids)
for family_guid in family_guids:
self.dynamic_delete_family_table_tasks.add(
DeleteFamilyTableTask(
reference_genome=self.reference_genome,
dataset_type=self.dataset_type,
sample_type=self.sample_type,
family_guid=family_guid,
),
)
yield self.dynamic_delete_family_table_tasks
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,12 @@ def test_no_project_tables_for_project(self) -> None:
task = DeleteProjectFamilyTablesTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
project_guid='project_b',
)
worker.add(task)
worker.run()
self.assertFalse(task.complete())
self.assertTrue(task.complete())

def test_delete_project_family_tables_task(self) -> None:
self.assertTrue(
Expand Down Expand Up @@ -198,6 +199,7 @@ def test_delete_project_family_tables_task(self) -> None:
task = DeleteProjectFamilyTablesTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
project_guid='project_a',
)
worker.add(task)
Expand Down