From aabf64f0795038e6f1da1a10aa60505a2f8a91fc Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 30 Jan 2025 10:22:09 -0500 Subject: [PATCH 1/2] bump hail version (#1027) --- requirements.in | 2 +- requirements.txt | 4 ++-- v03_pipeline/deploy/Dockerfile | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.in b/requirements.in index 31cf9ed52..54e4e081c 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,4 @@ -hail==0.2.132 +hail==0.2.133 luigi==3.5.2 gnomad==0.6.4 aiofiles==24.1.0 diff --git a/requirements.txt b/requirements.txt index b64e3110f..3f0a4cc22 100644 --- a/requirements.txt +++ b/requirements.txt @@ -104,11 +104,11 @@ google-auth==2.35.0 # hail google-auth-oauthlib==0.8.0 # via hail -hail==0.2.132 +hail==0.2.133 # via -r requirements.in hdbscan==0.8.39 # via gnomad -humanize==1.1.0 +humanize==4.11.0 # via hail idna==3.10 # via diff --git a/v03_pipeline/deploy/Dockerfile b/v03_pipeline/deploy/Dockerfile index 6cab7adf7..72166487c 100644 --- a/v03_pipeline/deploy/Dockerfile +++ b/v03_pipeline/deploy/Dockerfile @@ -1,5 +1,5 @@ FROM docker:dind as BUILD -FROM hailgenetics/hail:0.2.132-py3.11 +FROM hailgenetics/hail:0.2.133-py3.11 COPY --from=BUILD /usr/local/bin/docker /usr/local/bin/docker LABEL maintainer="Broad TGG" From 7e8311d7982b00fd9e1c19aa635ef9c8d834a2d9 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 13 Feb 2025 10:28:27 -0500 Subject: [PATCH 2/2] Benb/bugfix allow deleting project to succeed if no project (#1038) * First pass * add comment * ruff --- .../lib/tasks/delete_project_family_tables.py | 58 ++++++++++++++----- .../delete_project_family_tables_test.py | 4 +- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/v03_pipeline/lib/tasks/delete_project_family_tables.py b/v03_pipeline/lib/tasks/delete_project_family_tables.py index 2dd8e2841..28cf6cfad 100644 --- a/v03_pipeline/lib/tasks/delete_project_family_tables.py +++ b/v03_pipeline/lib/tasks/delete_project_family_tables.py @@ -1,8 +1,12 @@ +import os + import hail as hl +import hailtop.fs as hfs import luigi import luigi.util -from v03_pipeline.lib.misc.project_tables import get_valid_project_tables +from v03_pipeline.lib.model import SampleType +from v03_pipeline.lib.paths import project_table_path from v03_pipeline.lib.tasks.base.base_loading_pipeline_params import ( BaseLoadingPipelineParams, ) @@ -11,6 +15,7 @@ @luigi.util.inherits(BaseLoadingPipelineParams) class DeleteProjectFamilyTablesTask(luigi.Task): + sample_type = luigi.EnumParameter(enum=SampleType) project_guid = luigi.Parameter() def __init__(self, *args, **kwargs): @@ -18,26 +23,47 @@ def __init__(self, *args, **kwargs): self.dynamic_delete_family_table_tasks = set() def complete(self) -> bool: + project_ht_path = project_table_path( + self.reference_genome, + self.dataset_type, + self.sample_type, + self.project_guid, + ) + if not ( + hfs.exists(project_ht_path) + and hfs.exists( + os.path.join(project_ht_path, '_SUCCESS'), + ) + ): + # Task should succeed even if there are no + # project tables. We rely on the premise + # that a family table is generated from a + # project table to assert that if a project + # table does not exist, the family tables must + # also not exist. + return True return len(self.dynamic_delete_family_table_tasks) >= 1 and all( delete_family_table_task.complete() for delete_family_table_task in self.dynamic_delete_family_table_tasks ) def run(self): - project_tables = get_valid_project_tables( - self.reference_genome, - self.dataset_type, - self.project_guid, + project_ht = hl.read_table( + project_table_path( + self.reference_genome, + self.dataset_type, + self.sample_type, + self.project_guid, + ), ) - for project_ht, sample_type in project_tables: - family_guids = hl.eval(project_ht.globals.family_guids) - for family_guid in family_guids: - self.dynamic_delete_family_table_tasks.add( - DeleteFamilyTableTask( - reference_genome=self.reference_genome, - dataset_type=self.dataset_type, - sample_type=sample_type, - family_guid=family_guid, - ), - ) + family_guids = hl.eval(project_ht.globals.family_guids) + for family_guid in family_guids: + self.dynamic_delete_family_table_tasks.add( + DeleteFamilyTableTask( + reference_genome=self.reference_genome, + dataset_type=self.dataset_type, + sample_type=self.sample_type, + family_guid=family_guid, + ), + ) yield self.dynamic_delete_family_table_tasks diff --git a/v03_pipeline/lib/tasks/delete_project_family_tables_test.py b/v03_pipeline/lib/tasks/delete_project_family_tables_test.py index e6f10edb2..585aef160 100644 --- a/v03_pipeline/lib/tasks/delete_project_family_tables_test.py +++ b/v03_pipeline/lib/tasks/delete_project_family_tables_test.py @@ -147,11 +147,12 @@ def test_no_project_tables_for_project(self) -> None: task = DeleteProjectFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + sample_type=SampleType.WGS, project_guid='project_b', ) worker.add(task) worker.run() - self.assertFalse(task.complete()) + self.assertTrue(task.complete()) def test_delete_project_family_tables_task(self) -> None: self.assertTrue( @@ -198,6 +199,7 @@ def test_delete_project_family_tables_task(self) -> None: task = DeleteProjectFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + sample_type=SampleType.WGS, project_guid='project_a', ) worker.add(task)