Skip to content

Split "SampleType" out from shared task base class. #808

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions v03_pipeline/lib/tasks/base/base_hail_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import luigi

from v03_pipeline.lib.logger import get_logger
from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome, SampleType
from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome
from v03_pipeline.lib.tasks.files import GCSorLocalFolderTarget

logger = get_logger(__name__)
Expand All @@ -11,7 +11,6 @@
class BaseHailTableTask(luigi.Task):
reference_genome = luigi.EnumParameter(enum=ReferenceGenome)
dataset_type = luigi.EnumParameter(enum=DatasetType)
sample_type = luigi.EnumParameter(enum=SampleType)

def output(self) -> luigi.Target:
raise NotImplementedError
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def requires(self) -> list[luigi.Task]:
UpdatedReferenceDatasetCollectionTask(
self.reference_genome,
self.dataset_type,
self.sample_type,
rdc,
)
if Env.REFERENCE_DATA_AUTO_UPDATE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
DatasetType,
ReferenceDatasetCollection,
ReferenceGenome,
SampleType,
)
from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import (
Expand Down Expand Up @@ -59,7 +58,6 @@ def test_should_create_initialized_table(self, mock_update_rdc_task) -> None:
vat_task = BaseUpdateVariantAnnotationsTableTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
)
self.assertTrue('annotations.ht' in vat_task.output().path)
self.assertTrue(DatasetType.SNV_INDEL.value in vat_task.output().path)
Expand Down
3 changes: 1 addition & 2 deletions v03_pipeline/lib/tasks/delete_family_table_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import hail as hl
import luigi.worker

from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
from v03_pipeline.lib.paths import family_table_path
from v03_pipeline.lib.tasks.delete_family_table import DeleteFamilyTableTask
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
Expand Down Expand Up @@ -50,7 +50,6 @@ def test_delete_family_table_task(self) -> None:
task = DeleteFamilyTableTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
family_guid='abc_1',
)
worker.add(task)
Expand Down
1 change: 0 additions & 1 deletion v03_pipeline/lib/tasks/delete_family_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ def run(self):
DeleteFamilyTableTask(
reference_genome=self.reference_genome,
dataset_type=self.dataset_type,
sample_type=self.sample_type,
family_guid=family_guid,
),
)
Expand Down
3 changes: 1 addition & 2 deletions v03_pipeline/lib/tasks/delete_family_tables_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import hail as hl
import luigi.worker

from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
from v03_pipeline.lib.paths import family_table_path
from v03_pipeline.lib.tasks.delete_family_tables import (
DeleteFamilyTablesTask,
Expand Down Expand Up @@ -38,7 +38,6 @@ def test_delete_project_family_tables_task(self) -> None:
task = DeleteFamilyTablesTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
family_guids=['family_a', 'family_b'],
)
worker.add(task)
Expand Down
1 change: 0 additions & 1 deletion v03_pipeline/lib/tasks/delete_project_family_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def run(self):
DeleteFamilyTableTask(
reference_genome=self.reference_genome,
dataset_type=self.dataset_type,
sample_type=self.sample_type,
family_guid=family_guid,
),
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import hail as hl
import luigi.worker

from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
from v03_pipeline.lib.paths import family_table_path, project_table_path
from v03_pipeline.lib.tasks.delete_project_family_tables import (
DeleteProjectFamilyTablesTask,
Expand Down Expand Up @@ -149,7 +149,6 @@ def test_delete_project_family_tables_task(self) -> None:
task = DeleteProjectFamilyTablesTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
project_guid='project_a',
)
worker.add(task)
Expand Down
1 change: 0 additions & 1 deletion v03_pipeline/lib/tasks/delete_project_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ def requires(self) -> luigi.Task:
return DeleteProjectFamilyTablesTask(
self.reference_genome,
self.dataset_type,
self.sample_type,
self.project_guid,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
CachedReferenceDatasetQuery,
DatasetType,
ReferenceGenome,
SampleType,
)
from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import (
UpdatedCachedReferenceDatasetQuery,
Expand All @@ -14,7 +13,6 @@
class UpdateCachedReferenceDatasetQueries(luigi.Task):
reference_genome = luigi.EnumParameter(enum=ReferenceGenome)
dataset_type = luigi.EnumParameter(enum=DatasetType)
sample_type = luigi.EnumParameter(enum=SampleType)

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
CachedReferenceDatasetQuery,
DatasetType,
ReferenceGenome,
SampleType,
)
from v03_pipeline.lib.tasks.reference_data.update_cached_reference_dataset_queries import (
UpdateCachedReferenceDatasetQueries,
Expand All @@ -25,7 +24,6 @@ def test_37_snv_indel(self, mock_crdq_task):
task = UpdateCachedReferenceDatasetQueries(
reference_genome=ReferenceGenome.GRCh37,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
)
worker.add(task)
worker.run()
Expand All @@ -35,25 +33,21 @@ def test_37_snv_indel(self, mock_crdq_task):
mock.call(
reference_genome=ReferenceGenome.GRCh37,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
),
mock.call(
reference_genome=ReferenceGenome.GRCh37,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
),
mock.call(
reference_genome=ReferenceGenome.GRCh37,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
),
mock.call(
reference_genome=ReferenceGenome.GRCh37,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
),
],
Expand All @@ -65,7 +59,6 @@ def test_38_snv_indel(self, mock_crdq_task):
task = UpdateCachedReferenceDatasetQueries(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
)
worker.add(task)
worker.run()
Expand All @@ -75,25 +68,21 @@ def test_38_snv_indel(self, mock_crdq_task):
mock.call(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
),
mock.call(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
),
mock.call(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
),
mock.call(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
),
],
Expand All @@ -105,7 +94,6 @@ def test_38_mito(self, mock_crdq_task):
task = UpdateCachedReferenceDatasetQueries(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.MITO,
sample_type=SampleType.WGS,
)
worker.add(task)
worker.run()
Expand All @@ -115,7 +103,6 @@ def test_38_mito(self, mock_crdq_task):
mock.call(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.MITO,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
),
],
Expand All @@ -127,7 +114,6 @@ def test_38_sv(self, mock_crdq_task):
task = UpdateCachedReferenceDatasetQueries(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SV,
sample_type=SampleType.WGS,
)
worker.add(task)
worker.run()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
DatasetType,
ReferenceDatasetCollection,
ReferenceGenome,
SampleType,
)
from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS
Expand Down Expand Up @@ -730,7 +729,6 @@ def test_update_vat_with_updated_rdc_snv_indel_38(
task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
)
worker = luigi.worker.Worker()
worker.add(task)
Expand Down Expand Up @@ -925,7 +923,6 @@ def test_update_vat_with_updated_rdc_mito_38(
task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.MITO,
sample_type=SampleType.WGS,
)
worker = luigi.worker.Worker()
worker.add(task)
Expand Down Expand Up @@ -1068,7 +1065,6 @@ def test_update_vat_with_updated_rdc_snv_indel_37(
task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
reference_genome=ReferenceGenome.GRCh37,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
)
worker = luigi.worker.Worker()
worker.add(task)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ def requires(self) -> luigi.Task:
return UpdatedReferenceDatasetCollectionTask(
self.reference_genome,
self.dataset_type,
self.sample_type,
ReferenceDatasetCollection.COMBINED,
)
if self.crdq.query_raw_dataset:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
DatasetType,
ReferenceDatasetCollection,
ReferenceGenome,
SampleType,
)
from v03_pipeline.lib.paths import (
cached_reference_dataset_query_path,
Expand Down Expand Up @@ -109,7 +108,6 @@ def test_gnomad_qc(
task = UpdatedCachedReferenceDatasetQuery(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
)
worker.add(task)
Expand Down Expand Up @@ -199,7 +197,6 @@ def _clinvar_path_variants(table, **_: Any):
task = UpdatedCachedReferenceDatasetQuery(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
)
worker.add(task)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
DatasetType,
ReferenceDatasetCollection,
ReferenceGenome,
SampleType,
)
from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS
Expand Down Expand Up @@ -170,7 +169,6 @@ def test_update_task_with_empty_reference_data_table(
task = UpdatedReferenceDatasetCollectionTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
reference_dataset_collection=ReferenceDatasetCollection.COMBINED,
)
worker.add(task)
Expand Down Expand Up @@ -280,7 +278,6 @@ def test_update_task_with_existing_reference_dataset_collection_table(
task = UpdatedReferenceDatasetCollectionTask(
reference_genome=ReferenceGenome.GRCh38,
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
reference_dataset_collection=ReferenceDatasetCollection.COMBINED,
)
worker.add(task)
Expand Down
2 changes: 2 additions & 0 deletions v03_pipeline/lib/tasks/update_lookup_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
join_lookup_hts,
remove_family_guids,
)
from v03_pipeline.lib.model import SampleType
from v03_pipeline.lib.model.constants import PROJECTS_EXCLUDED_FROM_LOOKUP
from v03_pipeline.lib.tasks.base.base_update_lookup_table import (
BaseUpdateLookupTableTask,
Expand All @@ -17,6 +18,7 @@


class UpdateLookupTableTask(BaseUpdateLookupTableTask):
sample_type = luigi.EnumParameter(enum=SampleType)
callset_paths = luigi.ListParameter()
project_guids = luigi.ListParameter()
project_remap_paths = luigi.ListParameter()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import hail as hl
import luigi.worker

from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
from v03_pipeline.lib.tasks.update_lookup_table_with_deleted_families import (
UpdateLookupTableWithDeletedFamiliesTask,
)
Expand All @@ -17,7 +17,6 @@ def test_delete_project_empty_table(
worker = luigi.worker.Worker()
task = UpdateLookupTableWithDeletedFamiliesTask(
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
reference_genome=ReferenceGenome.GRCh38,
project_guid='R0555_seqr_demo',
family_guids=['abc'],
Expand Down Expand Up @@ -132,7 +131,6 @@ def test_delete_project(
worker = luigi.worker.Worker()
task = UpdateLookupTableWithDeletedFamiliesTask(
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
reference_genome=ReferenceGenome.GRCh38,
project_guid='project_a',
family_guids=['1', '3'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import hail as hl
import luigi.worker

from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
from v03_pipeline.lib.tasks.update_lookup_table_with_deleted_project import (
UpdateLookupTableWithDeletedProjectTask,
)
Expand All @@ -17,7 +17,6 @@ def test_delete_project_empty_table(
worker = luigi.worker.Worker()
task = UpdateLookupTableWithDeletedProjectTask(
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
reference_genome=ReferenceGenome.GRCh38,
project_guid='R0555_seqr_demo',
)
Expand Down Expand Up @@ -131,7 +130,6 @@ def test_delete_project(
worker = luigi.worker.Worker()
task = UpdateLookupTableWithDeletedProjectTask(
dataset_type=DatasetType.SNV_INDEL,
sample_type=SampleType.WGS,
reference_genome=ReferenceGenome.GRCh38,
project_guid='project_a',
)
Expand Down
Loading
Loading