Skip to content

Commit 97b90a6

Browse files
authored
Split "SampleType" out from shared task base class. (#808)
* Remove concept of private crdqs
* lint
* fix logic
* Move SampleType out of BaseHailTableTask
* cleanup
* fix
* missed a few!
* shitshow
* flip order here
* flip order
1 parent 70b8cc4 commit 97b90a6

33 files changed

+31
-70
lines changed

v03_pipeline/lib/tasks/base/base_hail_table.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import luigi
33

44
from v03_pipeline.lib.logger import get_logger
5-
from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome, SampleType
5+
from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome
66
from v03_pipeline.lib.tasks.files import GCSorLocalFolderTarget
77

88
logger = get_logger(__name__)
@@ -11,7 +11,6 @@
1111
class BaseHailTableTask(luigi.Task):
1212
reference_genome = luigi.EnumParameter(enum=ReferenceGenome)
1313
dataset_type = luigi.EnumParameter(enum=DatasetType)
14-
sample_type = luigi.EnumParameter(enum=SampleType)
1514

1615
def output(self) -> luigi.Target:
1716
raise NotImplementedError

v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ def requires(self) -> list[luigi.Task]:
3939
UpdatedReferenceDatasetCollectionTask(
4040
self.reference_genome,
4141
self.dataset_type,
42-
self.sample_type,
4342
rdc,
4443
)
4544
if Env.REFERENCE_DATA_AUTO_UPDATE

v03_pipeline/lib/tasks/base/base_update_variant_annotations_table_test.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
DatasetType,
99
ReferenceDatasetCollection,
1010
ReferenceGenome,
11-
SampleType,
1211
)
1312
from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
1413
from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import (
@@ -59,7 +58,6 @@ def test_should_create_initialized_table(self, mock_update_rdc_task) -> None:
5958
vat_task = BaseUpdateVariantAnnotationsTableTask(
6059
reference_genome=ReferenceGenome.GRCh38,
6160
dataset_type=DatasetType.SNV_INDEL,
62-
sample_type=SampleType.WGS,
6361
)
6462
self.assertTrue('annotations.ht' in vat_task.output().path)
6563
self.assertTrue(DatasetType.SNV_INDEL.value in vat_task.output().path)

v03_pipeline/lib/tasks/delete_family_table_test.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import hail as hl
44
import luigi.worker
55

6-
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
6+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
77
from v03_pipeline.lib.paths import family_table_path
88
from v03_pipeline.lib.tasks.delete_family_table import DeleteFamilyTableTask
99
from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
@@ -50,7 +50,6 @@ def test_delete_family_table_task(self) -> None:
5050
task = DeleteFamilyTableTask(
5151
reference_genome=ReferenceGenome.GRCh38,
5252
dataset_type=DatasetType.SNV_INDEL,
53-
sample_type=SampleType.WGS,
5453
family_guid='abc_1',
5554
)
5655
worker.add(task)

v03_pipeline/lib/tasks/delete_family_tables.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ def run(self):
2323
DeleteFamilyTableTask(
2424
reference_genome=self.reference_genome,
2525
dataset_type=self.dataset_type,
26-
sample_type=self.sample_type,
2726
family_guid=family_guid,
2827
),
2928
)

v03_pipeline/lib/tasks/delete_family_tables_test.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import hail as hl
44
import luigi.worker
55

6-
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
6+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
77
from v03_pipeline.lib.paths import family_table_path
88
from v03_pipeline.lib.tasks.delete_family_tables import (
99
DeleteFamilyTablesTask,
@@ -38,7 +38,6 @@ def test_delete_project_family_tables_task(self) -> None:
3838
task = DeleteFamilyTablesTask(
3939
reference_genome=ReferenceGenome.GRCh38,
4040
dataset_type=DatasetType.SNV_INDEL,
41-
sample_type=SampleType.WGS,
4241
family_guids=['family_a', 'family_b'],
4342
)
4443
worker.add(task)

v03_pipeline/lib/tasks/delete_project_family_tables.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def run(self):
3535
DeleteFamilyTableTask(
3636
reference_genome=self.reference_genome,
3737
dataset_type=self.dataset_type,
38-
sample_type=self.sample_type,
3938
family_guid=family_guid,
4039
),
4140
)

v03_pipeline/lib/tasks/delete_project_family_tables_test.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import hail as hl
44
import luigi.worker
55

6-
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
6+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
77
from v03_pipeline.lib.paths import family_table_path, project_table_path
88
from v03_pipeline.lib.tasks.delete_project_family_tables import (
99
DeleteProjectFamilyTablesTask,
@@ -149,7 +149,6 @@ def test_delete_project_family_tables_task(self) -> None:
149149
task = DeleteProjectFamilyTablesTask(
150150
reference_genome=ReferenceGenome.GRCh38,
151151
dataset_type=DatasetType.SNV_INDEL,
152-
sample_type=SampleType.WGS,
153152
project_guid='project_a',
154153
)
155154
worker.add(task)

v03_pipeline/lib/tasks/delete_project_table.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ def requires(self) -> luigi.Task:
1515
return DeleteProjectFamilyTablesTask(
1616
self.reference_genome,
1717
self.dataset_type,
18-
self.sample_type,
1918
self.project_guid,
2019
)
2120

v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
CachedReferenceDatasetQuery,
55
DatasetType,
66
ReferenceGenome,
7-
SampleType,
87
)
98
from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import (
109
UpdatedCachedReferenceDatasetQuery,
@@ -14,7 +13,6 @@
1413
class UpdateCachedReferenceDatasetQueries(luigi.Task):
1514
reference_genome = luigi.EnumParameter(enum=ReferenceGenome)
1615
dataset_type = luigi.EnumParameter(enum=DatasetType)
17-
sample_type = luigi.EnumParameter(enum=SampleType)
1816

1917
def __init__(self, *args, **kwargs):
2018
super().__init__(*args, **kwargs)

v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
CachedReferenceDatasetQuery,
88
DatasetType,
99
ReferenceGenome,
10-
SampleType,
1110
)
1211
from v03_pipeline.lib.tasks.reference_data.update_cached_reference_dataset_queries import (
1312
UpdateCachedReferenceDatasetQueries,
@@ -25,7 +24,6 @@ def test_37_snv_indel(self, mock_crdq_task):
2524
task = UpdateCachedReferenceDatasetQueries(
2625
reference_genome=ReferenceGenome.GRCh37,
2726
dataset_type=DatasetType.SNV_INDEL,
28-
sample_type=SampleType.WGS,
2927
)
3028
worker.add(task)
3129
worker.run()
@@ -35,25 +33,21 @@ def test_37_snv_indel(self, mock_crdq_task):
3533
mock.call(
3634
reference_genome=ReferenceGenome.GRCh37,
3735
dataset_type=DatasetType.SNV_INDEL,
38-
sample_type=SampleType.WGS,
3936
crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
4037
),
4138
mock.call(
4239
reference_genome=ReferenceGenome.GRCh37,
4340
dataset_type=DatasetType.SNV_INDEL,
44-
sample_type=SampleType.WGS,
4541
crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
4642
),
4743
mock.call(
4844
reference_genome=ReferenceGenome.GRCh37,
4945
dataset_type=DatasetType.SNV_INDEL,
50-
sample_type=SampleType.WGS,
5146
crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
5247
),
5348
mock.call(
5449
reference_genome=ReferenceGenome.GRCh37,
5550
dataset_type=DatasetType.SNV_INDEL,
56-
sample_type=SampleType.WGS,
5751
crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
5852
),
5953
],
@@ -65,7 +59,6 @@ def test_38_snv_indel(self, mock_crdq_task):
6559
task = UpdateCachedReferenceDatasetQueries(
6660
reference_genome=ReferenceGenome.GRCh38,
6761
dataset_type=DatasetType.SNV_INDEL,
68-
sample_type=SampleType.WGS,
6962
)
7063
worker.add(task)
7164
worker.run()
@@ -75,25 +68,21 @@ def test_38_snv_indel(self, mock_crdq_task):
7568
mock.call(
7669
reference_genome=ReferenceGenome.GRCh38,
7770
dataset_type=DatasetType.SNV_INDEL,
78-
sample_type=SampleType.WGS,
7971
crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
8072
),
8173
mock.call(
8274
reference_genome=ReferenceGenome.GRCh38,
8375
dataset_type=DatasetType.SNV_INDEL,
84-
sample_type=SampleType.WGS,
8576
crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
8677
),
8778
mock.call(
8879
reference_genome=ReferenceGenome.GRCh38,
8980
dataset_type=DatasetType.SNV_INDEL,
90-
sample_type=SampleType.WGS,
9181
crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
9282
),
9383
mock.call(
9484
reference_genome=ReferenceGenome.GRCh38,
9585
dataset_type=DatasetType.SNV_INDEL,
96-
sample_type=SampleType.WGS,
9786
crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
9887
),
9988
],
@@ -105,7 +94,6 @@ def test_38_mito(self, mock_crdq_task):
10594
task = UpdateCachedReferenceDatasetQueries(
10695
reference_genome=ReferenceGenome.GRCh38,
10796
dataset_type=DatasetType.MITO,
108-
sample_type=SampleType.WGS,
10997
)
11098
worker.add(task)
11199
worker.run()
@@ -115,7 +103,6 @@ def test_38_mito(self, mock_crdq_task):
115103
mock.call(
116104
reference_genome=ReferenceGenome.GRCh38,
117105
dataset_type=DatasetType.MITO,
118-
sample_type=SampleType.WGS,
119106
crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
120107
),
121108
],
@@ -127,7 +114,6 @@ def test_38_sv(self, mock_crdq_task):
127114
task = UpdateCachedReferenceDatasetQueries(
128115
reference_genome=ReferenceGenome.GRCh38,
129116
dataset_type=DatasetType.SV,
130-
sample_type=SampleType.WGS,
131117
)
132118
worker.add(task)
133119
worker.run()

v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
DatasetType,
2020
ReferenceDatasetCollection,
2121
ReferenceGenome,
22-
SampleType,
2322
)
2423
from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
2524
from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS
@@ -734,7 +733,6 @@ def test_update_vat_with_updated_rdc_snv_indel_38(
734733
task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
735734
reference_genome=ReferenceGenome.GRCh38,
736735
dataset_type=DatasetType.SNV_INDEL,
737-
sample_type=SampleType.WGS,
738736
)
739737
worker = luigi.worker.Worker()
740738
worker.add(task)
@@ -941,7 +939,6 @@ def test_update_vat_with_updated_rdc_mito_38(
941939
task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
942940
reference_genome=ReferenceGenome.GRCh38,
943941
dataset_type=DatasetType.MITO,
944-
sample_type=SampleType.WGS,
945942
)
946943
worker = luigi.worker.Worker()
947944
worker.add(task)
@@ -1084,7 +1081,6 @@ def test_update_vat_with_updated_rdc_snv_indel_37(
10841081
task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
10851082
reference_genome=ReferenceGenome.GRCh37,
10861083
dataset_type=DatasetType.SNV_INDEL,
1087-
sample_type=SampleType.WGS,
10881084
)
10891085
worker = luigi.worker.Worker()
10901086
worker.add(task)

v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@ def requires(self) -> luigi.Task:
6868
return UpdatedReferenceDatasetCollectionTask(
6969
self.reference_genome,
7070
self.dataset_type,
71-
self.sample_type,
7271
ReferenceDatasetCollection.COMBINED,
7372
)
7473
if self.crdq.query_raw_dataset:

v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
DatasetType,
1212
ReferenceDatasetCollection,
1313
ReferenceGenome,
14-
SampleType,
1514
)
1615
from v03_pipeline.lib.paths import (
1716
cached_reference_dataset_query_path,
@@ -109,7 +108,6 @@ def test_gnomad_qc(
109108
task = UpdatedCachedReferenceDatasetQuery(
110109
reference_genome=ReferenceGenome.GRCh38,
111110
dataset_type=DatasetType.SNV_INDEL,
112-
sample_type=SampleType.WGS,
113111
crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
114112
)
115113
worker.add(task)
@@ -199,7 +197,6 @@ def _clinvar_path_variants(table, **_: Any):
199197
task = UpdatedCachedReferenceDatasetQuery(
200198
reference_genome=ReferenceGenome.GRCh38,
201199
dataset_type=DatasetType.SNV_INDEL,
202-
sample_type=SampleType.WGS,
203200
crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
204201
)
205202
worker.add(task)

v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
DatasetType,
1111
ReferenceDatasetCollection,
1212
ReferenceGenome,
13-
SampleType,
1413
)
1514
from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
1615
from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS
@@ -170,7 +169,6 @@ def test_update_task_with_empty_reference_data_table(
170169
task = UpdatedReferenceDatasetCollectionTask(
171170
reference_genome=ReferenceGenome.GRCh38,
172171
dataset_type=DatasetType.SNV_INDEL,
173-
sample_type=SampleType.WGS,
174172
reference_dataset_collection=ReferenceDatasetCollection.COMBINED,
175173
)
176174
worker.add(task)
@@ -280,7 +278,6 @@ def test_update_task_with_existing_reference_dataset_collection_table(
280278
task = UpdatedReferenceDatasetCollectionTask(
281279
reference_genome=ReferenceGenome.GRCh38,
282280
dataset_type=DatasetType.SNV_INDEL,
283-
sample_type=SampleType.WGS,
284281
reference_dataset_collection=ReferenceDatasetCollection.COMBINED,
285282
)
286283
worker.add(task)

v03_pipeline/lib/tasks/update_lookup_table.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
join_lookup_hts,
88
remove_family_guids,
99
)
10+
from v03_pipeline.lib.model import SampleType
1011
from v03_pipeline.lib.model.constants import PROJECTS_EXCLUDED_FROM_LOOKUP
1112
from v03_pipeline.lib.tasks.base.base_update_lookup_table import (
1213
BaseUpdateLookupTableTask,
@@ -17,6 +18,7 @@
1718

1819

1920
class UpdateLookupTableTask(BaseUpdateLookupTableTask):
21+
sample_type = luigi.EnumParameter(enum=SampleType)
2022
callset_paths = luigi.ListParameter()
2123
project_guids = luigi.ListParameter()
2224
project_remap_paths = luigi.ListParameter()

v03_pipeline/lib/tasks/update_lookup_table_with_deleted_families_test.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import hail as hl
44
import luigi.worker
55

6-
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
6+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
77
from v03_pipeline.lib.tasks.update_lookup_table_with_deleted_families import (
88
UpdateLookupTableWithDeletedFamiliesTask,
99
)
@@ -17,7 +17,6 @@ def test_delete_project_empty_table(
1717
worker = luigi.worker.Worker()
1818
task = UpdateLookupTableWithDeletedFamiliesTask(
1919
dataset_type=DatasetType.SNV_INDEL,
20-
sample_type=SampleType.WGS,
2120
reference_genome=ReferenceGenome.GRCh38,
2221
project_guid='R0555_seqr_demo',
2322
family_guids=['abc'],
@@ -132,7 +131,6 @@ def test_delete_project(
132131
worker = luigi.worker.Worker()
133132
task = UpdateLookupTableWithDeletedFamiliesTask(
134133
dataset_type=DatasetType.SNV_INDEL,
135-
sample_type=SampleType.WGS,
136134
reference_genome=ReferenceGenome.GRCh38,
137135
project_guid='project_a',
138136
family_guids=['1', '3'],

v03_pipeline/lib/tasks/update_lookup_table_with_deleted_project_test.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import hail as hl
44
import luigi.worker
55

6-
from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
6+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
77
from v03_pipeline.lib.tasks.update_lookup_table_with_deleted_project import (
88
UpdateLookupTableWithDeletedProjectTask,
99
)
@@ -17,7 +17,6 @@ def test_delete_project_empty_table(
1717
worker = luigi.worker.Worker()
1818
task = UpdateLookupTableWithDeletedProjectTask(
1919
dataset_type=DatasetType.SNV_INDEL,
20-
sample_type=SampleType.WGS,
2120
reference_genome=ReferenceGenome.GRCh38,
2221
project_guid='R0555_seqr_demo',
2322
)
@@ -131,7 +130,6 @@ def test_delete_project(
131130
worker = luigi.worker.Worker()
132131
task = UpdateLookupTableWithDeletedProjectTask(
133132
dataset_type=DatasetType.SNV_INDEL,
134-
sample_type=SampleType.WGS,
135133
reference_genome=ReferenceGenome.GRCh38,
136134
project_guid='project_a',
137135
)

0 commit comments

Comments
 (0)