Skip to content

Commit 686c2cf

Browse files
authored
Remove concept of private crdqs (#807)
* Remove concept of private crdqs
* lint
* fix logic
1 parent 63b7bb7 commit 686c2cf

9 files changed

+33
-48
lines changed

v03_pipeline/lib/model/cached_reference_dataset_query.py

Lines changed: 2 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,7 @@
44
import hail as hl
55

66
from v03_pipeline.lib.model.dataset_type import DatasetType
7-
from v03_pipeline.lib.model.definitions import AccessControl, ReferenceGenome
8-
from v03_pipeline.lib.model.environment import Env
7+
from v03_pipeline.lib.model.definitions import ReferenceGenome
98
from v03_pipeline.lib.reference_data.queries import (
109
clinvar_path_variants,
1110
gnomad_coding_and_noncoding_variants,
@@ -20,10 +19,6 @@ class CachedReferenceDatasetQuery(Enum):
2019
GNOMAD_QC = 'gnomad_qc'
2120
HIGH_AF_VARIANTS = 'high_af_variants'
2221

23-
@property
24-
def access_control(self) -> AccessControl:
25-
return AccessControl.PUBLIC
26-
2722
def dataset(self, dataset_type: DatasetType) -> str | None:
2823
return {
2924
CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS: 'clinvar_mito'
@@ -56,15 +51,10 @@ def for_reference_genome_dataset_type(
5651
reference_genome: ReferenceGenome,
5752
dataset_type: DatasetType,
5853
) -> list['CachedReferenceDatasetQuery']:
59-
crdqs = {
54+
return {
6055
(ReferenceGenome.GRCh38, DatasetType.SNV_INDEL): list(cls),
6156
(ReferenceGenome.GRCh38, DatasetType.MITO): [
6257
CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
6358
],
6459
(ReferenceGenome.GRCh37, DatasetType.SNV_INDEL): list(cls),
6560
}.get((reference_genome, dataset_type), [])
66-
if not Env.ACCESS_PRIVATE_REFERENCE_DATASETS:
67-
return [
68-
crdq for crdq in crdqs if crdq.access_control == AccessControl.PUBLIC
69-
]
70-
return crdqs

v03_pipeline/lib/paths.py

Lines changed: 16 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,22 @@ def _v03_reference_data_prefix(
4141
)
4242

4343

44+
def cached_reference_dataset_query_path(
45+
reference_genome: ReferenceGenome,
46+
dataset_type: DatasetType,
47+
cached_reference_dataset_query: CachedReferenceDatasetQuery,
48+
) -> str:
49+
return os.path.join(
50+
_v03_reference_data_prefix(
51+
AccessControl.PUBLIC,
52+
reference_genome,
53+
),
54+
dataset_type.value,
55+
'cached_reference_dataset_queries',
56+
f'{cached_reference_dataset_query.value}.ht',
57+
)
58+
59+
4460
def family_table_path(
4561
reference_genome: ReferenceGenome,
4662
dataset_type: DatasetType,
@@ -182,27 +198,6 @@ def sex_check_table_path(
182198
)
183199

184200

185-
def valid_cached_reference_dataset_query_path(
186-
reference_genome: ReferenceGenome,
187-
dataset_type: DatasetType,
188-
cached_reference_dataset_query: CachedReferenceDatasetQuery,
189-
) -> str | None:
190-
if (
191-
not Env.ACCESS_PRIVATE_REFERENCE_DATASETS
192-
and cached_reference_dataset_query.access_control == AccessControl.PRIVATE
193-
):
194-
return None
195-
return os.path.join(
196-
_v03_reference_data_prefix(
197-
cached_reference_dataset_query.access_control,
198-
reference_genome,
199-
),
200-
dataset_type.value,
201-
'cached_reference_dataset_queries',
202-
f'{cached_reference_dataset_query.value}.ht',
203-
)
204-
205-
206201
def valid_reference_dataset_collection_path(
207202
reference_genome: ReferenceGenome,
208203
dataset_type: DatasetType,

v03_pipeline/lib/paths_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
ReferenceGenome,
99
)
1010
from v03_pipeline.lib.paths import (
11+
cached_reference_dataset_query_path,
1112
family_table_path,
1213
imported_callset_path,
1314
lookup_table_path,
@@ -17,7 +18,6 @@
1718
relatedness_check_table_path,
1819
remapped_and_subsetted_callset_path,
1920
sex_check_table_path,
20-
valid_cached_reference_dataset_query_path,
2121
valid_reference_dataset_collection_path,
2222
variant_annotations_table_path,
2323
)
@@ -26,7 +26,7 @@
2626
class TestPaths(unittest.TestCase):
2727
def test_cached_reference_dataset_query_path(self) -> None:
2828
self.assertEqual(
29-
valid_cached_reference_dataset_query_path(
29+
cached_reference_dataset_query_path(
3030
ReferenceGenome.GRCh38,
3131
DatasetType.SNV_INDEL,
3232
CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,

v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
ReferenceDatasetCollection,
99
)
1010
from v03_pipeline.lib.paths import (
11-
valid_cached_reference_dataset_query_path,
11+
cached_reference_dataset_query_path,
1212
valid_reference_dataset_collection_path,
1313
)
1414
from v03_pipeline.lib.reference_data.compare_globals import (
@@ -56,7 +56,7 @@ def complete(self) -> bool:
5656

5757
def output(self) -> luigi.Target:
5858
return GCSorLocalTarget(
59-
valid_cached_reference_dataset_query_path(
59+
cached_reference_dataset_query_path(
6060
self.reference_genome,
6161
self.dataset_type,
6262
self.crdq,

v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
SampleType,
1515
)
1616
from v03_pipeline.lib.paths import (
17-
valid_cached_reference_dataset_query_path,
17+
cached_reference_dataset_query_path,
1818
valid_reference_dataset_collection_path,
1919
)
2020
from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS
@@ -167,7 +167,7 @@ def test_clinvar(
167167
# clinvar has version '2022-01-01'
168168
shutil.copytree(
169169
CLINVAR_CRDQ_PATH,
170-
valid_cached_reference_dataset_query_path(
170+
cached_reference_dataset_query_path(
171171
ReferenceGenome.GRCh38,
172172
DatasetType.SNV_INDEL,
173173
CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
SampleType,
2525
)
2626
from v03_pipeline.lib.paths import (
27-
valid_cached_reference_dataset_query_path,
27+
cached_reference_dataset_query_path,
2828
valid_reference_dataset_collection_path,
2929
)
3030
from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS
@@ -341,7 +341,7 @@ def test_multiple_update_vat(
341341
),
342342
)
343343
coding_and_noncoding_variants_ht.write(
344-
valid_cached_reference_dataset_query_path(
344+
cached_reference_dataset_query_path(
345345
ReferenceGenome.GRCh38,
346346
DatasetType.SNV_INDEL,
347347
CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,

v03_pipeline/lib/tasks/write_imported_callset.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,9 +18,9 @@
1818
from v03_pipeline.lib.model import CachedReferenceDatasetQuery
1919
from v03_pipeline.lib.model.environment import Env
2020
from v03_pipeline.lib.paths import (
21+
cached_reference_dataset_query_path,
2122
imported_callset_path,
2223
sex_check_table_path,
23-
valid_cached_reference_dataset_query_path,
2424
)
2525
from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
2626
from v03_pipeline.lib.tasks.files import CallsetTask, GCSorLocalTarget, HailTableTask
@@ -86,7 +86,7 @@ def requires(self) -> list[luigi.Task]:
8686
)
8787
if Env.REFERENCE_DATA_AUTO_UPDATE
8888
else HailTableTask(
89-
valid_cached_reference_dataset_query_path(
89+
cached_reference_dataset_query_path(
9090
self.reference_genome,
9191
self.dataset_type,
9292
CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
@@ -169,7 +169,7 @@ def create_table(self) -> hl.MatrixTable:
169169
validate_no_duplicate_variants(mt)
170170
validate_expected_contig_frequency(mt, self.reference_genome)
171171
coding_and_noncoding_ht = hl.read_table(
172-
valid_cached_reference_dataset_query_path(
172+
cached_reference_dataset_query_path(
173173
self.reference_genome,
174174
self.dataset_type,
175175
CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,

v03_pipeline/lib/tasks/write_relatedness_check_table.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@
44
from v03_pipeline.lib.methods.relatedness import call_relatedness
55
from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env
66
from v03_pipeline.lib.paths import (
7+
cached_reference_dataset_query_path,
78
relatedness_check_table_path,
8-
valid_cached_reference_dataset_query_path,
99
)
1010
from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
1111
from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask
@@ -48,7 +48,7 @@ def requires(self) -> luigi.Task:
4848
)
4949
if Env.REFERENCE_DATA_AUTO_UPDATE
5050
else HailTableTask(
51-
valid_cached_reference_dataset_query_path(
51+
cached_reference_dataset_query_path(
5252
self.reference_genome,
5353
self.dataset_type,
5454
CachedReferenceDatasetQuery.GNOMAD_QC,

v03_pipeline/lib/tasks/write_relatedness_check_table_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,9 @@
1212
SampleType,
1313
)
1414
from v03_pipeline.lib.paths import (
15+
cached_reference_dataset_query_path,
1516
imported_callset_path,
1617
relatedness_check_table_path,
17-
valid_cached_reference_dataset_query_path,
1818
)
1919
from v03_pipeline.lib.tasks.write_relatedness_check_table import (
2020
WriteRelatednessCheckTableTask,
@@ -45,7 +45,7 @@
4545
class WriteRelatednessCheckTableTaskTest(MockedDatarootTestCase):
4646
def setUp(self) -> None:
4747
super().setUp()
48-
self.gnomad_qc_path = valid_cached_reference_dataset_query_path(
48+
self.gnomad_qc_path = cached_reference_dataset_query_path(
4949
ReferenceGenome.GRCh38,
5050
DatasetType.SNV_INDEL,
5151
CachedReferenceDatasetQuery.GNOMAD_QC,

0 commit comments

Comments
 (0)