Skip to content

Commit 6c7c578

Browse files
authored
Remove reference data env var (#874)
* Remove reference data env var
* lint
* Remove env var from unit tests
1 parent 43bbcc3 commit 6c7c578

File tree

7 files changed

+13
-63
lines changed

7 files changed

+13
-63
lines changed

.github/workflows/unit-tests.yml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@ jobs:
3636
run: ruff . --output-format github
3737
- name: Unit Tests
3838
run: |
39-
export REFERENCE_DATA_AUTO_UPDATE=1
4039
export ACCESS_PRIVATE_REFERENCE_DATASETS=1
4140
export PYSPARK_SUBMIT_ARGS='--driver-memory 8G pyspark-shell'
4241
nosetests --with-coverage --cover-package v03_pipeline/lib v03_pipeline/lib

v03_pipeline/lib/model/environment.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@
3434
)
3535
CHECK_SEX_AND_RELATEDNESS = os.environ.get('CHECK_SEX_AND_RELATEDNESS') == '1'
3636
EXPECT_WES_FILTERS = os.environ.get('EXPECT_WES_FILTERS') == '1'
37-
REFERENCE_DATA_AUTO_UPDATE = os.environ.get('REFERENCE_DATA_AUTO_UPDATE') == '1'
3837
SHOULD_REGISTER_ALLELES = os.environ.get('SHOULD_REGISTER_ALLELES') == '1'
3938

4039

@@ -51,7 +50,6 @@ class Env:
5150
LOADING_DATASETS: str = LOADING_DATASETS
5251
PRIVATE_REFERENCE_DATASETS: str = PRIVATE_REFERENCE_DATASETS
5352
PROJECT_ID: str | None = PROJECT_ID
54-
REFERENCE_DATA_AUTO_UPDATE: bool = REFERENCE_DATA_AUTO_UPDATE
5553
REFERENCE_DATASETS: str = REFERENCE_DATASETS
5654
SHOULD_REGISTER_ALLELES: bool = SHOULD_REGISTER_ALLELES
5755
VEP_CONFIG_PATH: str | None = VEP_CONFIG_PATH

v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py

Lines changed: 1 addition & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6,15 +6,13 @@
66
get_rdc_annotation_dependencies,
77
)
88
from v03_pipeline.lib.model import (
9-
Env,
109
ReferenceDatasetCollection,
1110
)
1211
from v03_pipeline.lib.paths import (
13-
valid_reference_dataset_collection_path,
1412
variant_annotations_table_path,
1513
)
1614
from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
17-
from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask
15+
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
1816
from v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection import (
1917
UpdatedReferenceDatasetCollectionTask,
2018
)
@@ -41,14 +39,6 @@ def requires(self) -> list[luigi.Task]:
4139
self.dataset_type,
4240
rdc,
4341
)
44-
if Env.REFERENCE_DATA_AUTO_UPDATE
45-
else HailTableTask(
46-
valid_reference_dataset_collection_path(
47-
self.reference_genome,
48-
self.dataset_type,
49-
rdc,
50-
),
51-
)
5242
)
5343
for rdc in ReferenceDatasetCollection.for_reference_genome_dataset_type(
5444
self.reference_genome,

v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
from v03_pipeline.lib.logger import get_logger
55
from v03_pipeline.lib.model import (
66
CachedReferenceDatasetQuery,
7-
Env,
87
ReferenceDatasetCollection,
98
)
109
from v03_pipeline.lib.paths import (
@@ -64,12 +63,6 @@ def output(self) -> luigi.Target:
6463
)
6564

6665
def requires(self) -> luigi.Task:
67-
if Env.REFERENCE_DATA_AUTO_UPDATE and not self.crdq.query_raw_dataset:
68-
return UpdatedReferenceDatasetCollectionTask(
69-
self.reference_genome,
70-
self.dataset_type,
71-
ReferenceDatasetCollection.COMBINED,
72-
)
7366
if self.crdq.query_raw_dataset:
7467
return HailTableTask(
7568
get_ht_path(
@@ -78,12 +71,10 @@ def requires(self) -> luigi.Task:
7871
],
7972
),
8073
)
81-
return HailTableTask(
82-
valid_reference_dataset_collection_path(
83-
self.reference_genome,
84-
self.dataset_type,
85-
ReferenceDatasetCollection.COMBINED,
86-
),
74+
return UpdatedReferenceDatasetCollectionTask(
75+
self.reference_genome,
76+
self.dataset_type,
77+
ReferenceDatasetCollection.COMBINED,
8778
)
8879

8980
def create_table(self) -> hl.Table:

v03_pipeline/lib/tasks/validate_callset.py

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
)
2121
from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
2222
from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
23-
from v03_pipeline.lib.tasks.files import CallsetTask, GCSorLocalTarget, HailTableTask
23+
from v03_pipeline.lib.tasks.files import CallsetTask, GCSorLocalTarget
2424
from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import (
2525
UpdatedCachedReferenceDatasetQuery,
2626
)
@@ -59,14 +59,6 @@ def requires(self) -> list[luigi.Task]:
5959
UpdatedCachedReferenceDatasetQuery,
6060
crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
6161
)
62-
if Env.REFERENCE_DATA_AUTO_UPDATE
63-
else HailTableTask(
64-
cached_reference_dataset_query_path(
65-
self.reference_genome,
66-
self.dataset_type,
67-
CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
68-
),
69-
),
7062
),
7163
]
7264
if (

v03_pipeline/lib/tasks/write_new_variants_table.py

Lines changed: 6 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,6 @@
2222
load_gencode_gene_symbol_to_gene_id,
2323
)
2424
from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
25-
from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import (
26-
BaseUpdateVariantAnnotationsTableTask,
27-
)
2825
from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
2926
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
3027
from v03_pipeline.lib.tasks.reference_data.update_variant_annotations_table_with_updated_reference_dataset import (
@@ -81,20 +78,12 @@ def output(self) -> luigi.Target:
8178
)
8279

8380
def requires(self) -> list[luigi.Task]:
84-
if Env.REFERENCE_DATA_AUTO_UPDATE:
85-
requirements = [
86-
UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
87-
self.reference_genome,
88-
self.dataset_type,
89-
),
90-
]
91-
else:
92-
requirements = [
93-
BaseUpdateVariantAnnotationsTableTask(
94-
self.reference_genome,
95-
self.dataset_type,
96-
),
97-
]
81+
requirements = [
82+
UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
83+
self.reference_genome,
84+
self.dataset_type,
85+
),
86+
]
9887
if self.dataset_type.has_lookup_table:
9988
# NB: the lookup table task has remapped and subsetted callset tasks as dependencies.
10089
# Also note that force is passed here,

v03_pipeline/lib/tasks/write_relatedness_check_table.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,11 @@
55
from v03_pipeline.lib.methods.relatedness import call_relatedness
66
from v03_pipeline.lib.model import CachedReferenceDatasetQuery, Env
77
from v03_pipeline.lib.paths import (
8-
cached_reference_dataset_query_path,
98
relatedness_check_table_path,
109
)
1110
from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
1211
from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
13-
from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask
12+
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
1413
from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import (
1514
UpdatedCachedReferenceDatasetQuery,
1615
)
@@ -40,14 +39,6 @@ def requires(self) -> luigi.Task:
4039
UpdatedCachedReferenceDatasetQuery,
4140
crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
4241
)
43-
if Env.REFERENCE_DATA_AUTO_UPDATE
44-
else HailTableTask(
45-
cached_reference_dataset_query_path(
46-
self.reference_genome,
47-
self.dataset_type,
48-
CachedReferenceDatasetQuery.GNOMAD_QC,
49-
),
50-
)
5142
),
5243
]
5344
return requirements

0 commit comments

Comments
 (0)