Skip to content

Commit 7f7158c

Browse files
authored
Feature flag reference data update (#692)
* Feature flag reference data update
* ruff
* Add flag to unit tests
* Update update_variant_annotations_table_with_new_samples.py
1 parent 73b1fc3 commit 7f7158c

File tree

4 files changed

+31
-15
lines changed

4 files changed

+31
-15
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ jobs:
3434
run: ruff . --output-format github
3535
- name: Unit Tests
3636
run: |
37+
export REFERENCE_DATA_AUTO_UPDATE=1
3738
export ACCESS_PRIVATE_REFERENCE_DATASETS=1
3839
export PYSPARK_SUBMIT_ARGS='--driver-memory 8G pyspark-shell'
3940
nosetests --with-coverage --cover-package v03_pipeline/lib v03_pipeline/lib

v03_pipeline/lib/model/environment.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
os.environ.get('ACCESS_PRIVATE_REFERENCE_DATASETS') == '1'
77
)
88
CHECK_SEX_AND_RELATEDNESS = os.environ.get('CHECK_SEX_AND_RELATEDNESS') == '1'
9+
REFERENCE_DATA_AUTO_UPDATE = os.environ.get('REFERENCE_DATA_AUTO_UPDATE') == '1'
910
HAIL_TMPDIR = os.environ.get('HAIL_TMPDIR', '/tmp') # noqa: S108
1011
HAIL_SEARCH_DATA = os.environ.get('HAIL_SEARCH_DATA', '/hail-search-data')
1112
LOADING_DATASETS = os.environ.get('LOADING_DATASETS', '/seqr-loading-temp')
@@ -23,6 +24,7 @@
2324
class Env:
2425
ACCESS_PRIVATE_REFERENCE_DATASETS: bool = ACCESS_PRIVATE_REFERENCE_DATASETS
2526
CHECK_SEX_AND_RELATEDNESS: bool = CHECK_SEX_AND_RELATEDNESS
27+
REFERENCE_DATA_AUTO_UPDATE: bool = REFERENCE_DATA_AUTO_UPDATE
2628
HAIL_TMPDIR: str = HAIL_TMPDIR
2729
HAIL_SEARCH_DATA: str = HAIL_SEARCH_DATA
2830
LOADING_DATASETS: str = LOADING_DATASETS

v03_pipeline/lib/tasks/base/base_variant_annotations_table.py

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,13 @@
11
import hail as hl
22
import luigi
33

4-
from v03_pipeline.lib.model import ReferenceDatasetCollection
4+
from v03_pipeline.lib.model import Env, ReferenceDatasetCollection
55
from v03_pipeline.lib.paths import (
66
valid_reference_dataset_collection_path,
77
variant_annotations_table_path,
88
)
99
from v03_pipeline.lib.tasks.base.base_update_task import BaseUpdateTask
10-
from v03_pipeline.lib.tasks.files import GCSorLocalTarget
10+
from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask
1111
from v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection import (
1212
UpdatedReferenceDatasetCollectionTask,
1313
)
@@ -40,11 +40,21 @@ def output(self) -> luigi.Target:
4040

4141
def requires(self) -> list[luigi.Task]:
4242
return [
43-
UpdatedReferenceDatasetCollectionTask(
44-
self.reference_genome,
45-
self.dataset_type,
46-
self.sample_type,
47-
rdc,
43+
(
44+
UpdatedReferenceDatasetCollectionTask(
45+
self.reference_genome,
46+
self.dataset_type,
47+
self.sample_type,
48+
rdc,
49+
)
50+
if Env.REFERENCE_DATA_AUTO_UPDATE
51+
else HailTableTask(
52+
valid_reference_dataset_collection_path(
53+
self.reference_genome,
54+
self.dataset_type,
55+
rdc,
56+
),
57+
)
4858
)
4959
for rdc in ReferenceDatasetCollection.for_reference_genome_dataset_type(
5060
self.reference_genome,

v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from v03_pipeline.lib.annotations.fields import get_fields
99
from v03_pipeline.lib.misc.math import constrain
1010
from v03_pipeline.lib.misc.util import callset_project_pairs
11-
from v03_pipeline.lib.model import ReferenceDatasetCollection
11+
from v03_pipeline.lib.model import Env, ReferenceDatasetCollection
1212
from v03_pipeline.lib.paths import (
1313
remapped_and_subsetted_callset_path,
1414
sample_lookup_table_path,
@@ -76,13 +76,16 @@ def other_annotation_dependencies(self) -> dict[str, hl.Table]:
7676
return annotation_dependencies
7777

7878
def requires(self) -> list[luigi.Task]:
79-
upstream_table_tasks: list[luigi.Task] = [
80-
UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
81-
self.reference_genome,
82-
self.dataset_type,
83-
self.sample_type,
84-
),
85-
]
79+
if Env.REFERENCE_DATA_AUTO_UPDATE:
80+
upstream_table_tasks: list[luigi.Task] = [
81+
UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
82+
self.reference_genome,
83+
self.dataset_type,
84+
self.sample_type,
85+
),
86+
]
87+
else:
88+
upstream_table_tasks: list[luigi.Task] = []
8689
if self.dataset_type.has_sample_lookup_table:
8790
# NB: the sample lookup table task has remapped and subsetted callset tasks as dependencies.
8891
upstream_table_tasks.extend(

0 commit comments

Comments
 (0)