Skip to content

Commit 2aed82f

Browse files
authored
remove version prefix from paths for local users (#917)
* remove prefix from paths for local users
* ruff
* get
* fix reference data path
* correct usage
* another bug
1 parent e15e33e commit 2aed82f

File tree

5 files changed

+36
-7
lines changed

5 files changed

+36
-7
lines changed

.github/workflows/unit-tests.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ jobs:
3939
export GRCH37_TO_GRCH38_LIFTOVER_REF_PATH=v03_pipeline/var/test/liftover/grch37_to_grch38.over.chain.gz
4040
export GRCH38_TO_GRCH37_LIFTOVER_REF_PATH=v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz
4141
export ACCESS_PRIVATE_REFERENCE_DATASETS=1
42+
export INCLUDE_PIPELINE_VERSION_IN_PREFIX=1
4243
export PYSPARK_SUBMIT_ARGS='--driver-memory 8G pyspark-shell'
4344
nosetests --with-coverage --cover-package v03_pipeline/lib v03_pipeline/lib
4445
coverage report --omit '*test*' --fail-under=75

v03_pipeline/lib/model/environment.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,9 @@
4444
)
4545
CHECK_SEX_AND_RELATEDNESS = os.environ.get('CHECK_SEX_AND_RELATEDNESS') == '1'
4646
EXPECT_WES_FILTERS = os.environ.get('EXPECT_WES_FILTERS') == '1'
47+
INCLUDE_PIPELINE_VERSION_IN_PREFIX = (
48+
os.environ.get('INCLUDE_PIPELINE_VERSION_IN_PREFIX') == '1'
49+
)
4750
SHOULD_REGISTER_ALLELES = os.environ.get('SHOULD_REGISTER_ALLELES') == '1'
4851
SHOULD_TRIGGER_HAIL_BACKEND_RELOAD = (
4952
os.environ.get('SHOULD_TRIGGER_HAIL_BACKEND_RELOAD') == '1'
@@ -56,12 +59,13 @@ class Env:
5659
ALLELE_REGISTRY_SECRET_NAME: str | None = ALLELE_REGISTRY_SECRET_NAME
5760
CHECK_SEX_AND_RELATEDNESS: bool = CHECK_SEX_AND_RELATEDNESS
5861
EXPECT_WES_FILTERS: bool = EXPECT_WES_FILTERS
62+
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH: str = GRCH37_TO_GRCH38_LIFTOVER_REF_PATH
63+
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH: str = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
5964
HAIL_BACKEND_SERVICE_HOSTNAME: str | None = HAIL_BACKEND_SERVICE_HOSTNAME
6065
HAIL_BACKEND_SERVICE_PORT: int = HAIL_BACKEND_SERVICE_PORT
6166
HAIL_TMP_DIR: str = HAIL_TMP_DIR
6267
HAIL_SEARCH_DATA_DIR: str = HAIL_SEARCH_DATA_DIR
63-
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH: str = GRCH37_TO_GRCH38_LIFTOVER_REF_PATH
64-
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH: str = GRCH38_TO_GRCH37_LIFTOVER_REF_PATH
68+
INCLUDE_PIPELINE_VERSION_IN_PREFIX: bool = INCLUDE_PIPELINE_VERSION_IN_PREFIX
6569
LOADING_DATASETS_DIR: str = LOADING_DATASETS_DIR
6670
PRIVATE_REFERENCE_DATASETS_DIR: str = PRIVATE_REFERENCE_DATASETS_DIR
6771
PROJECT_ID: str | None = PROJECT_ID

v03_pipeline/lib/paths.py

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,9 +19,15 @@ def _pipeline_prefix(
1919
reference_genome: ReferenceGenome,
2020
dataset_type: DatasetType,
2121
) -> str:
22+
if Env.INCLUDE_PIPELINE_VERSION_IN_PREFIX:
23+
return os.path.join(
24+
root,
25+
PipelineVersion.V3_1.value,
26+
reference_genome.value,
27+
dataset_type.value,
28+
)
2229
return os.path.join(
2330
root,
24-
PipelineVersion.V3_1.value,
2531
reference_genome.value,
2632
dataset_type.value,
2733
)
@@ -30,16 +36,24 @@ def _pipeline_prefix(
3036
def _v03_reference_data_prefix(
3137
access_control: AccessControl,
3238
reference_genome: ReferenceGenome,
39+
dataset_type: DatasetType,
3340
) -> str:
3441
root = (
3542
Env.PRIVATE_REFERENCE_DATASETS_DIR
3643
if access_control == AccessControl.PRIVATE
3744
else Env.REFERENCE_DATASETS_DIR
3845
)
46+
if Env.INCLUDE_PIPELINE_VERSION_IN_PREFIX:
47+
return os.path.join(
48+
root,
49+
PipelineVersion.V03.value,
50+
reference_genome.value,
51+
dataset_type.value,
52+
)
3953
return os.path.join(
4054
root,
41-
PipelineVersion.V03.value,
4255
reference_genome.value,
56+
dataset_type.value,
4357
)
4458

4559

@@ -52,8 +66,8 @@ def cached_reference_dataset_query_path(
5266
_v03_reference_data_prefix(
5367
AccessControl.PUBLIC,
5468
reference_genome,
69+
dataset_type,
5570
),
56-
dataset_type.value,
5771
'cached_reference_dataset_queries',
5872
f'{cached_reference_dataset_query.value}.ht',
5973
)
@@ -267,8 +281,8 @@ def valid_reference_dataset_collection_path(
267281
_v03_reference_data_prefix(
268282
reference_dataset_collection.access_control,
269283
reference_genome,
284+
dataset_type,
270285
),
271-
dataset_type.value,
272286
'reference_datasets',
273287
f'{reference_dataset_collection.value}.ht',
274288
)

v03_pipeline/lib/paths_test.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,16 @@ def test_family_table_path(self) -> None:
6161
),
6262
'gs://seqr-datasets/v3.1/GRCh37/SNV_INDEL/families/WES/franklin.ht',
6363
)
64+
mock_env.INCLUDE_PIPELINE_VERSION_IN_PREFIX = False
65+
self.assertEqual(
66+
family_table_path(
67+
ReferenceGenome.GRCh37,
68+
DatasetType.SNV_INDEL,
69+
SampleType.WES,
70+
'franklin',
71+
),
72+
'gs://seqr-datasets/GRCh37/SNV_INDEL/families/WES/franklin.ht',
73+
)
6474

6575
def test_valid_filters_path(self) -> None:
6676
self.assertEqual(

v03_pipeline/lib/test/mocked_dataroot_testcase.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ def setUp(self) -> None:
1313
self.mock_env = patcher.start()
1414
self.addCleanup(patcher.stop) # https://stackoverflow.com/a/37534051
1515
for field_name in Env.__dataclass_fields__:
16-
if 'DATA' in field_name or 'DIR' in field_name:
16+
if field_name.endswith('_DIR'):
1717
setattr(self.mock_env, field_name, tempfile.TemporaryDirectory().name)
1818

1919
def tearDown(self) -> None:

0 commit comments

Comments
 (0)