Skip to content

Commit a444f2c

Browse files
authored
rename vep data env var (#898)
* rename vep data env var
* lint
1 parent 787fcc1 commit a444f2c

File tree

5 files changed

+22
-14
lines changed

5 files changed

+22
-14
lines changed

v03_pipeline/bin/dataproc_vep_init.bash

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ EOF
5252
gcc -Wall -Werror -O2 /vep.c -o /vep
5353
chmod u+s /vep
5454

55-
gcloud storage cp gs://seqr-luigi/releases/$ENVIRONMENT/latest/bin/download_vep_data.bash /download_vep_data.bash
56-
chmod +x /download_vep_data.bash
57-
./download_vep_data.bash $REFERENCE_GENOME
55+
gcloud storage cp gs://seqr-luigi/releases/$ENVIRONMENT/latest/bin/download_vep_reference_data.bash /download_vep_reference_data.bash
56+
chmod +x /download_vep_reference_data.bash
57+
./download_vep_reference_data.bash $REFERENCE_GENOME
5858

5959
gcloud storage cp gs://seqr-luigi/releases/$ENVIRONMENT/latest/bin/vep /vep.bash
6060
chmod +x /vep.bash

v03_pipeline/bin/download_vep_data.bash renamed to v03_pipeline/bin/download_vep_reference_data.bash

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -eux
44

55
REFERENCE_GENOME=$1
6-
VEP_DATA=/seqr/vep_data
6+
VEP_REFERENCE_DATASETS_DIR=${VEP_REFERENCE_DATASETS_DIR:-/seqr/vep-reference-data}
77

88
case $REFERENCE_GENOME in
99
GRCh38)
@@ -43,20 +43,20 @@ case $REFERENCE_GENOME in
4343
exit 1
4444
esac
4545

46-
if [ -f $VEP_DATA/$REFERENCE_GENOME/_SUCCESS ]; then
46+
if [ -f $VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/_SUCCESS ]; then
4747
echo "Skipping download because already successful"
4848
exit 0;
4949
fi
5050

51-
mkdir -p $VEP_DATA/$REFERENCE_GENOME;
51+
mkdir -p $VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME;
5252
for vep_reference_data_file in ${VEP_REFERENCE_DATA_FILES[@]}; do
5353
if [[ $vep_reference_data_file == *.tar.gz ]]; then
5454
echo "Downloading and extracting" $vep_reference_data_file;
55-
gsutil cat $vep_reference_data_file | tar -xzf - -C $VEP_DATA/$REFERENCE_GENOME/ &
55+
gsutil cat $vep_reference_data_file | tar -xzf - -C $VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/ &
5656
else
5757
echo "Downloading" $vep_reference_data_file;
58-
gsutil cat $vep_reference_data_file $VEP_DATA/$REFERENCE_GENOME/ &
58+
# Non-archive files are copied into the genome directory; `gsutil cat` would only stream them to stdout.
gsutil cp "$vep_reference_data_file" "$VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/" &
5959
fi
6060
done;
6161
wait
62-
touch $VEP_DATA/$REFERENCE_GENOME/_SUCCESS
62+
touch $VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/_SUCCESS

v03_pipeline/bin/vep

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
set -eux
44

55
REFERENCE_GENOME=$1
6-
VEP_DATA=/seqr/vep_data
6+
VEP_REFERENCE_DATASETS_DIR=${VEP_REFERENCE_DATASETS_DIR:-/seqr/vep-reference-data}
77
VEP_DOCKER_IMAGE="gcr.io/seqr-project/vep-docker-image"
88

99
case $REFERENCE_GENOME in
@@ -17,5 +17,5 @@ case $REFERENCE_GENOME in
1717
esac
1818

1919
shift # Remove the REFERENCE_GENOME arg.
20-
docker run --platform linux/amd64 -i -v $VEP_DATA/$REFERENCE_GENOME:/opt/vep/.vep/:ro $VEP_DOCKER_IMAGE:$REFERENCE_GENOME \
20+
docker run --platform linux/amd64 -i -v $VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME:/opt/vep/.vep/:ro $VEP_DOCKER_IMAGE:$REFERENCE_GENOME \
2121
/opt/vep/src/ensembl-vep/vep $@

v03_pipeline/lib/model/environment.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
'REFERENCE_DATASETS_DIR',
2222
'/seqr/seqr-reference-data',
2323
)
24+
VEP_REFERENCE_DATASETS_DIR = os.environ.get(
25+
'VEP_REFERENCE_DATASETS_DIR',
26+
'/seqr/vep-reference-data',
27+
)
2428

2529
# Allele registry secrets :/
2630
ALLELE_REGISTRY_SECRET_NAME = os.environ.get('ALLELE_REGISTRY_SECRET_NAME', None)
@@ -50,3 +54,4 @@ class Env:
5054
PROJECT_ID: str | None = PROJECT_ID
5155
REFERENCE_DATASETS_DIR: str = REFERENCE_DATASETS_DIR
5256
SHOULD_REGISTER_ALLELES: bool = SHOULD_REGISTER_ALLELES
57+
VEP_REFERENCE_DATASETS_DIR: str = VEP_REFERENCE_DATASETS_DIR

v03_pipeline/lib/vep.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
import hail as hl
44

5-
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
5+
from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome
66

77
VEP_CONFIG_URI = Template(
8-
'file:///seqr/vep_data/$reference_genome/vep-$reference_genome.json',
8+
'file://$vep_reference_datasets_dir/$reference_genome/vep-$reference_genome.json',
99
)
1010

1111

@@ -18,7 +18,10 @@ def run_vep(
1818
return ht
1919
return hl.vep(
2020
ht,
21-
config=VEP_CONFIG_URI.substitute(reference_genome=reference_genome.value),
21+
config=VEP_CONFIG_URI.substitute(
22+
vep_reference_datasets_dir=Env.VEP_REFERENCE_DATASETS_DIR,
23+
reference_genome=reference_genome.value,
24+
),
2225
name='vep',
2326
block_size=1000,
2427
tolerate_parse_error=True,

0 commit comments

Comments
 (0)