Skip to content

Commit 2453532

Browse files
authored
Merge pull request #1012 from broadinstitute/benb/move_liftover_into_repo
feat: move liftover file into repo to remove dependency on gcs
2 parents b9df3b6 + f913424 commit 2453532

18 files changed

+44
-54
lines changed

docker/bin/download_reference_data.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,8 @@ mkdir -p "/seqr-reference-data/GRCh${BUILD_VERSION}/${CLINVAR_HT}"
3434
cd "/seqr-reference-data/GRCh${BUILD_VERSION}"
3535
gsutil -m rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/all_reference_data/${REF_DATA_HT}" "./${REF_DATA_HT}"
3636
gsutil -m rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/clinvar/${CLINVAR_HT}" "./${CLINVAR_HT}"
37+
gsutil -m rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/validate_ht/common_noncoding_variants.grch${BUILD_VERSION}.ht" "./common_noncoding_variants.grch${BUILD_VERSION}.ht"
38+
gsutil -m rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/validate_ht/common_coding_variants.grch${BUILD_VERSION}.ht" "./common_coding_variants.grch${BUILD_VERSION}.ht"
39+
40+
cd "/seqr-reference-data/"
41+
gsutil cp "gs://hail-common/references/grch38_to_grch37.over.chain.gz" "./grch38_to_grch37.over.chain.gz"

docker/bin/load_data.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ python3 -m seqr_loading SeqrMTToESTask --local-scheduler \
2626
--reference-ht-path "/seqr-reference-data/${FULL_BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht" \
2727
--clinvar-ht-path "/seqr-reference-data/${FULL_BUILD_VERSION}/clinvar.${FULL_BUILD_VERSION}.ht" \
2828
--vep-config-json-path "/vep_configs/vep-${FULL_BUILD_VERSION}-loftee.json" \
29+
--grch38-to-grch37-ref-chain "/seqr-reference-data/grch38_to_grch37.over.chain.gz" \
30+
--GlobalConfig-validation-37-noncoding-ht "/seqr-reference-data/GRCh37/common_noncoding_variants.grch37.ht" \
31+
--GlobalConfig-validation-37-coding-ht "/seqr-reference-data/GRCh37/common_coding_variants.grch37.ht" \
32+
--GlobalConfig-validation-38-noncoding-ht "/seqr-reference-data/GRCh38/common_noncoding_variants.grch38.ht" \
33+
--GlobalConfig-validation-38-coding-ht "/seqr-reference-data/GRCh38/common_coding_variants.grch38.ht" \
2934
--es-host elasticsearch \
3035
--es-index-min-num-shards 1 \
3136
--sample-type "${SAMPLE_TYPE}" \

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ dynamic = ['version']
44
requires-python = ">=3.10"
55

66
[tool.setuptools]
7-
include-package-data = false
7+
include-package-data = true
8+
9+
[tool.setuptools.package-data]
10+
"v03_pipeline" = ["var/liftover/*.over.chain.gz"]
811

912
[tool.setuptools.dynamic]
1013
version = {attr = "v03_pipeline.__version__"}

v03_pipeline/lib/annotations/fields_test.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,6 @@
1414
from v03_pipeline.lib.vep import run_vep
1515
from v03_pipeline.var.test.vep.mock_vep_data import MOCK_37_VEP_DATA, MOCK_38_VEP_DATA
1616

17-
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH = (
18-
'v03_pipeline/var/test/liftover/grch37_to_grch38.over.chain.gz'
19-
)
20-
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = (
21-
'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz'
22-
)
2317
TEST_GNOMAD_NONCODING_CONSTRAINT_38_HT = 'v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_non_coding_constraint/1.0.ht'
2418
TEST_SCREEN_38_HT = 'v03_pipeline/var/test/reference_datasets/GRCh38/screen/1.0.ht'
2519

@@ -144,12 +138,9 @@ def test_get_formatting_fields(self, mock_vep: Mock) -> None:
144138
'gencode_ensembl_to_refseq_id_mapping': hl.dict(
145139
{'a': 'b'},
146140
),
147-
'grch38_to_grch37_liftover_ref_path': GRCH38_TO_GRCH37_LIFTOVER_REF_PATH,
148141
}
149142
if reference_genome == ReferenceGenome.GRCh38
150-
else {
151-
'grch37_to_grch38_liftover_ref_path': GRCH37_TO_GRCH38_LIFTOVER_REF_PATH,
152-
}
143+
else {}
153144
),
154145
dataset_type=DatasetType.SNV_INDEL,
155146
reference_genome=reference_genome,

v03_pipeline/lib/annotations/gcnv.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,9 @@ def QS(mt: hl.MatrixTable, **_: Any) -> hl.Expression: # noqa: N802
8585

8686
def rg37_locus(
8787
ht: hl.Table,
88-
grch38_to_grch37_liftover_ref_path: str,
8988
**_: Any,
9089
) -> hl.Expression | None:
91-
liftover.add_rg38_liftover(grch38_to_grch37_liftover_ref_path)
90+
liftover.add_rg38_liftover()
9291
return hl.liftover(
9392
start_locus(ht, ReferenceGenome.GRCh38),
9493
ReferenceGenome.GRCh37.value,
@@ -97,10 +96,9 @@ def rg37_locus(
9796

9897
def rg37_locus_end(
9998
ht: hl.Table,
100-
grch38_to_grch37_liftover_ref_path: str,
10199
**_: Any,
102100
) -> hl.Expression | None:
103-
liftover.add_rg38_liftover(grch38_to_grch37_liftover_ref_path)
101+
liftover.add_rg38_liftover()
104102
return hl.liftover(
105103
end_locus(ht, ReferenceGenome.GRCh38),
106104
ReferenceGenome.GRCh37.value,

v03_pipeline/lib/annotations/liftover.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
import hail as hl
22

3+
from v03_pipeline.lib.model.constants import (
4+
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH,
5+
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH,
6+
)
37
from v03_pipeline.lib.model.definitions import ReferenceGenome
48

59

6-
def add_rg38_liftover(grch38_to_grch37_liftover_ref_path: str) -> None:
10+
def add_rg38_liftover() -> None:
711
rg37 = hl.get_reference(ReferenceGenome.GRCh37.value)
812
rg38 = hl.get_reference(ReferenceGenome.GRCh38.value)
913
if not rg38.has_liftover(rg37):
10-
rg38.add_liftover(grch38_to_grch37_liftover_ref_path, rg37)
14+
rg38.add_liftover(GRCH38_TO_GRCH37_LIFTOVER_REF_PATH, rg37)
1115

1216

13-
def add_rg37_liftover(grch37_to_grch38_liftover_ref_path: str) -> None:
17+
def add_rg37_liftover() -> None:
1418
rg37 = hl.get_reference(ReferenceGenome.GRCh37.value)
1519
rg38 = hl.get_reference(ReferenceGenome.GRCh38.value)
1620
if not rg37.has_liftover(rg38):
17-
rg37.add_liftover(grch37_to_grch38_liftover_ref_path, rg38)
21+
rg37.add_liftover(GRCH37_TO_GRCH38_LIFTOVER_REF_PATH, rg38)
1822

1923

2024
def remove_liftover():

v03_pipeline/lib/annotations/shared.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ def rsid(mt: hl.MatrixTable, **_: Any) -> hl.Expression:
2525

2626
def rg37_locus(
2727
ht: hl.Table,
28-
grch38_to_grch37_liftover_ref_path: str,
2928
**_: Any,
3029
) -> hl.Expression | None:
31-
liftover.add_rg38_liftover(grch38_to_grch37_liftover_ref_path)
30+
liftover.add_rg38_liftover()
3231
return hl.liftover(ht.locus, ReferenceGenome.GRCh37.value)
3332

3433

v03_pipeline/lib/annotations/snv_indel.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,9 @@ def gnomad_non_coding_constraint(
8989

9090
def rg38_locus(
9191
ht: hl.Table,
92-
grch37_to_grch38_liftover_ref_path: str,
9392
**_: Any,
9493
) -> hl.Expression | None:
95-
liftover.add_rg37_liftover(grch37_to_grch38_liftover_ref_path)
94+
liftover.add_rg37_liftover()
9695
return hl.liftover(ht.locus, ReferenceGenome.GRCh38.value)
9796

9897

v03_pipeline/lib/annotations/sv.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,9 @@ def gt_stats(ht: hl.Table, **_: Any) -> hl.Expression:
194194

195195
def rg37_locus_end(
196196
ht: hl.Table,
197-
grch38_to_grch37_liftover_ref_path: str,
198197
**_: Any,
199198
) -> hl.Expression | None:
200-
liftover.add_rg38_liftover(grch38_to_grch37_liftover_ref_path)
199+
liftover.add_rg38_liftover()
201200
end = end_locus(ht)
202201
return hl.or_missing(
203202
hl.is_defined(end),

v03_pipeline/lib/model/constants.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from v03_pipeline.lib.model.feature_flag import FeatureFlag
2+
13
PROJECTS_EXCLUDED_FROM_LOOKUP = {
24
'R0555_seqr_demo',
35
'R0607_gregor_training_project_',
@@ -10,3 +12,13 @@
1012
'R0815_gregor_training_project_',
1113
'R0816_gregor_training_project_',
1214
}
15+
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH = (
16+
'gs://hail-common/references/grch37_to_grch38.over.chain.gz'
17+
if FeatureFlag.RUN_PIPELINE_ON_DATAPROC
18+
else 'v03_pipeline/var/liftover/grch37_to_grch38.over.chain.gz'
19+
)
20+
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = (
21+
'gs://hail-common/references/grch38_to_grch37.over.chain.gz'
22+
if FeatureFlag.RUN_PIPELINE_ON_DATAPROC
23+
else 'v03_pipeline/var/liftover/grch38_to_grch37.over.chain.gz'
24+
)

0 commit comments

Comments
 (0)