Skip to content

Commit b263db4

Browse files
committed
merge
2 parents d880675 + ffee48a commit b263db4

24 files changed

+139
-86
lines changed

docker/bin/download_reference_data.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,8 @@ mkdir -p "/seqr-reference-data/GRCh${BUILD_VERSION}/${CLINVAR_HT}"
3434
cd "/seqr-reference-data/GRCh${BUILD_VERSION}"
3535
gsutil -m rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/all_reference_data/${REF_DATA_HT}" "./${REF_DATA_HT}"
3636
gsutil -m rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/clinvar/${CLINVAR_HT}" "./${CLINVAR_HT}"
37+
gsutil -m rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/validate_ht/common_noncoding_variants.grch${BUILD_VERSION}.ht" "./common_noncoding_variants.grch${BUILD_VERSION}.ht"
38+
gsutil -m rsync -r "gs://seqr-reference-data/GRCh${BUILD_VERSION}/validate_ht/common_coding_variants.grch${BUILD_VERSION}.ht" "./common_coding_variants.grch${BUILD_VERSION}.ht"
39+
40+
cd "/seqr-reference-data/"
41+
gsutil cp "gs://hail-common/references/grch38_to_grch37.over.chain.gz" "./grch38_to_grch37.over.chain.gz"  # single object: rsync only accepts buckets/directories, so use cp

docker/bin/load_data.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ python3 -m seqr_loading SeqrMTToESTask --local-scheduler \
2626
--reference-ht-path "/seqr-reference-data/${FULL_BUILD_VERSION}/combined_reference_data_grch${BUILD_VERSION}.ht" \
2727
--clinvar-ht-path "/seqr-reference-data/${FULL_BUILD_VERSION}/clinvar.${FULL_BUILD_VERSION}.ht" \
2828
--vep-config-json-path "/vep_configs/vep-${FULL_BUILD_VERSION}-loftee.json" \
29+
--grch38-to-grch37-ref-chain "/seqr-reference-data/grch38_to_grch37.over.chain.gz" \
30+
--GlobalConfig-validation-37-noncoding-ht "/seqr-reference-data/GRCh37/common_noncoding_variants.grch37.ht" \
31+
--GlobalConfig-validation-37-coding-ht "/seqr-reference-data/GRCh37/common_coding_variants.grch37.ht" \
32+
--GlobalConfig-validation-38-noncoding-ht "/seqr-reference-data/GRCh38/common_noncoding_variants.grch38.ht" \
33+
--GlobalConfig-validation-38-coding-ht "/seqr-reference-data/GRCh38/common_coding_variants.grch38.ht" \
2934
--es-host elasticsearch \
3035
--es-index-min-num-shards 1 \
3136
--sample-type "${SAMPLE_TYPE}" \

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ dynamic = ['version']
44
requires-python = ">=3.10"
55

66
[tool.setuptools]
7-
include-package-data = false
7+
include-package-data = true
8+
9+
[tool.setuptools.package-data]
10+
"*" = ["v03_pipeline*.over.chain.gz"]
811

912
[tool.setuptools.dynamic]
1013
version = {attr = "v03_pipeline.__version__"}

v03_pipeline/lib/annotations/fields_test.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,6 @@
1414
from v03_pipeline.lib.vep import run_vep
1515
from v03_pipeline.var.test.vep.mock_vep_data import MOCK_37_VEP_DATA, MOCK_38_VEP_DATA
1616

17-
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH = (
18-
'v03_pipeline/var/test/liftover/grch37_to_grch38.over.chain.gz'
19-
)
20-
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = (
21-
'v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz'
22-
)
2317
TEST_GNOMAD_NONCODING_CONSTRAINT_38_HT = 'v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_non_coding_constraint/1.0.ht'
2418
TEST_SCREEN_38_HT = 'v03_pipeline/var/test/reference_datasets/GRCh38/screen/1.0.ht'
2519

@@ -144,12 +138,9 @@ def test_get_formatting_fields(self, mock_vep: Mock) -> None:
144138
'gencode_ensembl_to_refseq_id_mapping': hl.dict(
145139
{'a': 'b'},
146140
),
147-
'grch38_to_grch37_liftover_ref_path': GRCH38_TO_GRCH37_LIFTOVER_REF_PATH,
148141
}
149142
if reference_genome == ReferenceGenome.GRCh38
150-
else {
151-
'grch37_to_grch38_liftover_ref_path': GRCH37_TO_GRCH38_LIFTOVER_REF_PATH,
152-
}
143+
else {}
153144
),
154145
dataset_type=DatasetType.SNV_INDEL,
155146
reference_genome=reference_genome,
@@ -208,7 +199,7 @@ def test_get_lookup_table_fields(
208199
list(
209200
get_fields(
210201
ht,
211-
DatasetType.SNV_INDEL.lookup_table_annotation_fns,
202+
DatasetType.SNV_INDEL.variant_frequency_annotation_fns,
212203
lookup_ht=lookup_ht,
213204
dataset_type=DatasetType.SNV_INDEL,
214205
reference_genome=ReferenceGenome.GRCh38,

v03_pipeline/lib/annotations/gcnv.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,11 @@ def end_locus(
5757
return hl.locus(ht.chr, ht.end, reference_genome.value)
5858

5959

60-
def gt_stats(ht: hl.Table, **_: Any) -> hl.Expression:
60+
def gt_stats(ht: hl.Table, callset_ht: hl.Table, **_: Any) -> hl.Expression:
6161
return hl.struct(
62-
AF=hl.float32(ht.sf),
63-
AC=ht.sc,
64-
AN=hl.int32(ht.sc / ht.sf),
62+
AF=hl.float32(callset_ht[ht.variant_id].sf),
63+
AC=callset_ht[ht.variant_id].sc,
64+
AN=hl.int32(callset_ht[ht.variant_id].sc / callset_ht[ht.variant_id].sf),
6565
Hom=hl.missing(hl.tint32),
6666
Het=hl.missing(hl.tint32),
6767
)
@@ -85,10 +85,9 @@ def QS(mt: hl.MatrixTable, **_: Any) -> hl.Expression: # noqa: N802
8585

8686
def rg37_locus(
8787
ht: hl.Table,
88-
grch38_to_grch37_liftover_ref_path: str,
8988
**_: Any,
9089
) -> hl.Expression | None:
91-
liftover.add_rg38_liftover(grch38_to_grch37_liftover_ref_path)
90+
liftover.add_rg38_liftover()
9291
return hl.liftover(
9392
start_locus(ht, ReferenceGenome.GRCh38),
9493
ReferenceGenome.GRCh37.value,
@@ -97,10 +96,9 @@ def rg37_locus(
9796

9897
def rg37_locus_end(
9998
ht: hl.Table,
100-
grch38_to_grch37_liftover_ref_path: str,
10199
**_: Any,
102100
) -> hl.Expression | None:
103-
liftover.add_rg38_liftover(grch38_to_grch37_liftover_ref_path)
101+
liftover.add_rg38_liftover()
104102
return hl.liftover(
105103
end_locus(ht, ReferenceGenome.GRCh38),
106104
ReferenceGenome.GRCh37.value,

v03_pipeline/lib/annotations/liftover.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
import hail as hl
22

3+
from v03_pipeline.lib.model.constants import (
4+
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH,
5+
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH,
6+
)
37
from v03_pipeline.lib.model.definitions import ReferenceGenome
48

59

6-
def add_rg38_liftover(grch38_to_grch37_liftover_ref_path: str) -> None:
10+
def add_rg38_liftover() -> None:
711
rg37 = hl.get_reference(ReferenceGenome.GRCh37.value)
812
rg38 = hl.get_reference(ReferenceGenome.GRCh38.value)
913
if not rg38.has_liftover(rg37):
10-
rg38.add_liftover(grch38_to_grch37_liftover_ref_path, rg37)
14+
rg38.add_liftover(GRCH38_TO_GRCH37_LIFTOVER_REF_PATH, rg37)
1115

1216

13-
def add_rg37_liftover(grch37_to_grch38_liftover_ref_path: str) -> None:
17+
def add_rg37_liftover() -> None:
1418
rg37 = hl.get_reference(ReferenceGenome.GRCh37.value)
1519
rg38 = hl.get_reference(ReferenceGenome.GRCh38.value)
1620
if not rg37.has_liftover(rg38):
17-
rg37.add_liftover(grch37_to_grch38_liftover_ref_path, rg38)
21+
rg37.add_liftover(GRCH37_TO_GRCH38_LIFTOVER_REF_PATH, rg38)
1822

1923

2024
def remove_liftover():

v03_pipeline/lib/annotations/shared.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ def rsid(mt: hl.MatrixTable, **_: Any) -> hl.Expression:
2525

2626
def rg37_locus(
2727
ht: hl.Table,
28-
grch38_to_grch37_liftover_ref_path: str,
2928
**_: Any,
3029
) -> hl.Expression | None:
31-
liftover.add_rg38_liftover(grch38_to_grch37_liftover_ref_path)
30+
liftover.add_rg38_liftover()
3231
return hl.liftover(ht.locus, ReferenceGenome.GRCh37.value)
3332

3433

v03_pipeline/lib/annotations/snv_indel.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,9 @@ def gnomad_non_coding_constraint(
8989

9090
def rg38_locus(
9191
ht: hl.Table,
92-
grch37_to_grch38_liftover_ref_path: str,
9392
**_: Any,
9493
) -> hl.Expression | None:
95-
liftover.add_rg37_liftover(grch37_to_grch38_liftover_ref_path)
94+
liftover.add_rg37_liftover()
9695
return hl.liftover(ht.locus, ReferenceGenome.GRCh38.value)
9796

9897

v03_pipeline/lib/annotations/sv.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,10 +194,9 @@ def gt_stats(ht: hl.Table, **_: Any) -> hl.Expression:
194194

195195
def rg37_locus_end(
196196
ht: hl.Table,
197-
grch38_to_grch37_liftover_ref_path: str,
198197
**_: Any,
199198
) -> hl.Expression | None:
200-
liftover.add_rg38_liftover(grch38_to_grch37_liftover_ref_path)
199+
liftover.add_rg38_liftover()
201200
end = end_locus(ht)
202201
return hl.or_missing(
203202
hl.is_defined(end),

v03_pipeline/lib/model/constants.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from v03_pipeline.lib.model.feature_flag import FeatureFlag
2+
13
LOCAL_DISK_MOUNT_PATH = '/var/seqr'
24
PROJECTS_EXCLUDED_FROM_LOOKUP = {
35
'R0555_seqr_demo',
@@ -11,3 +13,13 @@
1113
'R0815_gregor_training_project_',
1214
'R0816_gregor_training_project_',
1315
}
16+
GRCH37_TO_GRCH38_LIFTOVER_REF_PATH = (
17+
'gs://hail-common/references/grch37_to_grch38.over.chain.gz'
18+
if FeatureFlag.RUN_PIPELINE_ON_DATAPROC
19+
else 'v03_pipeline/var/liftover/grch37_to_grch38.over.chain.gz'
20+
)
21+
GRCH38_TO_GRCH37_LIFTOVER_REF_PATH = (
22+
'gs://hail-common/references/grch38_to_grch37.over.chain.gz'
23+
if FeatureFlag.RUN_PIPELINE_ON_DATAPROC
24+
else 'v03_pipeline/var/liftover/grch38_to_grch37.over.chain.gz'
25+
)

0 commit comments

Comments
 (0)