Skip to content

Commit d5ae917

Browse files
committed
Bump a few versions
1 parent 7835d23 commit d5ae917

File tree

95 files changed

+32
-8
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

95 files changed

+32
-8
lines changed

v03_pipeline/lib/misc/nested_field.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,7 @@ def parse_nested_field(t: hl.MatrixTable | hl.Table, fields: str):
1212
]
1313
else:
1414
expression = expression[field]
15+
# Cast float64 expressions to float32 to save space.
16+
if expression.dtype == hl.tfloat64:
17+
expression = hl.float32(expression)
1518
return expression

v03_pipeline/lib/reference_datasets/eigen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@
33

44
def get_ht(path: str, *_) -> hl.Table:
55
ht = hl.read_table(path)
6-
ht = ht.select(Eigen_phred=ht.info['Eigen-phred'])
6+
ht = ht.select(Eigen_phred=hl.float32(ht.info['Eigen-phred']))
77
return ht.group_by(*ht.key).aggregate(Eigen_phred=hl.agg.max(ht.Eigen_phred))

v03_pipeline/lib/reference_datasets/exac.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from v03_pipeline.lib.reference_datasets.misc import vcf_to_ht
66

77
SELECT = {
8-
'AF_POPMAX': 'info.POPMAX',
98
'AF': 'info.AF#',
109
'AC_Adj': 'info.AC_Adj#',
1110
'AC_Het': 'info.AC_Het#',
@@ -18,5 +17,12 @@
1817
def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table:
1918
ht = vcf_to_ht(path, reference_genome, split_multi=True)
2019
return ht.select(
20+
AF_POPMAX=hl.or_missing(
21+
ht.info.AC_POPMAX[ht.a_index - 1] != 'NA',
22+
hl.float32(
23+
hl.int32(ht.info.AC_POPMAX[ht.a_index - 1])
24+
/ hl.int32(ht.info.AN_POPMAX[ht.a_index - 1]),
25+
),
26+
),
2127
**{k: parse_nested_field(ht, v) for k, v in SELECT.items()},
2228
)

v03_pipeline/lib/reference_datasets/hmtvar.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def get_ht(
1919
reference_genome.value,
2020
),
2121
alleles=hl.array([ht.ref_rCRS, ht.alt]),
22-
score=ht.disease_score,
22+
score=hl.float32(ht.disease_score),
2323
)
2424
ht = ht.key_by('locus', 'alleles')
2525
ht = ht.filter(

v03_pipeline/lib/reference_datasets/reference_dataset.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -204,14 +204,14 @@ def get_ht(
204204
ReferenceDataset.eigen: {
205205
ReferenceGenome.GRCh37: {
206206
DATASET_TYPES: frozenset([DatasetType.SNV_INDEL]),
207-
VERSION: '1.0',
207+
VERSION: '1.1',
208208
# NB: The download link on the Eigen website (http://www.columbia.edu/~ii2135/download.html) is broken
209209
# as of 11/15/24 so we will host the data
210210
PATH: 'gs://seqr-reference-data/GRCh37/eigen/EIGEN_coding_noncoding.grch37.ht',
211211
},
212212
ReferenceGenome.GRCh38: {
213213
DATASET_TYPES: frozenset([DatasetType.SNV_INDEL]),
214-
VERSION: '1.0',
214+
VERSION: '1.1',
215215
PATH: 'gs://seqr-reference-data/GRCh38/eigen/EIGEN_coding_noncoding.liftover_grch38.ht',
216216
},
217217
},
@@ -232,12 +232,12 @@ def get_ht(
232232
ReferenceDataset.exac: {
233233
ReferenceGenome.GRCh37: {
234234
DATASET_TYPES: frozenset([DatasetType.SNV_INDEL]),
235-
VERSION: '1.0',
235+
VERSION: '1.1',
236236
PATH: 'gs://gcp-public-data--gnomad/legacy/exacv1_downloads/release1/ExAC.r1.sites.vep.vcf.gz',
237237
},
238238
ReferenceGenome.GRCh38: {
239239
DATASET_TYPES: frozenset([DatasetType.SNV_INDEL]),
240-
VERSION: '1.0',
240+
VERSION: '1.1',
241241
# NB: ExAC is only available on GRCh37, so we host a lifted-over version
242242
PATH: 'gs://seqr-reference-data/GRCh38/gnomad/ExAC.r1.sites.liftover.b38.vcf.gz',
243243
},
@@ -294,7 +294,7 @@ def get_ht(
294294
ReferenceDataset.hmtvar: {
295295
ReferenceGenome.GRCh38: {
296296
DATASET_TYPES: frozenset([DatasetType.MITO]),
297-
VERSION: '1.0',
297+
VERSION: '1.1',
298298
# NB: https://www.hmtvar.uniba.it is unavailable as of 11/15/24 so we will host the data
299299
PATH: 'https://storage.googleapis.com/seqr-reference-data/GRCh38/mitochondrial/HmtVar/HmtVar%20Jan.%2010%202022.json',
300300
},
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
2+
Written with version 0.2.133-4c60fddb171a
3+
Created at 2024/12/02 19:45:01

v03_pipeline/var/test/reference_datasets/GRCh37/eigen/1.1.ht/_SUCCESS

Whitespace-only changes.

0 commit comments

Comments
 (0)