Skip to content

Commit c80c04b

Browse files
authored
Merge pull request #1048 from broadinstitute/benb/sv_gnomad_v4_migration
gnomad v4 sv migration
2 parents 189a1cc + 4278cc3 commit c80c04b

File tree

1 file changed

+52
-0
lines changed

1 file changed

+52
-0
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import hail as hl
2+
3+
from v03_pipeline.lib.annotations import sv
4+
from v03_pipeline.lib.migration.base_migration import BaseMigration
5+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
6+
from v03_pipeline.lib.reference_datasets.reference_dataset import ReferenceDataset
7+
8+
# This vcf was generated with the gatk command:
9+
#
10+
# gatk SVConcordance --verbosity DEBUG --evaluation /var/seqr/phase4.seqr.gnomad_v4_tmp.vcf.gz
11+
# --truth /var/seqr/gnomad.v4.1.sv.sites.modified.vcf.bgz
12+
# --sequence-dictionary gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict
13+
#
14+
# Followed by:
15+
# bcftools annotate --rename-annots /var/seqr/remap /var/seqr/phase4.seqr.gnomad_v4_tmp.vcf.gz | bgzip > /var/seqr/phase4.seqr.gnomad_v4.vcf.gz
16+
#
17+
# where remap contains "INFO/TRUTH_VID GNOMAD_V4.1_TRUTH_VID"
18+
PHASE_4_CALLSET_WITH_GNOMAD_V4 = 'gs://seqr-loading-temp/phase4.seqr.gnomad_v4.vcf.gz'
19+
20+
21+
class AddGnomadSVs(BaseMigration):
22+
reference_genome_dataset_types: frozenset[
23+
tuple[ReferenceGenome, DatasetType]
24+
] = frozenset(
25+
((ReferenceGenome.GRCh38, DatasetType.SV),),
26+
)
27+
28+
@staticmethod
29+
def migrate(ht: hl.Table, **_) -> hl.Table:
30+
mapping_ht = (
31+
hl.import_vcf(
32+
PHASE_4_CALLSET_WITH_GNOMAD_V4,
33+
reference_genome=ReferenceGenome.GRCh38.value,
34+
force_bgz=True,
35+
)
36+
.key_rows_by('rsid')
37+
.rows()
38+
)
39+
ht = ht.annotate(
40+
**{
41+
'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key].info[
42+
'GNOMAD_V4.1_TRUTH_VID'
43+
],
44+
},
45+
)
46+
gnomad_svs_ht = ReferenceDataset.gnomad_svs.get_ht(ReferenceGenome.GRCh38)
47+
ht = ht.annotate(gnomad_svs=sv.gnomad_svs(ht, gnomad_svs_ht))
48+
ht = ht.drop('info.GNOMAD_V4.1_TRUTH_VID')
49+
return ht.annotate_globals(
50+
versions=ht.globals.versions.annotate(gnomad_svs='1.0'),
51+
enums=ht.globals.enums.annotate(gnomad_svs=hl.Struct()),
52+
)

0 commit comments

Comments
 (0)