Skip to content

Commit f164733

Browse files
committed
gnomad v4 sv migration
1 parent 56fc218 commit f164733

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import hail as hl
2+
3+
from v03_pipeline.lib.annotations import sv
4+
from v03_pipeline.lib.migration.base_migration import BaseMigration
5+
from v03_pipeline.lib.model import DatasetType, ReferenceGenome
6+
from v03_pipeline.lib.reference_datasets.reference_dataset import ReferenceDataset
7+
8+
# This VCF was generated with a GATK command (TODO: record the exact command here).
9+
PHASE_4_CALLSET_WITH_GNOMAD_V4 = 'gs://seqr-loading-temp/phase4.seqr.gnomad_v4.vcf.gz'
10+
11+
12+
class AddGnomadSVs(BaseMigration):
    """Migration that adds a ``gnomad_svs`` annotation to GRCh38 SV tables.

    The gnomAD v4.1 truth variant id for each row is looked up in a
    side-car VCF (``PHASE_4_CALLSET_WITH_GNOMAD_V4``), used to compute the
    ``gnomad_svs`` annotation, and then dropped again.
    """

    # (reference genome, dataset type) pairs this migration applies to.
    reference_genome_dataset_types: frozenset[
        tuple[ReferenceGenome, DatasetType]
    ] = frozenset(
        ((ReferenceGenome.GRCh38, DatasetType.SV),),
    )

    @staticmethod
    def migrate(ht: hl.Table, **_) -> hl.Table:
        """Annotate ``ht`` with ``gnomad_svs`` and register it in the globals.

        Args:
            ht: The table to migrate.
            **_: Additional keyword arguments; ignored.

        Returns:
            ``ht`` with a new ``gnomad_svs`` row field, and with
            ``globals.versions.gnomad_svs`` set to ``'1.0'`` and
            ``globals.enums.gnomad_svs`` set to an empty struct.
        """
        # Temporary row field; removed again before returning.
        truth_vid_field = 'info.GNOMAD_V4.1_TRUTH_VID'

        # `reference_genome` must be passed by keyword: the second positional
        # parameter of `hl.import_vcf` is `force`, not the reference genome.
        # `force_bgz=True` is needed for Hail to read a `.vcf.gz` path as
        # block-gzipped (assumes the GATK output is bgzipped — TODO confirm).
        mapping_ht = hl.import_vcf(
            PHASE_4_CALLSET_WITH_GNOMAD_V4,
            reference_genome=ReferenceGenome.GRCh38.value,
            force_bgz=True,
        ).rows()

        # NOTE(review): rows from `import_vcf` are keyed by (locus, alleles);
        # this join requires `ht.key` to have matching types. If the SV table
        # is keyed differently (e.g. by a variant id), `mapping_ht` must be
        # re-keyed first — confirm against the table schema.
        #
        # `import_vcf` exposes INFO entries as members of the nested `info`
        # struct, so the value is read from `.info[...]` rather than from a
        # flattened top-level field.
        ht = ht.annotate(
            **{
                truth_vid_field: mapping_ht[ht.key].info[
                    'GNOMAD_V4.1_TRUTH_VID'
                ],
            },
        )

        gnomad_svs_ht = ReferenceDataset.gnomad_svs.get_ht(ReferenceGenome.GRCh38)
        # Presumably `sv.gnomad_svs` reads the temporary field added above —
        # verify against its implementation.
        ht = ht.annotate(gnomad_svs=sv.gnomad_svs(ht, gnomad_svs_ht))
        ht = ht.drop(truth_vid_field)

        return ht.annotate_globals(
            versions=ht.globals.versions.annotate(gnomad_svs='1.0'),
            enums=ht.globals.enums.annotate(gnomad_svs=hl.Struct()),
        )

0 commit comments

Comments
 (0)