|
| 1 | +import hail as hl |
| 2 | + |
| 3 | +from v03_pipeline.lib.annotations import sv |
| 4 | +from v03_pipeline.lib.migration.base_migration import BaseMigration |
| 5 | +from v03_pipeline.lib.model import DatasetType, ReferenceGenome |
| 6 | +from v03_pipeline.lib.reference_datasets.reference_dataset import ReferenceDataset |
| 7 | + |
| 8 | +# This VCF was generated with a GATK command (TODO: record the exact invocation). |
| 9 | +PHASE_4_CALLSET_WITH_GNOMAD_V4 = 'gs://seqr-loading-temp/phase4.seqr.gnomad_v4.vcf.gz' |
| 10 | + |
| 11 | + |
class AddGnomadSVs(BaseMigration):
    """Migration that adds the ``gnomad_svs`` annotation to GRCh38 SV tables.

    The gnomAD v4.1 truth variant id for each row is read from the INFO
    field of ``PHASE_4_CALLSET_WITH_GNOMAD_V4``, attached to the table under
    a temporary flattened field name, handed to ``sv.gnomad_svs`` together
    with the gnomAD SVs reference table, and dropped again afterwards.
    """

    # (reference genome, dataset type) pairs this migration applies to.
    reference_genome_dataset_types: frozenset[
        tuple[ReferenceGenome, DatasetType]
    ] = frozenset(
        ((ReferenceGenome.GRCh38, DatasetType.SV),),
    )

    @staticmethod
    def migrate(ht: hl.Table, **_) -> hl.Table:
        """Return ``ht`` with ``gnomad_svs`` added and its globals updated.

        Args:
            ht: The table to migrate.
            **_: Extra keyword arguments; accepted and ignored.

        Returns:
            The table with a new ``gnomad_svs`` row field, with
            ``globals.versions.gnomad_svs`` set to ``'1.0'`` and
            ``globals.enums.gnomad_svs`` set to an empty struct.
        """
        # Temporary row field name; it is dropped again before returning.
        # NOTE(review): presumably ``sv.gnomad_svs`` reads this field from
        # ``ht`` -- confirm against that helper.
        truth_vid_field = 'info.GNOMAD_V4.1_TRUTH_VID'

        # ``reference_genome`` must be passed by keyword: the second
        # positional parameter of ``hl.import_vcf`` is ``force`` (a bool).
        # ``force_bgz=True`` is required for Hail to read a ``.gz`` path in
        # parallel; this assumes the GATK output is block-gzipped.
        mapping_ht = hl.import_vcf(
            PHASE_4_CALLSET_WITH_GNOMAD_V4,
            reference_genome=ReferenceGenome.GRCh38.value,
            force_bgz=True,
        ).rows()

        # ``import_vcf`` keeps INFO values nested in the ``info`` struct, so
        # the value is read from there and stored under the flattened name.
        # NOTE(review): ``import_vcf`` rows are keyed by (locus, alleles);
        # this lookup assumes ``ht`` has a compatible key -- confirm for the
        # SV table and re-key ``mapping_ht`` if it does not.
        ht = ht.annotate(
            **{
                truth_vid_field: mapping_ht[ht.key].info[
                    'GNOMAD_V4.1_TRUTH_VID'
                ],
            },
        )
        gnomad_svs_ht = ReferenceDataset.gnomad_svs.get_ht(ReferenceGenome.GRCh38)
        ht = ht.annotate(gnomad_svs=sv.gnomad_svs(ht, gnomad_svs_ht))
        ht = ht.drop(truth_vid_field)
        return ht.annotate_globals(
            versions=ht.globals.versions.annotate(gnomad_svs='1.0'),
            enums=ht.globals.enums.annotate(gnomad_svs=hl.Struct()),
        )
0 commit comments