|
| 1 | +import hail as hl |
| 2 | + |
| 3 | +from v03_pipeline.lib.annotations import sv |
| 4 | +from v03_pipeline.lib.migration.base_migration import BaseMigration |
| 5 | +from v03_pipeline.lib.model import DatasetType, ReferenceGenome |
| 6 | +from v03_pipeline.lib.reference_datasets.reference_dataset import ReferenceDataset |
| 7 | + |
| 8 | +# This vcf was generated with the gatk command: |
| 9 | +# |
| 10 | +# gatk SVConcordance --verbosity DEBUG --evaluation /var/seqr/phase4.seqr.gnomad_v4_tmp.vcf.gz |
| 11 | +# --truth /var/seqr/gnomad.v4.1.sv.sites.modified.vcf.bgz |
| 12 | +# --sequence-dictionary gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict |
| 13 | +# |
| 14 | +# Followed by: |
| 15 | +# bcftools annotate --rename-annots /var/seqr/remap /var/seqr/phase4.seqr.gnomad_v4_tmp.vcf.gz | bgzip > /var/seqr/phase4.seqr.gnomad_v4.vcf.gz |
| 16 | +# |
| 17 | +# where remap contains "INFO/TRUTH_VID GNOMAD_V4.1_TRUTH_VID" |
| 18 | +PHASE_4_CALLSET_WITH_GNOMAD_V4 = 'gs://seqr-loading-temp/phase4.seqr.gnomad_v4.vcf.gz' |
| 19 | + |
| 20 | + |
| 21 | +class AddGnomadSVs(BaseMigration): |
| 22 | + reference_genome_dataset_types: frozenset[ |
| 23 | + tuple[ReferenceGenome, DatasetType] |
| 24 | + ] = frozenset( |
| 25 | + ((ReferenceGenome.GRCh38, DatasetType.SV),), |
| 26 | + ) |
| 27 | + |
| 28 | + @staticmethod |
| 29 | + def migrate(ht: hl.Table, **_) -> hl.Table: |
| 30 | + mapping_ht = ( |
| 31 | + hl.import_vcf( |
| 32 | + PHASE_4_CALLSET_WITH_GNOMAD_V4, |
| 33 | + reference_genome=ReferenceGenome.GRCh38.value, |
| 34 | + force_bgz=True, |
| 35 | + ) |
| 36 | + .key_rows_by('rsid') |
| 37 | + .rows() |
| 38 | + ) |
| 39 | + ht = ht.annotate( |
| 40 | + **{ |
| 41 | + 'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key].info[ |
| 42 | + 'GNOMAD_V4.1_TRUTH_VID' |
| 43 | + ], |
| 44 | + }, |
| 45 | + ) |
| 46 | + gnomad_svs_ht = ReferenceDataset.gnomad_svs.get_ht(ReferenceGenome.GRCh38) |
| 47 | + ht = ht.annotate(gnomad_svs=sv.gnomad_svs(ht, gnomad_svs_ht)) |
| 48 | + ht = ht.drop('info.GNOMAD_V4.1_TRUTH_VID') |
| 49 | + return ht.annotate_globals( |
| 50 | + versions=ht.globals.versions.annotate(gnomad_svs='1.0'), |
| 51 | + enums=ht.globals.enums.annotate(gnomad_svs=hl.Struct()), |
| 52 | + ) |
0 commit comments