From f164733a3c2d2f8540a3163512a38da3530f309c Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 19 Feb 2025 15:51:31 -0500 Subject: [PATCH 01/18] gnomad v4 sv migration --- .../annotations/0004_add_gnomad_svs.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py new file mode 100644 index 000000000..231eef37a --- /dev/null +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -0,0 +1,37 @@ +import hail as hl + +from v03_pipeline.lib.annotations import sv +from v03_pipeline.lib.migration.base_migration import BaseMigration +from v03_pipeline.lib.model import DatasetType, ReferenceGenome +from v03_pipeline.lib.reference_datasets.reference_dataset import ReferenceDataset + +# This vcf was generated with the gatk command +PHASE_4_CALLSET_WITH_GNOMAD_V4 = 'gs://seqr-loading-temp/phase4.seqr.gnomad_v4.vcf.gz' + + +class AddGnomadSVs(BaseMigration): + reference_genome_dataset_types: frozenset[ + tuple[ReferenceGenome, DatasetType] + ] = frozenset( + ((ReferenceGenome.GRCh38, DatasetType.SV),), + ) + + @staticmethod + def migrate(ht: hl.Table, **_) -> hl.Table: + mapping_ht = hl.import_vcf( + PHASE_4_CALLSET_WITH_GNOMAD_V4, ReferenceGenome.GRCh38.value, + ).rows() + ht = ht.annotate( + **{ + 'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key][ + 'info.GNOMAD_V4.1_TRUTH_VID' + ], + }, + ) + gnomad_svs_ht = ReferenceDataset.gnomad_svs.get_ht(ReferenceGenome.GRCh38) + ht = ht.annotate(gnomad_svs=sv.gnomad_svs(ht, gnomad_svs_ht)) + ht = ht.drop('info.GNOMAD_V4.1_TRUTH_VID') + return ht.annotate_globals( + versions=ht.globals.versions.annotate(gnomad_svs='1.0'), + enums=ht.globals.enums.annotate(gnomad_svs=hl.Struct()), + ) From e8d96167eb5bde11240a48409adafab48dde37d9 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 19 Feb 2025 16:31:42 -0500 Subject: [PATCH 02/18] ruff --- v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index 231eef37a..ccc9ed5fa 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -19,7 +19,8 @@ class AddGnomadSVs(BaseMigration): @staticmethod def migrate(ht: hl.Table, **_) -> hl.Table: mapping_ht = hl.import_vcf( - PHASE_4_CALLSET_WITH_GNOMAD_V4, ReferenceGenome.GRCh38.value, + PHASE_4_CALLSET_WITH_GNOMAD_V4, + ReferenceGenome.GRCh38.value, ).rows() ht = ht.annotate( **{ From abeb65eba1535d39f4ae9a32e890115939614700 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Feb 2025 05:00:01 -0500 Subject: [PATCH 03/18] Update 0004_add_gnomad_svs.py --- v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index ccc9ed5fa..20f860ae9 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -20,7 +20,8 @@ class AddGnomadSVs(BaseMigration): def migrate(ht: hl.Table, **_) -> hl.Table: mapping_ht = hl.import_vcf( PHASE_4_CALLSET_WITH_GNOMAD_V4, - ReferenceGenome.GRCh38.value, + reference_genome=ReferenceGenome.GRCh38.value, + force_bgz=True ).rows() ht = ht.annotate( **{ From e88c303da760846cc899c417022304953913ea17 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Feb 2025 05:19:27 -0500 Subject: [PATCH 04/18] Update 0004_add_gnomad_svs.py --- v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index 20f860ae9..751b54009 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -22,7 +22,7 @@ def migrate(ht: hl.Table, **_) -> hl.Table: PHASE_4_CALLSET_WITH_GNOMAD_V4, reference_genome=ReferenceGenome.GRCh38.value, force_bgz=True - ).rows() + ).annotate_rows(variant_id=mt.rsid).key_rows_by(mt.variant_id).rows() ht = ht.annotate( **{ 'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key][ From 3e3658e33b01d99fbe99e08f0304675ed3463c55 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Feb 2025 05:20:43 -0500 Subject: [PATCH 05/18] Update 0004_add_gnomad_svs.py --- v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index 751b54009..76c6c0139 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -22,7 +22,7 @@ def migrate(ht: hl.Table, **_) -> hl.Table: PHASE_4_CALLSET_WITH_GNOMAD_V4, reference_genome=ReferenceGenome.GRCh38.value, force_bgz=True - ).annotate_rows(variant_id=mt.rsid).key_rows_by(mt.variant_id).rows() + ).annotate_rows(variant_id=mapping_ht.rsid).key_rows_by(mapping_ht.variant_id).rows() ht = ht.annotate( **{ 'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key][ From 282bad2a396eb5020f6a0053e4ca60a102cd22f5 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Feb 2025 05:24:29 -0500 Subject: [PATCH 06/18] Update 0004_add_gnomad_svs.py --- v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index 76c6c0139..e27167fcf 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -22,7 +22,7 @@ def migrate(ht: hl.Table, **_) -> hl.Table: PHASE_4_CALLSET_WITH_GNOMAD_V4, reference_genome=ReferenceGenome.GRCh38.value, force_bgz=True - ).annotate_rows(variant_id=mapping_ht.rsid).key_rows_by(mapping_ht.variant_id).rows() + ).key_rows_by('rsid').rows() ht = ht.annotate( **{ 'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key][ From cf5875ccbb5a06d4d022e7fe4e6486df4cf24883 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Feb 2025 06:02:46 -0500 Subject: [PATCH 07/18] Update 0004_add_gnomad_svs.py --- v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index e27167fcf..88f4ea526 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -25,8 +25,8 @@ def migrate(ht: hl.Table, **_) -> hl.Table: ).key_rows_by('rsid').rows() ht = ht.annotate( **{ - 'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key][ - 'info.GNOMAD_V4.1_TRUTH_VID' + 'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key].info[ + 'GNOMAD_V4.1_TRUTH_VID' ], }, ) From 026631fd2d8e2102ad1e93f37252ba33d85129b5 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Feb 2025 06:05:21 -0500 Subject: [PATCH 08/18] comment --- .../migrations/annotations/0004_add_gnomad_svs.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index ccc9ed5fa..75c677476 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -5,7 +5,16 @@ from v03_pipeline.lib.model import DatasetType, ReferenceGenome from v03_pipeline.lib.reference_datasets.reference_dataset import ReferenceDataset -# This vcf was generated with the gatk command +# This vcf was generated with the gatk command: +# +# gatk SVConcordance --verbosity DEBUG --evaluation /var/seqr/phase4.seqr.gnomad_v4_tmp.vcf.gz +# --truth /var/seqr/gnomad.v4.1.sv.sites.modified.vcf.bgz +# --sequence-dictionary gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict +# +# Followed by: +# bcftools annotate --rename-annots /var/seqr/remap /var/seqr/phase4.seqr.gnomad_v4_tmp.vcf.gz | bgzip > /var/seqr/phase4.seqr.gnomad_v4.vcf.gz +# +# where remap contains "INFO/TRUTH_VID GNOMAD_V4.1_TRUTH_VID" PHASE_4_CALLSET_WITH_GNOMAD_V4 = 'gs://seqr-loading-temp/phase4.seqr.gnomad_v4.vcf.gz' From 4b3f67841ca0acf76733d11009716652878195ef Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Thu, 20 Feb 2025 06:06:29 -0500 Subject: [PATCH 09/18] ruff --- .../migrations/annotations/0004_add_gnomad_svs.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index d670e2705..3cb3a4c98 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -27,11 +27,15 @@ class AddGnomadSVs(BaseMigration): @staticmethod def migrate(ht: hl.Table, **_) -> hl.Table: - mapping_ht = hl.import_vcf( - PHASE_4_CALLSET_WITH_GNOMAD_V4, - reference_genome=ReferenceGenome.GRCh38.value, - force_bgz=True - ).key_rows_by('rsid').rows() + mapping_ht = ( + hl.import_vcf( + PHASE_4_CALLSET_WITH_GNOMAD_V4, + reference_genome=ReferenceGenome.GRCh38.value, + force_bgz=True, + ) + .key_rows_by('rsid') + .rows() + ) ht = ht.annotate( **{ 'info.GNOMAD_V4.1_TRUTH_VID': mapping_ht[ht.key].info[ From 21d052736fefffd78d2c1a655730e77722d41c3b Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Mon, 3 Mar 2025 15:37:15 -0500 Subject: [PATCH 10/18] Update 0004_add_gnomad_svs.py --- v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index 3cb3a4c98..14b1e48f6 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -45,8 +45,4 @@ def migrate(ht: hl.Table, **_) -> hl.Table: ) gnomad_svs_ht = ReferenceDataset.gnomad_svs.get_ht(ReferenceGenome.GRCh38) ht = ht.annotate(gnomad_svs=sv.gnomad_svs(ht, gnomad_svs_ht)) - ht = ht.drop('info.GNOMAD_V4.1_TRUTH_VID') - return ht.annotate_globals( - versions=ht.globals.versions.annotate(gnomad_svs='1.0'), - enums=ht.globals.enums.annotate(gnomad_svs=hl.Struct()), - ) + return ht.drop('info.GNOMAD_V4.1_TRUTH_VID') From bfde429b6f41b4f67dbd52b83bf794fc383a9941 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 4 Mar 2025 12:13:27 -0500 Subject: [PATCH 11/18] Update 0004_add_gnomad_svs.py --- v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py index 14b1e48f6..3cb3a4c98 100644 --- a/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py +++ b/v03_pipeline/migrations/annotations/0004_add_gnomad_svs.py @@ -45,4 +45,8 @@ def migrate(ht: hl.Table, **_) -> hl.Table: ) gnomad_svs_ht = ReferenceDataset.gnomad_svs.get_ht(ReferenceGenome.GRCh38) ht = ht.annotate(gnomad_svs=sv.gnomad_svs(ht, gnomad_svs_ht)) - return ht.drop('info.GNOMAD_V4.1_TRUTH_VID') + ht = ht.drop('info.GNOMAD_V4.1_TRUTH_VID') + return ht.annotate_globals( + versions=ht.globals.versions.annotate(gnomad_svs='1.0'), + enums=ht.globals.enums.annotate(gnomad_svs=hl.Struct()), + ) From b87bd2efef8bdcbc00b516b7c82ec3363fa58a99 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Tue, 4 Mar 2025 15:39:14 -0500 Subject: [PATCH 12/18] do alleles field validation only if it exists on ht --- v03_pipeline/lib/misc/validation.py | 3 + .../lib/reference_datasets/gnomad_svs_test.py | 63 ++++++++++++++++++ .../gnomad_svs_from_vcf.ht/.README.txt.crc | Bin 0 -> 12 bytes .../raw/gnomad_svs_from_vcf.ht/._SUCCESS.crc | Bin 0 -> 8 bytes .../.metadata.json.gz.crc | Bin 0 -> 32 bytes .../raw/gnomad_svs_from_vcf.ht/README.txt | 3 + .../raw/gnomad_svs_from_vcf.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../globals/metadata.json.gz | Bin 0 -> 239 bytes .../globals/parts/.part-0.crc | Bin 0 -> 12 bytes .../globals/parts/part-0 | Bin 0 -> 36 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 112 bytes .../metadata.json.gz | Bin 0 -> 185 bytes .../gnomad_svs_from_vcf.ht/metadata.json.gz | Bin 0 -> 2659 bytes .../rows/.metadata.json.gz.crc | Bin 0 -> 48 bytes .../rows/metadata.json.gz | Bin 0 -> 4976 bytes ...0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.crc | Bin 0 -> 56 bytes ...art-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6 | Bin 0 -> 6030 bytes 20 files changed, 69 insertions(+) create mode 100644 v03_pipeline/lib/reference_datasets/gnomad_svs_test.py create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/.README.txt.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/._SUCCESS.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/.metadata.json.gz.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/README.txt create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/_SUCCESS create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/globals/.metadata.json.gz.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/globals/metadata.json.gz create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/globals/parts/.part-0.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/globals/parts/part-0 create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/index/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.idx/.index.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/index/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.idx/.metadata.json.gz.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/index/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.idx/index create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/index/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.idx/metadata.json.gz create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/metadata.json.gz create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/rows/.metadata.json.gz.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/rows/metadata.json.gz create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/rows/parts/.part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.crc create mode 100644 v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/rows/parts/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6 diff --git a/v03_pipeline/lib/misc/validation.py b/v03_pipeline/lib/misc/validation.py index 063312f47..234f11edc 100644 --- a/v03_pipeline/lib/misc/validation.py +++ b/v03_pipeline/lib/misc/validation.py @@ -31,6 +31,9 @@ def validate_allele_type( **_: Any, ) -> None: ht = t.rows() if isinstance(t, hl.MatrixTable) else t + if not hasattr(ht, 'alleles'): + return + ht = ht.filter( dataset_type.invalid_allele_types.contains( hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]), diff --git a/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py b/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py new file mode 100644 index 000000000..8cc1dd90c --- /dev/null +++ b/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py @@ -0,0 +1,63 @@ +import unittest +from unittest.mock import patch + +import hail as hl + +from v03_pipeline.lib.model import ReferenceGenome +from v03_pipeline.lib.reference_datasets.reference_dataset import ReferenceDataset + +TEST_GNOMAD_SVS_RAW_HT = ( + 'v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht' +) + + +class GnomadSVsTest(unittest.TestCase): + @patch('v03_pipeline.lib.reference_datasets.gnomad_svs.vcf_to_ht') + def test_gnomad_svs(self, mock_vcf_to_ht): + mock_vcf_to_ht.return_value = hl.read_table(TEST_GNOMAD_SVS_RAW_HT) + ht = ReferenceDataset.gnomad_svs.get_ht(ReferenceGenome.GRCh38) + self.assertEqual( + ht.collect(), + [ + hl.Struct( + KEY='gnomAD-SV_v3_BND_chr1_1a45f73a', + AF=0.11413399875164032, + AC=8474, + AN=74246, + N_HET=8426, + N_HOMREF=28673, + ), + hl.Struct( + KEY='gnomAD-SV_v3_BND_chr1_3fa36917', + AF=0.004201000090688467, + AC=466, + AN=110936, + N_HET=466, + N_HOMREF=55002, + ), + hl.Struct( + KEY='gnomAD-SV_v3_BND_chr1_7bbf34b5', + AF=0.03698499873280525, + AC=3119, + AN=84332, + N_HET=3115, + N_HOMREF=39049, + ), + hl.Struct( + KEY='gnomAD-SV_v3_BND_chr1_933a2971', + AF=0.3238990008831024, + AC=21766, + AN=67200, + N_HET=21616, + N_HOMREF=11909, + ), + hl.Struct( + KEY='gnomAD-SV_v3_DUP_chr1_01c2781c', + AF=0.0019970000721514225, + AC=139, + AN=69594, + N_HET=139, + N_HOMREF=34658, + ), + ], + ) diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/.README.txt.crc b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/.README.txt.crc new file mode 100644 index 0000000000000000000000000000000000000000..844a375ed8925b53c2fafae2cd92dac9928e1d6f GIT binary patch literal 12 TcmYc;N@ieSU}C8KW_KC@6P*LM literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/._SUCCESS.crc b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..0e5fd659414995c68046534130b7972efaf1f8e4 GIT binary patch literal 32 ocmYc;N@ieSU}Eri5xO+Si__+9R`$gW*Y|mUNf11+{(_ko0KF&;mjD0& literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/README.txt b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/README.txt new file mode 100644 index 000000000..269755c89 --- /dev/null +++ b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.133-4c60fddb171a + Created at 2025/03/04 14:19:46 \ No newline at end of file diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/_SUCCESS b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/_SUCCESS new file mode 100644 index 000000000..e69de29bb diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/globals/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/globals/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..782d7d3e899d51645ff6eb159812b9cd77122b9d GIT binary patch literal 12 TcmYc;N@ieSU}AW={=yvq6|Dq+ literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/globals/metadata.json.gz b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/globals/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..369b04d91accc5ed605b5343b6d440e356c500c5 GIT binary patch literal 239 zcmV;bb<-N-|w&!*WMQNahQkOo3!Xhot@sOphhu z+Xb+TL}JoPl@63eUzu1pLJq&=T3c1?HTk=x@2Pt24%I(ugW1{~uPiDr%+9*ajcFxf zX201m==Xbr$Jl3xc#b}q<0q5e-zOV6{E1TgIJP+jE(+xG(g7*#YogtTR8ct;R6~di pc`b*?;m=^5$t&cd&WNj_iN(Kf-Mg|59g8)iJ9033T literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/index/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.idx/metadata.json.gz b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/index/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..8ca1d35c2af5a1f1a83390b897b68e65ada41239 GIT binary patch literal 185 zcmV;q07m~GiwFP!0000009B5`3c@fDME_+^3ON+5wdN**9uyQ6FXADs+a?$iNw#1p z{dc$CyetDV^LBb@jKv#OntTA6XRAVhM$>8nITP^0bWLvr z2^R`oO}+JX7v*8Rlihiv=jBu=Ytdqq>kw4BbC8LT0R$-LJSpOr|B%shdrT4JLV$5k n{G1eN4*WE`!FxJ3O+=ot%$Y=lnHZd(;slXM<5MN7`dGeS@aEDhRr8&-wT+GGW_{JV zwE4oy90JwU9ASUed3z-|bcbMi9r3`#msYwqJ2?A?^-A6Hw`9RQBglf$wa#eXWUmuJg#mMgroJeZ#YzbL=D7!rTO|9PP!mh|yTTe&)okeay3whqoAa`}FfX0%SMvvax3sJ%h8n z_;!9<>dR)7qzP6I2bn*xlhjp^p!yIho*v4jv9{|gDtq0tv~Uf|h?fPD1175Fb9M^P z4KbhEB1x#P=WYs`Hm>NU{flr305NEkx0)tEYNuq$@Yf5*g*Ch;C%6 z)VYZj=+$C&qE8IvC{TeKoEAglbPao)DL(Izt6E$gjB4X>^;=SG-_Fv9B`hkCc~@1Y zIPecX7hPC5#|tgN|CBx)E{Qi#(G6bL*(=h};G7(CIVHE71V$}E_q8i25)SrW9aCR6O_EDw+~);zZwn$o@W_zX>aldm$q z8fw{11o!}3h>x(r_E6gUS?2-W&2{0|ExzH8|7X!58ndpNUnkW?Y?Yw}k=c05c*sDJ zMmr2j)6>~V&$WkazI`G^%hzP+10URv$Tp$5YvNroZt-I!qyMb142@|D=lH|y7}q`8 zF<0;zN+1fqu5{cv(hxT_sAZ zL<>>*-iAiX&A115=+!9<-vl65g=&z!=m$2xscHKL?@;rKo)HpwkLCwwhcMnB_tlQa{A19m4Bf3}(&_YPS zLJUemN7Ks}3x+v;kbE(Q3oJ_lnfOP&QnwET1VIw-FhRwpetDo!Bp&KsnsI^JV~tzr zz)2jVm=P|3Bjd9c%!STeq4pMWONbwF5B1G*Ew3h39 z^FTH{m=aU`^-=)4!^IZAK$Tg-rD~dfn1oe>Rfb$1eh6iY2tLmeM8Tc!90E^)nxSi7 zf9(e zQ>Dlt;9biCXmnvj-y<4m-boo8FtN|VE_)8}6{grn0K*@Q!@yhu6y4#VhY!q405YF} zWG@B;Bk)i$`IF2rq0>^Rg%`g02E>744awa{FcA9Wmjyp?XHs+P{vfuDrIDFAxJHID zD+0BWPh+VNK_|4wyV^N0AvKtg+Ob_~=#F31(6+nOaJ$sNa=X+deo=$`qz3s(4I-pA zW`i=WZG1gO2HC0b7L)58f|M$ksz9*-s`wfu1=r!D<|nj_NQR)}>_m*BT);I&*TBGu z5DO7uv^ki<&uH_oJ)uZ2bV>+~oRSL@MvY5DsW5RM$5!}*WZ^R%g!2L^f~%SIqbHz^#d>_IyK%Z!i%%a5EmJc?92gu(w%}4z^q5o!nZa$z9xHK6 za82grcQ=GpkQ*we`ax{oqMC4G@H7gH;xP*j|bCm|FSXZKjB9T~&4)0Vr@j*3k} z)q_WP5@e7^I~xi3+5}Nsi!i2ZtgEGw^}$ccJqMXY$%ba!qs!cUk)S-aO5VZTC(;p6 z_YX^5uH%S_ZN6#_RXN5$$~@jw>(lYTvb^x$-hk@}eny*S~BSpowN}6Rttout*^DF$l6-O|YDjUPdC3=z%MKre?2_ zn9Cz@&oNEGl0$uR`?ownm^YP@!-0OjzuqHgVe%`HyzonN^Gm#Bj%oSXZq`=A>7|S9 zE9|x3E8J|Wdh>$8Y;3wC&N zS~>jXKk}`z-oOepBj=BO$%F%6-yrdmrWUX~SP^nubsR8qxVp4?ysTxVP`IFE~PYT!h$_N+JD|0RF1?I-48?xi6FPCnn3t~W)z-w|W$J1Qi9fV21Cjb22P^wot*Mwzc8NvX zChQ~2)@O=*8R{UXbvG(Py)yg{WaLC640}&gV3C(P-I5cW`eaUIZZ~=nA%5Pjk2m6< z1cV+i-{XDdSFL(Sa<8Sxr`#NgAM4${(TfBed}_;V|Jm<`_CRoJS&DwR{F%56U#0es-u-!Z z(}W?z`im5sWr{~?KdXm875_34J$Z92FxV2GFEUT6895%8{ia6@P8~|PtcNpOt=Ur^`%ak?6cg)x9IuLY^fF3q0n|IKhyesJySHENpLmpN1xsUK8;7Q+R~q674DX6o0@1zftJ(se|OAA{s|UZENqX8__oBZ2r& zBYxE*ej1=4%5W4qox3Qr5LYQLfF{#WZdpL(uTUmWD5^23MItHdL|8>M`--=1KC0_F zvo};oTp|MWIrnLpRT{H*^=m=e-RlbB2-3x&V$8#M9x4=?6*PncFbqwgEkdmnsqrJd z%Bs@GpS!k5lnIO~(^UN$ttBlJ3VFnf{|RZ1<+V|n5KTm(fh?%4-jnkj z%H>&9-cC?d+;wViHz6@h#N*Geo`zd}p!T){>Rs_>RtVMp9f$IA+~a>8dfu{CH@wW) zG_+-|OLzg{cf;w1G=rJFH%n$8Fjt@m ztdEDrDUK3*Nb$V;Y#6@#Y!bhb;+Xqv8>oNE-kdn}<8>aM0(ajWIou+t57lD=uBv|t z&#I?<_y;L&!r`PXGji1ZFbx?mS;tY#W%xDePrPIy$DPX8g7g8}()#bMX*1=~GJ3Sa z`1;0opMV9$N*kao(xmCem&M9mE(Lk&*&V9$8>{kd0VJxA419|U7w8j(3{+Ct^!0&H z-*1foyz|7Pl&kE2eUX9ziwj!TM11?|Gp)0 zIK_7~g*T`;M^ybl|FLy1p2q%?3SR4-QupJbF1QrUEjN_t9$pLBjec6-Gs6C3by&O^ zZ}a=oMQ^nzX-8#5uo;uQj;D`9(3)ZFYOG`VV6vPyPUV4iDm-XXN+X^JQR9C784NI< z?o*3X^|OlW!Q6j*LlK%Tb_MD z`hxGizaa*csu2EQU}0#Pet6V%(YEVd3oKrT>#_d8$olxOLkf&5rkffX4I4;{gi&`r z45XEmh;+fqPJ2Fb1M3brN1HP~z+ivAea>haM_=?Wm5L-QG)$^*aGpMpQ9?cL19{c^&V)wg(#r@38TU)W29 z`G~zF1JZ7*r4tM^Fs0F!9SvzUNwb8*eQ)Gf{4}K%7xXiQHq!>?=4%%sf$qvG+-=)DaJel`%{9Jd92sK%xN0y?iWC~ zp3KfhzuQt!A*6 zQ{SBN38r&CnZ}u=UnI5&;uhs;lZIpD8oWa{Cj5Oik+MOUc+D8IC_xEKO4StH;rimK z9chQ|w3te>&n2e=LT^#(@%9#(m!$rv5M~*2Xxo)1$sj6Y9WDERFXpk;BNyqsI<`F% zwJyo@$-bCvGSexth5SM?eaAd5a*@LP*l$($$|G1#if}N!6@DWh;$Kht+&wJ3VCs?( z1}o4VFmp6#EEb_FnC!3buE2$+o%hn>bv91V;Y@|qpYBj`{<=)gmz)RPZ*a%}K1dsbdfr+qr z-14-f1}4V7RPjRE2URJTOgHeyS_;H#CA8yzQ{B1#f)uj%WP)NS6BG0=F(iG{ZM;qq zbaJTU0gKT2Zbb%HLnjD#F1Y{YPETGx$l2_6pBs~Q_&yOW_Ia(N^R6owb6|xLo$sH! zGoht?uZRSg>tv8+o`ICL%*hX;6`?+1wW&5}LN0u*Az5)kt zUd9}4Okk&K9W`5NKPYKtuIiiO;0q|>p0J%^euk}|9kSfGEEtGZ2Cu)m(Q6Ga0>!j>%nJnH$rpGpUCg_+WDy z+X$zuo|ci(LrtA^KE>vcU7I=lBso`u7<*%#NwHZJUlX-K)en7=9!*LKNg!*r3ivKn zCCIFI;v(8Kf8b}_Qp=`3;$~fPfr}^W!bI)7k}^Lgo``=*831q zZfZJuPGtM`UL&DojtjdrIJU(|@Y5jpb*-_!5vL{d)9>sNQa(>H`6Cty<6GJj9>T( zG8ld9x*gB&wT0Nv6^}ysZLnB(Yg$%%!8He$>@pXHSh2febnrg9IFD+uUjd6X)HxvG za?7y8<|Afn$<8agR$~hc;9`O*@>543GQ1!KY5y7*(Q_L zAH=c{*LgjB))oTi5%BTp*trcxG6*5cElfv0aBhom3%3j}|5P`HCl#*>O7A~J)(0xX zhp(#*9bSkNUHVwYCfjn$(X;-`P7;Gl+FRVkd%x0sj+Oeh|3YBF+0gw%;k?%G_lSp= z;>BqdIcoP+wbve{E^2x5)3tHVJvmY8!yP9h$s$`n=R7F`(~A4tit`TXn>>!9WXf7R z%LMN_&7>xOmdQ-`#;W*RCt=2AKjORZ!5WSePdmb<7P3V!ZQo)pdqL$ZKN>MT3$S}P z_LV*(T%=#)q1upg(ZH?WnhHknCu3E2=Iq#Gb~vr5nWw^_YcFmp`m+)n{A8;Vd9|Ea zweD0TIw$Mf+<^+`zq;+h@aN*`n+D;W!cO*o&L`_f=Nm3eR=@65yJ}4CUFXca0yK45 zU+a*#Ru}30Q!h}dsWstGN5>9kpc7|b(gkGbt8`4 zoaU3O>f{U20J6Kdc0o%FfLO-ChCJ1cdnI`Q&CVzvoycp~dK(H=o5z0i3hn<2oJWvd zxEuY$n0_+YqVOOnTsDvggErlDP5xBjCAu_AMD6K;)_*YU>F2nx8sRr1uL(%?Rb{bb zKL8RoBJUwK#?ND80nN?NSJor@%k01^|sI!#!qJrawPGo2P=F1oWgu>EYEqWWFx(T-crsb>8jPnNmRe`RU8o_hJUpRpv)Ses8yS<`lBUT^-z z+QpxFC7iMTqW{ezefkftJkWn-Aze;c)>pjIx*e4prSF+I1rJ2tC3I55bsJwt!i!Qa z8S>?!RlBBx`M2qTQ+4m7j176H3a9jA0bp(CWT>JPWveAo`epd6v~n-8a?tRHtSrGI zvJ+}b2dsK?nNmHlV4{ykB&@j7O=|nyPWtONH+t>6KFznA*{2lG^R@H&1F82>zCZ?2 zcYkKs8qZ#<>IN#YVve7emrMwXa0qV!+C`cFF!x}hn85Wg% zk<;U;L7OrXP^!t5wljJ>bdgn?MMJ_;(zzFuJcH$sqY?D$DpGLDN#>A(FZr*dc&WI@ z%ItW!>%jtgmO00DkIN_tm*Q@lP$CX_IU7B)8M#^nh!-WMJ7e`C!HO|oj14*iUYA4w zl7OZHppU-5K3gus?t9WD@}MP@E@hLX<#%F7I`#dMgZHZ@OCJoMlmKU!#)9? zf1CpSgrYL$X6BBnW_5&pkDX)`=vShmg&snyP0@P14uOZ{1(h5$L%h8;YSq5JXrIKp>3)A zBU=K%IvD?R^<3|GV*F z0^!f0=;5^IaUJqV_krM5bh7EOnq_;3xrY)|Ck|IF1s@4KP}|AA*S2>|d}lB?nAIX1 z7);zX0=qS~)*pJBUvt~*{pe(?`|;`YSs6gy!{kkn7hFDyshh%Y*DM literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/rows/parts/.part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.crc b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/rows/parts/.part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6.crc new file mode 100644 index 0000000000000000000000000000000000000000..1db011f0445b2286895768bdd3952108949eec2e GIT binary patch literal 56 zcmV-80LTAha$^7h00IEGCR@g^w2n)JcSos6P==EYXi4Go^`%JmV{UrSKlB0RkU~M2 OO^Z?FXfCh!QbFXNxf&Y) literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/rows/parts/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6 b/v03_pipeline/var/test/reference_datasets/raw/gnomad_svs_from_vcf.ht/rows/parts/part-0-e3666fa7-5bc8-471d-ab31-f4fad8e9ebb6 new file mode 100644 index 0000000000000000000000000000000000000000..1c7396f07ba276703f63763535e3bcd17b266af2 GIT binary patch literal 6030 zcmV;97jfu$7XSc$A^-p=wJ-f(ej#1E0J>mVQa2C~jcW{Z#r($l|Lfl=SfWti(M&ur z3u)iA0bpCUiBdpoQAglU-6{y3P|Ya7>Hl}$d+#k%3LOOw1s4TCPprno3_TZ1Aw4H+ zU*F0B$B-V60be4z8D~6*kY_BVusZY^p`D1YUC!HcEFv<>QD*L6si;n1mNK=> zN~|^XtQLHR4q<4P+@iO96!U_dQTwepoGNfgiaGZ%#|aPUTOpl~u+eDH&P8kw0CD`O zQaOlPo^WY70k5jf=ugtA-qn~Qqg_s*-NzC0#BGQI&CWi^JCRbuM{4;Ggdm*@8O_V!A`1g8&G8o(`o z_Ir9E0Ms(n@L|xLrVwIt*ZpBUb&OCvQ|tEnlH1R&e1djGIyo6y8QBbPdjPZS99Gr{ z4ytu$_ff zTjWN$v3n+~ON(2P+T<+MGN2P0$OGI&Bgs@e{$sWRm2=QK=;Vm<5H zcfRakeiIKjtR#++L`+`1z=Q$AHGkB(fSgi6=;uDi2wNsp8%s%ZDerOAr71yy!_kh_ z9wz9c^^b{=+qj>nm{<|Oimir%kO#r?&sr5c!dOky4%H6!W{g~3#hl!qLxIfDjA5fe zxDZ<(V)1Q+=4otnV_6-eC8mUiV^z($epY1p8KFa0ir@;nE zBZUcU@cB7`OCfqmp6xe*H&I?{va*Q>CRq${YEZ*jGR*op&C=-V3CVVyCOUrqgvF+! zE*7)h+Y@Z@l6pyT9nD6wp&XSR1c zSN!$`wspt1@n=9t5tVPUtww^iMCqq}3G|}G2Wq=oyB9`&jWEBg$uHB-qgkYfrA6&Q z(w0D<$h5*Cj`XrimLur`c1;X3jkAT<++6bWnJZNE;JPi)y z?iXXq01b;d5A9f9vW6-vZp#JU#=JnQ!D5BCy=IcnL6Taxih zA)nUB#Y{M{Lc45a?*L3JTNGnrevTYdU<6j|ot?y;$uIjt0Gli{>O^w=LAZ8I6>+zp z?apRbvYQje%g_>}~}BPC?4rh1w&0#+X7nLS;bQ= zL4b?>-TAR2O^5w49gxO_swfkFpR}V5W8CSO)l4SFfh@S_C zM8$-{?M2T1&=?;r$Cek1YOb_9Yi-+vC?&*FcYd~_1O`XxVskaQQa-tbf^P=pV?t67 zt(~jc{x$VfiZJGtg~gRMmgPZ0>Us+(qL4X)My1&t} z=um-D60p*)El)2VqnavEcp_`@wB^gQN-)CpYifIgkIGLT%LOG`y6e=wp}l+gT{z|( z=v_MgrCrl2&WN;U^)gdyBuk+?skagw_>SIoH*Me9)FQ^ijm!g8-ql9Xv;*T$C;6kl zexewxA26)}+LOS<0ZtL3`Kx(TQJyRwXem$P<9`I>c4N$+U%OO0H}D-yGIdLFuz1TC zOn6pRlHZEt!V%2Er0nyVA&pn(TS1cpGIC(C-AAMhbkhAvK(BsEfq&lrYsTBQ=E9p_4WjaSWA1uYBlRM1-XfYTBP5}5WJVl;$(t(2uPzbo}8CHU0=CAaa zbzIfcE`PQkh~<1#F<+`TrqqrokGnyhGK-BWv+7B^BevV*ZOX5INB|3I=-YL)+r(x5DiTf(u7=P? zlL@&^9x}0&CjlE;?P%>^%AZkr%j!8>`gid(|>zvvW%jzuYciCOKn1lIwCW zsYP8o*Rve~>eH~HgEoc@1J9Z1H6sWS=cD4V@vh!f>G69lAc{I*_06_>&9p1nGDUWl z#+4i6!!Z`RabQ`~-R{`wa%8?6H1FLhpjgxjR@h5>v6I)x_d&_S{ z^Qp$?TbHDa2zN9&cnbh%j@84vI|5P<=uQPadD_VC^kwavD#Xb1Or3IXDq(g$;vW+kSY zYzK-&V`61+VD-lbm@rkrY-$7Ojz`$E zdn12GV+0JeUD;3Nl(0a|+TMc+E!2Pl$buXj)^1x4ndkgal$XU8t3Axtj==TXptPKF z6ZrfpW|Q{>5WN2qoe*2euBg-v@5l{->;B@gsBW` z;G;e#laaGqL82U?yc}5ucfV;XdJ2Nlr(I~UorB-E5TQx|tSj$PXVZol<$e8FHbxX2 z51gDJNSxFKhCePc`l2AZ*d7$79SikS5Us!v3&!bDQ^2zI>EQWJBG)c=1`1dJb}Ql3 zf4Kc-n*fGgYQVLR$kHwkiTf2rM8C+Pw~u;>BrD5}#&cHSOUi&5<$>31l9)&1Z^7_O0n_Ed_uIkI*NUmFW%a2*_7&Q6Jw*S;U0j3a0 z1l?hHR46iuN&|(@2^V$6ODMx-Ik)2?`Ay6%-5#a{+U2?Sui|gf+JzlLsrggdy0_Xs z0p1fQwWqFzZO+ze(v=%iezWCZtig(D-@0x0SJLip&SMIF3=LNfaB`I4OE-Tgt!?f@ z6_k;ko4oM_zkZGE$i3!G7o1QjK<(f&?L&B}KW^Y8tc;GK_Cm^`Hc5UgLom}V(nh-% zvb~2<^}*Z_BN$6s+g(AY{VsV9eSYA~jzOrdQs8W}iDPN2io$9~@ z#e$hMwk$GqX&|mV?b4L;n#ndrRd6DEda9t=W*RG=kOcB)#WOXu6iD6WQgXSx*{P9u zwG0_B-j1d(Ol#6w{5;9WcI4!;Z*FF9()eDIOM4zj!Q7P-sk5M06pN+;?yVs>YmWx}?@ z&!M1Em3ysyrFLbF?a8j%y_OBj+NL2|+sz2R1I;92iipuCb1Y*a^+HWMQ@cZP)X_=c zOJZ$$adKq8q28NqjxEiubNDS0|b~5!M$R}PO=t0s$rijd?NR(>AuFiKG^(Os_@Z<=S3LkuNn6REx1j#H+H=HL*b z6-9pTilMh=$ZG3!$6eEZ)5ILgapLVd?P19sP(RPL4h<3PHYzYzK*}31>Ex$}My%ba z9qCi7lbGX4q1?xgB6&?NG3*sUQ-bjRPUlD#VwM|8fLS~XoEy5KjM~-;Bi$GVWQJx8 z8~=({rkQ1U*A3)Cx$_0X_9k%2anRl%vF=Co6DY{T>ZqCBelT_lFcljj|D>+R2WaF| zdy4iRCtlh{mw4o31284ZYi;ZvxlOivB`(^)ZbemlPkUEPrJEBA9_Pa@xmDX@g{FTT zR%UzgEo}jmS~=Q(61|XVSyg#hC|@9Owxj)h^rBLN_Qi>zYgiVsw0|E}A9nMnKm;9x z-R-mBh+*{m#$1&7Es$&P_Onp+WOtc|pU@#~4VST8Kxj8e_O~-?Vqo03L6rDmZw3jYd~voD>LBi< z{*yqt9)Dzn?L=JOgMzme6n=mn^2eYU;YG$H0BEa|BsD{p8F@qt?0sPlB`GZ$5ZYzO zcBI28Y2Y6Oic$|di^2dQ2uyU?efZonNy`C14{EACtYL|Q$OA6^)l7se2M#VY4h*yh z)bPjY=or+F0|kue1?8QYk1c=@>a-`cWkRJZOs{_gY>CP7NlN>eXB0m$KzP(R`q(T( zJH?|dEf+o@O|pAFKTgygS=QR+!uAqk7*rNC=$41otjwfQ+1NOnz68{x5Plzw$SE~1 z&32s4c9~Q35x82-?)5eJLm+P;#uoh^wycNnNpZ?Koo+YiN`Aaw9+0cjPSKtN+AcQ^ zQG7gL=$FLzud8bQ#F)4pE{uBP`hbrOx(qe>YX{o2nTkR^d)U%7Mq$d3^bt3N3*u-_Det@fQ|>g5v_jwq5>fHwyH@Lr1Am?8mxzZ4NhM8)k|YfVMgpGwzdBP3v;Y9LgSGRR{)d||A)})ED}I8s z1q!MBt5%!{FNuRPx0MJ_!R<>i_44Qg#zi|9w!Lc2A)5hzmD#?@?OefTjDvUC0?ojI zLNY`@wnXfp64bEnl9EVrLe1})0R{OmUTE)KMd4O`E;u-%~Yu;MZi zW@cfsP!zckh+K*I;@L3JRIft25MjIb?)B*^>#J+)+p>GT^>6a{)(8Vq_Tw!hm|1Il zDmaCxf@Hx9u8Y(OO2vey7AjOkk-iSD9!JhF`n>6a1>*xn5FV7vM*__s6tV|8n=JU6 zbYTw2MAEhUV0+hd4XWvk4f1Qs6s8!^ux+mq*$s^3IoFt04*5dBcN9^-)3rv2$Qw?L zL(3h1^G3^Ok>qMSj&|AhZ4_wnsS?V}))eHy@MZb8E}ixwxpozKT(A)~9J<85-0+XVzUCNW8VY>A>DwZ! zMDZ@Ip&ld1$P}4V@L6zL9$VHcHtjvHy-UI zeS!pksXe5@+34U_Nxe^sEwf02wf9Qe%S6RQl9RthPVySmjTU@7@Wq+4z(0a+3bEX_ zPpsi*Tj-wYmzh^mCJx~@uwhcT#%mY4hPvS7db2EEIE()pu9<@jF2t-(&|b z+Am&2>w@9?Do5XhAdG&_T+sK|B~z064Ngh=98wHbJkXWb%HcsPr07ITKkB@7nS5~% z?WdG>Q4?AjU;1#GZ>3?%GLl~gXu;7iQqn4QxX-BU5YY|5K103H9A{GAJtR(w3F*Zz ziH!cPRA1J_L{B<^CfuZxl1hgH*<$ms;H1^+RD94muY3dguR;#Fp)}zc?BOqmL){p# zv>VVD=D=pbf{{CFQ734m4sIn!ozL4mC&T=5-xPSlcxC;K5cupOZLFQBRe6T$tZBXx zL0yABoIoEPTE`UL=ox{_fY$Y8ADak|_prbG=zWkFcB;MK0*y8urunw_{%)gY%{qt% zU`-DM^py{u+%yIckTwxHfJS4nfcxjmTB7Omgnl(3B1i^O$RP)>y~WT-|dS|`q5 zo>cPMaC}HSMq>gMZI#obRS5d$Ryn+Kkx8vmQ-#|rc|J7*g=}dnIsVjo;KI*YO_-+u zt}T#H04YL^HG+l?U$*;>WXqLx5*m;5)iwNdHw+N$Pj3hdaHRaut6=P=(hGNTKo-^j z9y|C3#h(#~XNVF+7`AVOLgnGCyXB$qYxmvp;o!6y{65>Y>m$7U5SH9LOfZfM6SPvF z@Pz`_+yGU%13>Ia!b0-^?j+nB5dmnCQ}Ne?ah;pC3^opIB#}7^4FCWD00000D77#B IAOHaX0GRuvIsgCw literal 0 HcmV?d00001 From 19b81ca697538b0dc711fcdc4a30a044e6ea273e Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Tue, 4 Mar 2025 20:28:21 -0500 Subject: [PATCH 13/18] handle set of dataset types during allele type validation --- v03_pipeline/lib/misc/validation.py | 31 +++++++++++-------- v03_pipeline/lib/misc/validation_test.py | 4 +-- .../reference_datasets/reference_dataset.py | 11 ++++--- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/v03_pipeline/lib/misc/validation.py b/v03_pipeline/lib/misc/validation.py index 063312f47..f7bc93520 100644 --- a/v03_pipeline/lib/misc/validation.py +++ b/v03_pipeline/lib/misc/validation.py @@ -1,3 +1,4 @@ +from collections.abc import Iterable from typing import Any import hail as hl @@ -27,25 +28,29 @@ def __init__( def validate_allele_type( t: hl.Table | hl.MatrixTable, - dataset_type: DatasetType, + dataset_type: DatasetType | Iterable[DatasetType], **_: Any, ) -> None: ht = t.rows() if isinstance(t, hl.MatrixTable) else t - ht = ht.filter( - dataset_type.invalid_allele_types.contains( - hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]), - ), + dataset_types = ( + [dataset_type] if isinstance(dataset_type, DatasetType) else dataset_type ) - if ht.count() > 0: - collected_alleles = sorted( - [tuple(x) for x in ht.aggregate(hl.agg.collect_as_set(ht.alleles))], + for dataset_type in dataset_types: + ht = ht.filter( + dataset_type.invalid_allele_types.contains( + hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]), + ), ) - # Handle case where all invalid alleles are NON_REF, indicating a gvcf: - if all('' in alleles for alleles in collected_alleles): - msg = 'Alleles with invalid allele are present in the callset. This appears to be a GVCF containing records for sites with no variants.' + if ht.count() > 0: + collected_alleles = sorted( + [tuple(x) for x in ht.aggregate(hl.agg.collect_as_set(ht.alleles))], + ) + # Handle case where all invalid alleles are NON_REF, indicating a gvcf: + if all('' in alleles for alleles in collected_alleles): + msg = 'Alleles with invalid allele are present in the callset. This appears to be a GVCF containing records for sites with no variants.' + raise SeqrValidationError(msg) + msg = f'Alleles with invalid AlleleType are present in the callset: {collected_alleles[:10]}' raise SeqrValidationError(msg) - msg = f'Alleles with invalid AlleleType are present in the callset: {collected_alleles[:10]}' - raise SeqrValidationError(msg) def validate_no_duplicate_variants( diff --git a/v03_pipeline/lib/misc/validation_test.py b/v03_pipeline/lib/misc/validation_test.py index b1057e1e2..4146e8ddc 100644 --- a/v03_pipeline/lib/misc/validation_test.py +++ b/v03_pipeline/lib/misc/validation_test.py @@ -85,7 +85,7 @@ def test_validate_allele_type(self) -> None: "Alleles with invalid AlleleType are present in the callset: \\[\\('A', '-'\\), \\('A', ''\\)\\]", validate_allele_type, mt, - DatasetType.SNV_INDEL, + [DatasetType.SNV_INDEL], ) mt = ( @@ -119,7 +119,7 @@ def test_validate_allele_type(self) -> None: 'Alleles with invalid allele are present in the callset. This appears to be a GVCF containing records for sites with no variants.', validate_allele_type, mt, - DatasetType.SNV_INDEL, + [DatasetType.SNV_INDEL], ) def test_validate_imputed_sex_ploidy(self) -> None: diff --git a/v03_pipeline/lib/reference_datasets/reference_dataset.py b/v03_pipeline/lib/reference_datasets/reference_dataset.py index a8482c578..8532f3d3f 100644 --- a/v03_pipeline/lib/reference_datasets/reference_dataset.py +++ b/v03_pipeline/lib/reference_datasets/reference_dataset.py @@ -104,6 +104,12 @@ def version(self, reference_genome: ReferenceGenome) -> str: ) return version + def dataset_types( + self, + reference_genome: ReferenceGenome, + ) -> frozenset[DatasetType]: + return CONFIG[self][reference_genome][DATASET_TYPES] + @property def enums(self) -> dict | None: return CONFIG[self].get(ENUMS) @@ -143,10 +149,7 @@ def get_ht( if enum_selects: ht = ht.transmute(**enum_selects) ht = filter_contigs(ht, reference_genome) - # Reference Datasets are DatasetType agnostic, but these - # methods (in theory) support SV/GCNV. SNV_INDEL - # is passed as a proxy for non-SV/GCNV. - validate_allele_type(ht, DatasetType.SNV_INDEL) + validate_allele_type(ht, self.dataset_types) validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL) # NB: we do not filter with "filter" here # ReferenceDatasets are DatasetType agnostic and that From a5330e5773b18f12be2487e7c92854ffff3af324 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 5 Mar 2025 04:38:45 -0500 Subject: [PATCH 14/18] this is a cleaner approach --- v03_pipeline/lib/misc/validation.py | 31 ++++++++----------- v03_pipeline/lib/misc/validation_test.py | 4 +-- .../reference_datasets/reference_dataset.py | 3 +- 3 files changed, 17 insertions(+), 21 deletions(-) diff --git a/v03_pipeline/lib/misc/validation.py b/v03_pipeline/lib/misc/validation.py index f7bc93520..063312f47 100644 --- a/v03_pipeline/lib/misc/validation.py +++ b/v03_pipeline/lib/misc/validation.py @@ -1,4 +1,3 @@ -from collections.abc import Iterable from typing import Any import hail as hl @@ -28,29 +27,25 @@ def __init__( def validate_allele_type( t: hl.Table | hl.MatrixTable, - dataset_type: DatasetType | Iterable[DatasetType], + dataset_type: DatasetType, **_: Any, ) -> None: ht = t.rows() if isinstance(t, hl.MatrixTable) else t - dataset_types = ( - [dataset_type] if isinstance(dataset_type, DatasetType) else dataset_type + ht = ht.filter( + dataset_type.invalid_allele_types.contains( + hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]), + ), ) - for dataset_type in dataset_types: - ht = ht.filter( - dataset_type.invalid_allele_types.contains( - hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]), - ), + if ht.count() > 0: + collected_alleles = sorted( + [tuple(x) for x in ht.aggregate(hl.agg.collect_as_set(ht.alleles))], ) - if ht.count() > 0: - collected_alleles = sorted( - [tuple(x) for x in ht.aggregate(hl.agg.collect_as_set(ht.alleles))], - ) - # Handle case where all invalid alleles are NON_REF, indicating a gvcf: - if all('' in alleles for alleles in collected_alleles): - msg = 'Alleles with invalid allele are present in the callset. This appears to be a GVCF containing records for sites with no variants.' - raise SeqrValidationError(msg) - msg = f'Alleles with invalid AlleleType are present in the callset: {collected_alleles[:10]}' + # Handle case where all invalid alleles are NON_REF, indicating a gvcf: + if all('' in alleles for alleles in collected_alleles): + msg = 'Alleles with invalid allele are present in the callset. This appears to be a GVCF containing records for sites with no variants.' raise SeqrValidationError(msg) + msg = f'Alleles with invalid AlleleType are present in the callset: {collected_alleles[:10]}' + raise SeqrValidationError(msg) def validate_no_duplicate_variants( diff --git a/v03_pipeline/lib/misc/validation_test.py b/v03_pipeline/lib/misc/validation_test.py index 4146e8ddc..b1057e1e2 100644 --- a/v03_pipeline/lib/misc/validation_test.py +++ b/v03_pipeline/lib/misc/validation_test.py @@ -85,7 +85,7 @@ def test_validate_allele_type(self) -> None: "Alleles with invalid AlleleType are present in the callset: \\[\\('A', '-'\\), \\('A', ''\\)\\]", validate_allele_type, mt, - [DatasetType.SNV_INDEL], + DatasetType.SNV_INDEL, ) mt = ( @@ -119,7 +119,7 @@ def test_validate_allele_type(self) -> None: 'Alleles with invalid allele are present in the callset. This appears to be a GVCF containing records for sites with no variants.', validate_allele_type, mt, - [DatasetType.SNV_INDEL], + DatasetType.SNV_INDEL, ) def test_validate_imputed_sex_ploidy(self) -> None: diff --git a/v03_pipeline/lib/reference_datasets/reference_dataset.py b/v03_pipeline/lib/reference_datasets/reference_dataset.py index 8532f3d3f..60f0fadf6 100644 --- a/v03_pipeline/lib/reference_datasets/reference_dataset.py +++ b/v03_pipeline/lib/reference_datasets/reference_dataset.py @@ -149,7 +149,8 @@ def get_ht( if enum_selects: ht = ht.transmute(**enum_selects) ht = filter_contigs(ht, reference_genome) - validate_allele_type(ht, self.dataset_types) + for dataset_type in self.dataset_types(reference_genome) + validate_allele_type(ht, dataset_type) validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL) # NB: we do not filter with "filter" here # ReferenceDatasets are DatasetType agnostic and that From e4f2b80d3a4dcd66d9a01422bdc50e6ca1d32924 Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 5 Mar 2025 04:42:29 -0500 Subject: [PATCH 15/18] format --- v03_pipeline/lib/reference_datasets/reference_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/lib/reference_datasets/reference_dataset.py b/v03_pipeline/lib/reference_datasets/reference_dataset.py index 60f0fadf6..d5add22e7 100644 --- a/v03_pipeline/lib/reference_datasets/reference_dataset.py +++ b/v03_pipeline/lib/reference_datasets/reference_dataset.py @@ -149,7 +149,7 @@ def get_ht( if enum_selects: ht = ht.transmute(**enum_selects) ht = filter_contigs(ht, reference_genome) - for dataset_type in self.dataset_types(reference_genome) + for dataset_type in self.dataset_types(reference_genome): validate_allele_type(ht, dataset_type) validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL) # NB: we do not filter with "filter" here From fb0f6fe6caa81c95b0e35993fd8cb5184c47d77c Mon Sep 17 00:00:00 2001 From: Benjamin Blankenmeister Date: Wed, 5 Mar 2025 22:18:30 +0700 Subject: [PATCH 16/18] Update reference_dataset.py --- v03_pipeline/lib/reference_datasets/reference_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v03_pipeline/lib/reference_datasets/reference_dataset.py b/v03_pipeline/lib/reference_datasets/reference_dataset.py index d5add22e7..d2adab683 100644 --- a/v03_pipeline/lib/reference_datasets/reference_dataset.py +++ b/v03_pipeline/lib/reference_datasets/reference_dataset.py @@ -151,7 +151,7 @@ def get_ht( ht = filter_contigs(ht, reference_genome) for dataset_type in self.dataset_types(reference_genome): validate_allele_type(ht, dataset_type) - validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL) + validate_no_duplicate_variants(ht, reference_genome, dataset_type) # NB: we do not filter with "filter" here # ReferenceDatasets are DatasetType agnostic and that # filter is only used at annotation time. From 54127154aa98e5869f0ba7b5205b776e0a5b7f53 Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 5 Mar 2025 11:30:24 -0500 Subject: [PATCH 17/18] run validation on sv get_ht --- v03_pipeline/lib/annotations/sv.py | 1 + v03_pipeline/lib/misc/validation.py | 3 -- .../lib/reference_datasets/gnomad_svs.py | 3 +- .../lib/reference_datasets/gnomad_svs_test.py | 30 +++++++++++++++++++ .../reference_datasets/reference_dataset.py | 2 +- 5 files changed, 33 insertions(+), 6 deletions(-) diff --git a/v03_pipeline/lib/annotations/sv.py b/v03_pipeline/lib/annotations/sv.py index 278c045eb..868232a2f 100644 --- a/v03_pipeline/lib/annotations/sv.py +++ b/v03_pipeline/lib/annotations/sv.py @@ -180,6 +180,7 @@ def gnomad_svs( gnomad_svs_ht: hl.Table, **_: Any, ) -> hl.Expression: + gnomad_svs_ht = gnomad_svs_ht.drop('locus', 'alleles') return gnomad_svs_ht.annotate( ID=gnomad_svs_ht.KEY, )[ht['info.GNOMAD_V4.1_TRUTH_VID']] diff --git a/v03_pipeline/lib/misc/validation.py b/v03_pipeline/lib/misc/validation.py index 234f11edc..063312f47 100644 --- a/v03_pipeline/lib/misc/validation.py +++ b/v03_pipeline/lib/misc/validation.py @@ -31,9 +31,6 @@ def validate_allele_type( **_: Any, ) -> None: ht = t.rows() if isinstance(t, hl.MatrixTable) else t - if not hasattr(ht, 'alleles'): - return - ht = ht.filter( dataset_type.invalid_allele_types.contains( hl.numeric_allele_type(ht.alleles[0], ht.alleles[1]), diff --git a/v03_pipeline/lib/reference_datasets/gnomad_svs.py b/v03_pipeline/lib/reference_datasets/gnomad_svs.py index 5743de3c4..114a45f3b 100644 --- a/v03_pipeline/lib/reference_datasets/gnomad_svs.py +++ b/v03_pipeline/lib/reference_datasets/gnomad_svs.py @@ -14,5 +14,4 @@ def get_ht(path: str, reference_genome: ReferenceGenome) -> hl.Table: N_HET=ht.info.N_HET, N_HOMREF=ht.info.N_HOMREF, ) - ht = ht.key_by('KEY') - return ht.drop('locus', 'alleles') + return ht.key_by('KEY') diff --git a/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py b/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py index 8cc1dd90c..ff24aa3d1 100644 --- a/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py +++ b/v03_pipeline/lib/reference_datasets/gnomad_svs_test.py @@ -21,6 +21,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): [ hl.Struct( KEY='gnomAD-SV_v3_BND_chr1_1a45f73a', + locus=hl.Locus( + contig='chr1', + position=10434, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.11413399875164032, AC=8474, AN=74246, @@ -29,6 +35,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): ), hl.Struct( KEY='gnomAD-SV_v3_BND_chr1_3fa36917', + locus=hl.Locus( + contig='chr1', + position=10440, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.004201000090688467, AC=466, AN=110936, @@ -37,6 +49,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): ), hl.Struct( KEY='gnomAD-SV_v3_BND_chr1_7bbf34b5', + locus=hl.Locus( + contig='chr1', + position=10464, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.03698499873280525, AC=3119, AN=84332, @@ -45,6 +63,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): ), hl.Struct( KEY='gnomAD-SV_v3_BND_chr1_933a2971', + locus=hl.Locus( + contig='chr1', + position=10450, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.3238990008831024, AC=21766, AN=67200, @@ -53,6 +77,12 @@ def test_gnomad_svs(self, mock_vcf_to_ht): ), hl.Struct( KEY='gnomAD-SV_v3_DUP_chr1_01c2781c', + locus=hl.Locus( + contig='chr1', + position=10000, + reference_genome=ReferenceGenome.GRCh38, + ), + alleles=['N', ''], AF=0.0019970000721514225, AC=139, AN=69594, diff --git a/v03_pipeline/lib/reference_datasets/reference_dataset.py b/v03_pipeline/lib/reference_datasets/reference_dataset.py index d5add22e7..d2adab683 100644 --- a/v03_pipeline/lib/reference_datasets/reference_dataset.py +++ b/v03_pipeline/lib/reference_datasets/reference_dataset.py @@ -151,7 +151,7 @@ def get_ht( ht = filter_contigs(ht, reference_genome) for dataset_type in self.dataset_types(reference_genome): validate_allele_type(ht, dataset_type) - validate_no_duplicate_variants(ht, reference_genome, DatasetType.SNV_INDEL) + validate_no_duplicate_variants(ht, reference_genome, dataset_type) # NB: we do not filter with "filter" here # ReferenceDatasets are DatasetType agnostic and that # filter is only used at annotation time. From 251142330eb457560e5879cf2bcc3c270201fdfb Mon Sep 17 00:00:00 2001 From: Julia Klugherz Date: Wed, 5 Mar 2025 12:33:14 -0500 Subject: [PATCH 18/18] fix gnomad_svs ref data mock table --- .../GRCh38/gnomad_svs/1.0.ht/.README.txt.crc | Bin 12 -> 12 bytes .../gnomad_svs/1.0.ht/.metadata.json.gz.crc | Bin 12 -> 12 bytes .../GRCh38/gnomad_svs/1.0.ht/README.txt | 2 +- .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 130 bytes .../metadata.json.gz | Bin 0 -> 157 bytes .../.index.crc | Bin 12 -> 0 bytes .../.metadata.json.gz.crc | Bin 12 -> 0 bytes .../index | Bin 129 -> 0 bytes .../metadata.json.gz | Bin 158 -> 0 bytes .../GRCh38/gnomad_svs/1.0.ht/metadata.json.gz | Bin 303 -> 333 bytes .../1.0.ht/rows/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../gnomad_svs/1.0.ht/rows/metadata.json.gz | Bin 554 -> 610 bytes ...t-0-6ba285cc-16c9-426c-9af3-382a9815db5f.crc | Bin 0 -> 12 bytes ...t-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.crc | Bin 12 -> 0 bytes .../part-0-6ba285cc-16c9-426c-9af3-382a9815db5f | Bin 0 -> 147 bytes .../part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c | Bin 125 -> 0 bytes 18 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/.index.crc create mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/.metadata.json.gz.crc create mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/index create mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/metadata.json.gz delete mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/.index.crc delete mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/.metadata.json.gz.crc delete mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/index delete mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/metadata.json.gz create mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/rows/parts/.part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.crc delete mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/rows/parts/.part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.crc create mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/rows/parts/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f delete mode 100644 v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/rows/parts/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/.README.txt.crc b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/.README.txt.crc index c743d69461bb4ae175b46c374cc6ae1eacdc1b7d..0bc2c61639da9908795e2e19e807fba818a04f58 100644 GIT binary patch literal 12 TcmYc;N@ieSU}CU;wc-%~6Kn(S literal 12 TcmYc;N@ieSU}D(6>gQho6=?*T diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/.metadata.json.gz.crc index f1c831b072498d3bf5d190a0a74ce791ac3ab088..79a393e982006ab9402615b1b276d2f6197188ed 100644 GIT binary patch literal 12 TcmYc;N@ieSU}D($dWi%86omtu literal 12 TcmYc;N@ieSU}CT>Khp>R5%L2( diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/README.txt b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/README.txt index 50aa6a3d3..967ef021c 100644 --- a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/README.txt +++ b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/README.txt @@ -1,3 +1,3 @@ This folder comprises a Hail (www.hail.is) native Table or MatrixTable. Written with version 0.2.133-4c60fddb171a - Created at 2025/02/16 18:40:38 \ No newline at end of file + Created at 2025/03/05 12:27:53 \ No newline at end of file diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/.index.crc b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..6644e60277b453c4a387a07ed55543eafcff1995 GIT binary patch literal 12 TcmYc;N@ieSU}7*=Qx5|G4{`z( literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..7c4edfdb79f7684bb1b70190966bb2c1cd57a048 GIT binary patch literal 12 TcmYc;N@ieSU}9)E6FC(C6DtFC literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/index b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..adf23a1892a28ba885b29196baf5068fb48269ca GIT binary patch literal 130 zcmeZZU|>iGVvVi(e-+a2GBSuTGcYhS$ff7y=Q_IR28YF$8OJ;Mxx^=D6dA@F0D)nW tr5S@^nrV``ld*}VfoTgP6N7?|%EBrpBcM5|Ftb!O85tNEfjmM+0RXo`9&Z2u literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/metadata.json.gz b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-6ba285cc-16c9-426c-9af3-382a9815db5f.idx/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..96b9b1538f1f2e8e155a33ac2e75f02cb5f30b7d GIT binary patch literal 157 zcmb2|=3sz;(B6YwhYSQ<{->U^6wwVWzV6M>)%C!z_qLjbwCDB&&7bAr`S;kKot@2E zTXSjZCkf%hX1<~Myu04qXg_iIV5O(zwsmE_jteYU8e=7GGWMO(Z#o+J=}Btlck|VO z=B#XemYu?-|AO!LMbC1-vDNX3^d|XmW I-v*$o0MkoDQvd(} literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/.index.crc b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/.index.crc deleted file mode 100644 index ea981953947949280e41315c8e224f3958182c6e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}D&ty7?;r6aEAR diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/.metadata.json.gz.crc deleted file mode 100644 index 8dde06781893a7065b7b40f10e49e259df4e2dbd..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}6wd)|&$W4iCVvVi(e-+ZMGBU6*GcYhS$ff7y=Q_IR28YF$8OJ;Mxx^=D6dA@F0D)nW sr5S@^nrV``p|OdjfoT#GgMyCA!YU>spfRd2qf|8+85kLXJVGV`0F6r?AOHXW diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/metadata.json.gz b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/index/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c.idx/metadata.json.gz deleted file mode 100644 index b6f286eca51ed7d4dc5c5a2b1dbdb82bf5d2d957..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 158 zcmV;P0Ac?hiwFP!0000009B2#3c^4TME_;#D}u&Tuh%Apf+AR6;wE<(_T;hwL&(3o z+Sv`vyf-u2*wc;j5EH{$Ta-J4I(u$Ho;isZb^fou9al6BaBG9e4|<=y1ReacwmBs^we3Lqv)!mkR9cThipX6FX8UH@B1!ySD_sjkp6%+TuDTRqo_Z|=s>x+f~ec+cY0CuFkr*(F3YyP zQI8^wH0(fwWMEPaBF(8ic=8qX6ic_>-j+G|%BoJPy--^!$kOV9*{W?)r(Eta$l_>1 z%{lxHC0WlVUmL8|Dd6#xOogI$IKXj2UZjosIG(q`Jbj+5uJQZZVlr)0?-Tk6#R@}S z>-X+yksPA&J*Gl{07GJ}X}LZVH_rJfm^$W4g0u#a3Y`aNLP?MHV2jVkBv6()ux2-x zg#^Cys5<>$*O0v^r`mGsmy|M9w0~#~K0+yJDu2xae;sXVAFXome7JGAfUV}1m2qRa fWLA_jC?P|{UjUpxOwczsZKUoO*oO>mOacG^HVC3t literal 303 zcmV+~0nq**iwFP!000000F6;!Yr-%P|1NpXM6J}3x8f=rGB;EPL!_kXsbOhS(l{w4 z-+gJ9SlrXz?tcI7E@{z`3&_4f*IY?N;(0WE#OO@97(g^0O-GX`dYG|zd@PE5GM`X{ zk%l8^kPPZfgUBjs&Q3g_PGV^8;!RV5V|HEBo`lM&AX}p^%(Q+;*K&EnATMPdIBW42 zG-SU@KMz=|YrvBwSqeo>Jj2O?yhsyGakBQsI(trcL;d!;NteELIew0{Za1dNN=cBW zMx;f*YxSk1Ww-9;<7p8j+X`5J@$Jl@w+>a;|NDBeGgVX@Zrte^Qy0I34|osMqG$ew z1^qrcbU9j8;Ph~pa09*PURChgaLJ5lR?t9!hvhX0GNHZ|#hkO*=|AZ%zIouI0;RaInc2fPLc zIi@N@`R_YU$R})SciCJZeEgns9{aqKk4WGxLoS*L%3c^<+)tJ}MtaCV!4o?m_W^zN zf1pQLl9)20H<5cRvhEH$M#tp_sF)7H&Xw=sge{fa6LlO-U4I(ISUG)TGCt#Ou^b;=fG#Rw(1Sf3DY^zVnyi}A)9d@K=ifLxgOU+u$? z$=0pj_+Ga+@Tx5XkqwV%P2xeDC#cd8t#ok7e}llWZOb`N`kP9ZmRU)%=sQ*$h+kiS%H%p4F&t**d6lE;QxCaJRKV#=K4usfn6(;l!+fyg zHUc$6rA8cv2~%YmEJUpE$w0N7G`;ATilatRPPyn_>zmKMY57F z-?+1Wl8MU$&ioXFJYqp>n&&pjBmcHUu91wyl3uR(MwgWM4QHHVx1rm}7kqSy^)ICF z(OPgT&8x26jXq>`{H82LI(vtun!^1j`kB)Uvtv3d+#A@0m|ceq&3?}z<`9RLX$^XK zIOy7RWrx{w_-ld1ntAbFP@R87iGm86TtApDT8nr)nwZP`;Moei`6M74*$q1Ibyq`2 w_h*8>LT>ek9b3z}Akm(J)v`8l4pzo|T}ek$O@@_{fCQ8N1Ih6-&)x?B0OHjv=Kufz literal 554 zcmV+_0@eK=iwFP!000000NqtxZ`v>v{V#qk>G=N0TT+6Cwy_DSN?TP$j(r1OgM%DX zm7@IjU5DlqHnn?fp78a(=N$VS;UkfB3&bXK$>ACJ@@~4?0qc?$@gLzp?0fRX`$-;f z$zl%7Y!dsJPu(7Nz;B8T>;hB5oh#3!39Pi*GktVswl|Ans)zlSb7OmRse9d)v&erf z{LkKPW%2#n(wpUL=8j~O@S${#0xeI2ZP@5eC$`Ec{cjaIaCaE+nh|zhldcx#Ksxs zc`fr)LG**_4c4!(KV|Kv8UYP67vE%U)!9&}MEin2+gHXeGyzkS;KEUg9`0)?z#Y3* zs5;wr`~Jbt|`C1!d@m7d}KC+58|3#)B~5CkL6+g5kX!0L}C%o+n7S(6Yt{cgZ{ z*k{?h^m|F=8r*miBK^I>BUU_(AeF swD@f%~%cvGBGgvvDyG->=^7>dTz3_95PHZO)@_LGUFOMW6Nt} p6H5cr2Minx2UvEPaPU82oz8Zw+rd+5MgT9+b`Su&fPs;L0RSYWEmQyi literal 0 HcmV?d00001 diff --git a/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/rows/parts/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c b/v03_pipeline/var/test/reference_datasets/GRCh38/gnomad_svs/1.0.ht/rows/parts/part-0-febb7dd0-28ce-479c-8ea7-9fe142bddf4c deleted file mode 100644 index 1dd934ec5ddca41ca340cc19bc54bf422ee9f473..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 125 zcmc~`U|^U6#2Q=m|0+zm%fxVng^{86?bZd~k3Fzs?7GR$c;pSUTzX!9uA_@?a9Dhq zalDhCOMG%hkzu?65Ev#|nlae3^xR};Ib@h-nq&^rb&Z{|<+ZVirGY5}3xk5phpYn? T0ZxvLyg++E0BjcnBLf2fU{NMO