diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index c8e0894c1..3c20ee35c 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -36,7 +36,6 @@ jobs: run: ruff . --output-format github - name: Unit Tests run: | - export HAIL_TMP_DIR=/tmp export GRCH37_TO_GRCH38_LIFTOVER_REF_PATH=v03_pipeline/var/test/liftover/grch37_to_grch38.over.chain.gz export GRCH38_TO_GRCH37_LIFTOVER_REF_PATH=v03_pipeline/var/test/liftover/grch38_to_grch37.over.chain.gz export ACCESS_PRIVATE_REFERENCE_DATASETS=1 diff --git a/v03_pipeline/lib/misc/callsets.py b/v03_pipeline/lib/misc/callsets.py index 63fc4939e..e2baa5bd4 100644 --- a/v03_pipeline/lib/misc/callsets.py +++ b/v03_pipeline/lib/misc/callsets.py @@ -30,7 +30,7 @@ def get_callset_ht( return callset_ht.distinct() -def additional_row_fields( +def get_additional_row_fields( mt: hl.MatrixTable, dataset_type: DatasetType, skip_check_sex_and_relatedness: bool, diff --git a/v03_pipeline/lib/misc/validation.py b/v03_pipeline/lib/misc/validation.py index 5540533fd..c4374c4be 100644 --- a/v03_pipeline/lib/misc/validation.py +++ b/v03_pipeline/lib/misc/validation.py @@ -1,6 +1,19 @@ +from typing import Any + import hail as hl -from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType, Sex +from v03_pipeline.lib.model import ( + CachedReferenceDatasetQuery, + DatasetType, + Env, + ReferenceGenome, + SampleType, + Sex, +) +from v03_pipeline.lib.paths import ( + cached_reference_dataset_query_path, + sex_check_table_path, +) AMBIGUOUS_THRESHOLD_PERC: float = 0.01 # Fraction of samples identified as "ambiguous_sex" above which an error will be thrown. 
MIN_ROWS_PER_CONTIG = 100 @@ -11,9 +24,45 @@ class SeqrValidationError(Exception): pass +def get_validation_dependencies( + dataset_type: DatasetType, + reference_genome: ReferenceGenome, + callset_path: str, + skip_check_sex_and_relatedness: bool, + **_: Any, +) -> dict[str, hl.Table]: + """Read the Hail Tables that the callset validation functions consume. + + The gnomAD coding/noncoding variants table is always read; the sex check + table is read only when sex and relatedness checking is enabled for the run. + """ + deps = {} + deps['coding_and_noncoding_variants_ht'] = hl.read_table( + cached_reference_dataset_query_path( + reference_genome, + dataset_type, + CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, + ), + ) + if ( + Env.CHECK_SEX_AND_RELATEDNESS + and dataset_type.check_sex_and_relatedness + and not skip_check_sex_and_relatedness + ): + deps['sex_check_ht'] = hl.read_table( + sex_check_table_path( + reference_genome, + dataset_type, + callset_path, + ), + ) + return deps + + def validate_allele_type( mt: hl.MatrixTable, dataset_type: DatasetType, + **_: Any, ) -> None: ht = mt.rows() ht = ht.filter( @@ -31,6 +80,7 @@ def validate_allele_type( def validate_no_duplicate_variants( mt: hl.MatrixTable, + **_: Any, ) -> None: ht = mt.rows() ht = ht.group_by(*ht.key).aggregate(n=hl.agg.count()) @@ -44,6 +94,7 @@ def validate_expected_contig_frequency( mt: hl.MatrixTable, reference_genome: ReferenceGenome, min_rows_per_contig: int = MIN_ROWS_PER_CONTIG, + **_: Any, ) -> None: rows_per_contig = mt.aggregate_rows(hl.agg.counter(mt.locus.contig)) missing_contigs = ( @@ -69,6 +120,7 @@ def validate_imported_field_types( mt: hl.MatrixTable, dataset_type: DatasetType, additional_row_fields: dict[str, hl.expr.types.HailType | set], + **_: Any, ) -> None: def _validate_field( mt_schema: hl.StructExpression, @@ -104,8 +156,12 @@ def _validate_field( def validate_imputed_sex_ploidy( mt: hl.MatrixTable, - sex_check_ht: hl.Table, + # NB: sex_check_ht is left as None when sex checking is disabled for the run + sex_check_ht: hl.Table | None = None, + **_: Any, ) -> None: + if sex_check_ht is None: + return mt = mt.select_cols( discrepant=( ( @@ -132,6 +188,7 @@ def validate_sample_type( 
reference_genome: ReferenceGenome, sample_type: SampleType, sample_type_match_threshold: float = SAMPLE_TYPE_MATCH_THRESHOLD, + **_: Any, ) -> None: coding_variants_ht = coding_and_noncoding_variants_ht.filter( coding_and_noncoding_variants_ht.coding, diff --git a/v03_pipeline/lib/misc/validation_test.py b/v03_pipeline/lib/misc/validation_test.py index f242b31fe..28c8e8e15 100644 --- a/v03_pipeline/lib/misc/validation_test.py +++ b/v03_pipeline/lib/misc/validation_test.py @@ -1,4 +1,5 @@ import unittest +from unittest.mock import Mock, patch import hail as hl @@ -80,7 +81,9 @@ def test_validate_allele_type(self) -> None: DatasetType.SNV_INDEL, ) - def test_validate_imputed_sex_ploidy(self) -> None: + @patch('v03_pipeline.lib.misc.validation.Env') + def test_validate_imputed_sex_ploidy(self, mock_env: Mock) -> None: + mock_env.CHECK_SEX_AND_RELATEDNESS = True sex_check_ht = hl.read_table(TEST_SEX_CHECK_1) mt = hl.MatrixTable.from_parts( rows={ diff --git a/v03_pipeline/lib/tasks/base/base_loading_run_params.py b/v03_pipeline/lib/tasks/base/base_loading_run_params.py index 4b5ad92dd..cde621c4f 100644 --- a/v03_pipeline/lib/tasks/base/base_loading_run_params.py +++ b/v03_pipeline/lib/tasks/base/base_loading_run_params.py @@ -16,6 +16,7 @@ class BaseLoadingRunParams(luigi.Task): # - The "Loading Pipeline" params are shared with # tasks that may remove data from or change the # structure of the persisted Hail Tables. 
+ run_id = luigi.Parameter() sample_type = luigi.EnumParameter(enum=SampleType) callset_path = luigi.Parameter() ignore_missing_samples_when_remapping = luigi.BoolParameter( diff --git a/v03_pipeline/lib/tasks/update_lookup_table.py b/v03_pipeline/lib/tasks/update_lookup_table.py index 54fa62103..9472be271 100644 --- a/v03_pipeline/lib/tasks/update_lookup_table.py +++ b/v03_pipeline/lib/tasks/update_lookup_table.py @@ -24,7 +24,6 @@ class UpdateLookupTableTask(BaseUpdateLookupTableTask): project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() project_pedigree_paths = luigi.ListParameter() - run_id = luigi.Parameter() def complete(self) -> bool: return super().complete() and hl.eval( diff --git a/v03_pipeline/lib/tasks/update_project_table_test.py b/v03_pipeline/lib/tasks/update_project_table_test.py index 4dba40590..7e6ab67f9 100644 --- a/v03_pipeline/lib/tasks/update_project_table_test.py +++ b/v03_pipeline/lib/tasks/update_project_table_test.py @@ -13,6 +13,8 @@ 'v03_pipeline/var/test/pedigrees/test_pedigree_3_different_families.tsv' ) +TEST_RUN_ID = 'manual__2024-04-03' + class UpdateProjectTableTaskTest(MockedDatarootTestCase): def test_update_project_table_task(self) -> None: @@ -20,6 +22,7 @@ def test_update_project_table_task(self) -> None: upt_task = UpdateProjectTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_VCF, project_guid='R0113_test_project', @@ -128,6 +131,7 @@ def test_update_project_table_task_different_pedigree(self) -> None: upt_task = UpdateProjectTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_VCF, project_guid='R0113_test_project', @@ -140,6 +144,7 @@ def test_update_project_table_task_different_pedigree(self) -> None: upt_task = UpdateProjectTableTask( reference_genome=ReferenceGenome.GRCh38, 
dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_VCF, project_guid='R0113_test_project', diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py index 559f65bd8..77289559c 100644 --- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py +++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py @@ -23,7 +23,6 @@ class UpdateVariantAnnotationsTableWithNewSamplesTask( project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() project_pedigree_paths = luigi.ListParameter() - run_id = luigi.Parameter() def requires(self) -> list[luigi.Task]: return [ diff --git a/v03_pipeline/lib/tasks/validate_callset.py b/v03_pipeline/lib/tasks/validate_callset.py index d0cf49d04..c37fe1f37 100644 --- a/v03_pipeline/lib/tasks/validate_callset.py +++ b/v03_pipeline/lib/tasks/validate_callset.py @@ -2,11 +2,10 @@ import luigi import luigi.util -from v03_pipeline.lib.misc.callsets import additional_row_fields from v03_pipeline.lib.misc.validation import ( + get_validation_dependencies, validate_allele_type, validate_expected_contig_frequency, - validate_imported_field_types, validate_imputed_sex_ploidy, validate_no_duplicate_variants, validate_sample_type, @@ -14,9 +13,7 @@ from v03_pipeline.lib.model import CachedReferenceDatasetQuery from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import ( - cached_reference_dataset_query_path, imported_callset_path, - sex_check_table_path, ) from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask @@ -63,8 +60,8 @@ def requires(self) -> list[luigi.Task]: ] if ( Env.CHECK_SEX_AND_RELATEDNESS - and not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness + and not 
self.skip_check_sex_and_relatedness ): requirements = [ *requirements, @@ -83,17 +80,6 @@ def update_table(self, mt: hl.MatrixTable) -> hl.MatrixTable: self.callset_path, ), ) - # This validation isn't override-able. If a field is the wrong - # type, the pipeline will likely hard-fail downstream. - validate_imported_field_types( - mt, - self.dataset_type, - additional_row_fields( - mt, - self.dataset_type, - self.skip_check_sex_and_relatedness, - ), - ) if self.dataset_type.can_run_validation: # Rather than throwing an error, we silently remove invalid contigs. # This happens fairly often for AnVIL requests. @@ -104,38 +90,34 @@ def update_table(self, mt: hl.MatrixTable) -> hl.MatrixTable: ) if not self.skip_validation and self.dataset_type.can_run_validation: - validate_allele_type(mt, self.dataset_type) - validate_no_duplicate_variants(mt) - validate_expected_contig_frequency(mt, self.reference_genome) - coding_and_noncoding_ht = hl.read_table( - cached_reference_dataset_query_path( - self.reference_genome, - self.dataset_type, - CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS, - ), + validation_dependencies = get_validation_dependencies( + **self.param_kwargs, + ) + validate_allele_type( + mt, + **self.param_kwargs, + **validation_dependencies, + ) + validate_no_duplicate_variants( + mt, + **self.param_kwargs, + **validation_dependencies, + ) + validate_expected_contig_frequency( + mt, + **self.param_kwargs, + **validation_dependencies, ) validate_sample_type( mt, - coding_and_noncoding_ht, - self.reference_genome, - self.sample_type, + **self.param_kwargs, + **validation_dependencies, + ) + validate_imputed_sex_ploidy( + mt, + **self.param_kwargs, + **validation_dependencies, ) - if ( - Env.CHECK_SEX_AND_RELATEDNESS - and not self.skip_check_sex_and_relatedness - and self.dataset_type.check_sex_and_relatedness - ): - sex_check_ht = hl.read_table( - sex_check_table_path( - self.reference_genome, - self.dataset_type, - self.callset_path, - ), - ) 
- validate_imputed_sex_ploidy( - mt, - sex_check_ht, - ) return mt.select_globals( callset_path=self.callset_path, validated_sample_type=self.sample_type.value, diff --git a/v03_pipeline/lib/tasks/write_family_table_test.py b/v03_pipeline/lib/tasks/write_family_table_test.py index 3adc96901..5c6995146 100644 --- a/v03_pipeline/lib/tasks/write_family_table_test.py +++ b/v03_pipeline/lib/tasks/write_family_table_test.py @@ -12,6 +12,8 @@ TEST_PEDIGREE_3 = 'v03_pipeline/var/test/pedigrees/test_pedigree_3.tsv' TEST_PEDIGREE_5 = 'v03_pipeline/var/test/pedigrees/test_pedigree_5.tsv' +TEST_RUN_ID = 'manual__2024-04-03' + class WriteFamilyTableTaskTest(MockedDatarootTestCase): def test_snv_write_family_table_task(self) -> None: @@ -19,6 +21,7 @@ def test_snv_write_family_table_task(self) -> None: wft_task = WriteFamilyTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_SNV_INDEL_VCF, project_guid='R0113_test_project', @@ -156,6 +159,7 @@ def test_sv_write_family_table_task(self) -> None: write_family_table_task = WriteFamilyTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SV, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_SV_VCF, project_guid='R0115_test_project2', @@ -408,6 +412,7 @@ def test_gcnv_write_family_table_task(self) -> None: write_family_table_task = WriteFamilyTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.GCNV, + run_id=TEST_RUN_ID, sample_type=SampleType.WES, callset_path=TEST_GCNV_BED_FILE, project_guid='R0115_test_project2', diff --git a/v03_pipeline/lib/tasks/write_imported_callset.py b/v03_pipeline/lib/tasks/write_imported_callset.py index 2ecce4f38..6235988f6 100644 --- a/v03_pipeline/lib/tasks/write_imported_callset.py +++ b/v03_pipeline/lib/tasks/write_imported_callset.py @@ -2,13 +2,16 @@ import luigi import luigi.util -from v03_pipeline.lib.misc.callsets import 
additional_row_fields +from v03_pipeline.lib.misc.callsets import get_additional_row_fields from v03_pipeline.lib.misc.io import ( import_callset, import_vcf, select_relevant_fields, split_multi_hts, ) +from v03_pipeline.lib.misc.validation import ( + validate_imported_field_types, +) from v03_pipeline.lib.misc.vets import annotate_vets from v03_pipeline.lib.model.environment import Env from v03_pipeline.lib.paths import ( @@ -79,14 +82,22 @@ def create_table(self) -> hl.MatrixTable: ) filters_ht = import_vcf(filters_path, self.reference_genome).rows() mt = mt.annotate_rows(filters=filters_ht[mt.row_key].filters) + additional_row_fields = get_additional_row_fields( + mt, + self.dataset_type, + self.skip_check_sex_and_relatedness, + ) mt = select_relevant_fields( mt, self.dataset_type, - additional_row_fields( - mt, - self.dataset_type, - self.skip_check_sex_and_relatedness, - ), + additional_row_fields, + ) + # This validation isn't override-able by the skip option. + # If a field is the wrong type, the pipeline will likely hard-fail downstream. 
+ validate_imported_field_types( + mt, + self.dataset_type, + additional_row_fields, ) if self.dataset_type.has_multi_allelic_variants: mt = split_multi_hts(mt) diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run.py b/v03_pipeline/lib/tasks/write_metadata_for_run.py index c784b1785..4e9d2fdea 100644 --- a/v03_pipeline/lib/tasks/write_metadata_for_run.py +++ b/v03_pipeline/lib/tasks/write_metadata_for_run.py @@ -17,7 +17,6 @@ class WriteMetadataForRunTask(luigi.Task): project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() project_pedigree_paths = luigi.ListParameter() - run_id = luigi.Parameter() def output(self) -> luigi.Target: return GCSorLocalTarget( diff --git a/v03_pipeline/lib/tasks/write_new_variants_table.py b/v03_pipeline/lib/tasks/write_new_variants_table.py index 1def006cc..fb16ac134 100644 --- a/v03_pipeline/lib/tasks/write_new_variants_table.py +++ b/v03_pipeline/lib/tasks/write_new_variants_table.py @@ -48,7 +48,6 @@ class WriteNewVariantsTableTask(BaseWriteTask): project_guids = luigi.ListParameter() project_remap_paths = luigi.ListParameter() project_pedigree_paths = luigi.ListParameter() - run_id = luigi.Parameter() @property def annotation_dependencies(self) -> dict[str, hl.Table]: diff --git a/v03_pipeline/lib/tasks/write_project_family_tables_test.py b/v03_pipeline/lib/tasks/write_project_family_tables_test.py index ae5bc9913..3d23e9b60 100644 --- a/v03_pipeline/lib/tasks/write_project_family_tables_test.py +++ b/v03_pipeline/lib/tasks/write_project_family_tables_test.py @@ -13,6 +13,8 @@ TEST_PEDIGREE_4 = 'v03_pipeline/var/test/pedigrees/test_pedigree_4.tsv' TEST_PEDIGREE_4_SUBSET = 'v03_pipeline/var/test/pedigrees/test_pedigree_4_subset.tsv' +TEST_RUN_ID = 'manual__2024-04-03' + class WriteProjectFamilyTablesTest(MockedDatarootTestCase): def test_snv_write_project_family_tables_task(self) -> None: @@ -20,6 +22,7 @@ def test_snv_write_project_family_tables_task(self) -> None: write_project_family_tables = 
WriteProjectFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_SNV_INDEL_VCF, project_guid='R0113_test_project', @@ -57,6 +60,7 @@ def test_snv_write_project_family_tables_task(self) -> None: write_project_family_tables_subset = WriteProjectFamilyTablesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_SNV_INDEL_VCF, project_guid='R0113_test_project', diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py b/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py index e54be1c6b..c96ba9ecb 100644 --- a/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py +++ b/v03_pipeline/lib/tasks/write_relatedness_check_table_test.py @@ -24,6 +24,8 @@ TEST_GNOMAD_QC_HT = 'v03_pipeline/var/test/reference_data/gnomad_qc_crdq.ht' TEST_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf' +TEST_RUN_ID = 'manual__2024-04-03' + MOCK_CONFIG = { 'gnomad_qc': { '38': { @@ -89,6 +91,7 @@ def test_relatedness_check_table_task_gnomad_qc_updated( task = WriteRelatednessCheckTableTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_VCF, ) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py index f4c08f1dc..e3e0a0e4f 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py @@ -62,8 +62,8 @@ def requires(self) -> list[luigi.Task]: ] if ( Env.CHECK_SEX_AND_RELATEDNESS - and not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness + and not self.skip_check_sex_and_relatedness ): requirements = [ *requirements, @@ -98,8 +98,8 @@ def create_table(self) -> 
hl.MatrixTable: families_failed_sex_check = {} if ( Env.CHECK_SEX_AND_RELATEDNESS - and not self.skip_check_sex_and_relatedness and self.dataset_type.check_sex_and_relatedness + and not self.skip_check_sex_and_relatedness ): relatedness_check_ht = hl.read_table(self.input()[2].path) sex_check_ht = hl.read_table(self.input()[3].path) diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py index ad4cd3640..1ed7550a6 100644 --- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py +++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py @@ -21,6 +21,8 @@ 'v03_pipeline/var/test/relatedness_check/test_relatedness_check_1.ht' ) +TEST_RUN_ID = 'manual__2024-04-03' + class WriteRemappedAndSubsettedCallsetTaskTest(MockedDatarootTestCase): def setUp(self) -> None: @@ -79,6 +81,7 @@ def test_write_remapped_and_subsetted_callset_task( wrsc_task = WriteRemappedAndSubsettedCallsetTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_VCF, project_guid='R0113_test_project', @@ -121,6 +124,7 @@ def test_write_remapped_and_subsetted_callset_task_failed_sex_check_family( wrsc_task = WriteRemappedAndSubsettedCallsetTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SNV_INDEL, + run_id=TEST_RUN_ID, sample_type=SampleType.WGS, callset_path=TEST_VCF, project_guid='R0114_project4', diff --git a/v03_pipeline/lib/tasks/write_variant_annotations_vcf_test.py b/v03_pipeline/lib/tasks/write_variant_annotations_vcf_test.py index d96096be0..ea1ef11f8 100644 --- a/v03_pipeline/lib/tasks/write_variant_annotations_vcf_test.py +++ b/v03_pipeline/lib/tasks/write_variant_annotations_vcf_test.py @@ -49,13 +49,13 @@ def test_sv_export_vcf( UpdateVariantAnnotationsTableWithNewSamplesTask( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SV, + 
run_id='run_id1', sample_type=SampleType.WGS, callset_path=TEST_SV_VCF, project_guids=['R0115_test_project2'], project_remap_paths=['not_a_real_file'], project_pedigree_paths=[TEST_PEDIGREE_5], skip_validation=True, - run_id='run_id1', ) ) worker.add(update_variant_annotations_task) @@ -64,6 +64,7 @@ def test_sv_export_vcf( write_variant_annotations_vcf_task = WriteVariantAnnotationsVCF( reference_genome=ReferenceGenome.GRCh38, dataset_type=DatasetType.SV, + run_id='run_id1', sample_type=SampleType.WGS, callset_path=TEST_SV_VCF, )