diff --git a/v03_pipeline/bin/dataproc_vep_init.bash b/v03_pipeline/bin/dataproc_vep_init.bash
index 2be6c7b7e..fe880f260 100755
--- a/v03_pipeline/bin/dataproc_vep_init.bash
+++ b/v03_pipeline/bin/dataproc_vep_init.bash
@@ -52,9 +52,11 @@ EOF
 gcc -Wall -Werror -O2 /vep.c -o /vep
 chmod u+s /vep
 
-gcloud storage cp gs://seqr-luigi/releases/$ENVIRONMENT/latest/bin/download_vep_data.bash /download_vep_data.bash
-chmod +x /download_vep_data.bash
-./download_vep_data.bash $REFERENCE_GENOME
+gcloud storage cp "gs://seqr-luigi/releases/$ENVIRONMENT/latest/bin/download_vep_reference_data.bash" /download_vep_reference_data.bash
+chmod +x /download_vep_reference_data.bash
+# Invoke by the absolute path the script was just copied to, so this does not
+# depend on the init action's working directory being /.
+/download_vep_reference_data.bash "$REFERENCE_GENOME"
 
 gcloud storage cp gs://seqr-luigi/releases/$ENVIRONMENT/latest/bin/vep /vep.bash
 chmod +x /vep.bash
diff --git a/v03_pipeline/bin/download_vep_data.bash b/v03_pipeline/bin/download_vep_reference_data.bash
similarity index 88%
rename from v03_pipeline/bin/download_vep_data.bash
rename to v03_pipeline/bin/download_vep_reference_data.bash
index 57b65427d..98dbad91b 100755
--- a/v03_pipeline/bin/download_vep_data.bash
+++ b/v03_pipeline/bin/download_vep_reference_data.bash
@@ -3,7 +3,7 @@
 set -eux
 
 REFERENCE_GENOME=$1
-VEP_DATA=/seqr/vep_data
+VEP_REFERENCE_DATASETS_DIR=${VEP_REFERENCE_DATASETS_DIR:-/seqr/vep-reference-data}
 
 case $REFERENCE_GENOME in
   GRCh38)
@@ -43,20 +43,27 @@ case $REFERENCE_GENOME in
     exit 1
 esac
 
-if [ -f $VEP_DATA/$REFERENCE_GENOME/_SUCCESS ]; then
+if [ -f "$VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/_SUCCESS" ]; then
    echo "Skipping download because already successful"
    exit 0;
 fi
 
-mkdir -p $VEP_DATA/$REFERENCE_GENOME;
+mkdir -p "$VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME";
+# PIDs of the background downloads, so each exit status can be checked below.
+download_pids=()
 for vep_reference_data_file in ${VEP_REFERENCE_DATA_FILES[@]}; do
     if  [[ $vep_reference_data_file == *.tar.gz ]]; then
         echo "Downloading and extracting" $vep_reference_data_file;
-        gsutil cat $vep_reference_data_file | tar -xzf - -C $VEP_DATA/$REFERENCE_GENOME/ &
+        gsutil cat "$vep_reference_data_file" | tar -xzf - -C "$VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/" &
     else 
         echo "Downloading" $vep_reference_data_file;
-        gsutil cp $vep_reference_data_file $VEP_DATA/$REFERENCE_GENOME/ &
+        gsutil cp "$vep_reference_data_file" "$VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/" &
     fi
+    download_pids+=("$!")
 done;
-wait
-touch $VEP_DATA/$REFERENCE_GENOME/_SUCCESS
+# A bare `wait` returns 0 even when a job failed; waiting on each PID lets
+# `set -e` abort before _SUCCESS is written for an incomplete download.
+for download_pid in "${download_pids[@]}"; do
+    wait "$download_pid"
+done
+touch "$VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/_SUCCESS"
diff --git a/v03_pipeline/bin/vep b/v03_pipeline/bin/vep
index b3558545e..33996bf27 100755
--- a/v03_pipeline/bin/vep
+++ b/v03_pipeline/bin/vep
@@ -3,7 +3,7 @@
 set -eux
 
 REFERENCE_GENOME=$1
-VEP_DATA=/seqr/vep_data
+VEP_REFERENCE_DATASETS_DIR=${VEP_REFERENCE_DATASETS_DIR:-/seqr/vep-reference-data}
 VEP_DOCKER_IMAGE="gcr.io/seqr-project/vep-docker-image"
 
 case $REFERENCE_GENOME in
@@ -17,5 +17,7 @@ case $REFERENCE_GENOME in
 esac
 
 shift # Remove the REFERENCE_GENOME arg.
-docker run --platform linux/amd64 -i -v $VEP_DATA/$REFERENCE_GENOME:/opt/vep/.vep/:ro $VEP_DOCKER_IMAGE:$REFERENCE_GENOME \
-  /opt/vep/src/ensembl-vep/vep $@
+# Quote every expansion so a configurable data dir or a VEP argument that
+# contains whitespace/glob characters reaches docker as a single word.
+docker run --platform linux/amd64 -i -v "$VEP_REFERENCE_DATASETS_DIR/$REFERENCE_GENOME:/opt/vep/.vep/:ro" "$VEP_DOCKER_IMAGE:$REFERENCE_GENOME" \
+  /opt/vep/src/ensembl-vep/vep "$@"
diff --git a/v03_pipeline/lib/misc/family_loading_failures.py b/v03_pipeline/lib/misc/family_loading_failures.py
index 80190b327..e3b1b59db 100644
--- a/v03_pipeline/lib/misc/family_loading_failures.py
+++ b/v03_pipeline/lib/misc/family_loading_failures.py
@@ -3,11 +3,14 @@
 import hail as hl
 import numpy as np
 
+from v03_pipeline.lib.logger import get_logger
 from v03_pipeline.lib.misc.pedigree import Family, Relation, Sample
 from v03_pipeline.lib.model import Sex
 
 RELATEDNESS_TOLERANCE = 0.2
 
+logger = get_logger(__name__)
+
 
 def passes_relatedness_check(
     relatedness_check_lookup: dict[tuple[str, str], list],
@@ -175,10 +178,19 @@ def get_families_failed_sex_check(
     failed_families = defaultdict(list)
     for family in families:
         for sample_id in family.samples:
-            if family.samples[sample_id].sex not in {
-                sex_check_lookup[sample_id],
-                Sex.UNKNOWN,
-            }:  # NB: Unknown samples in pedigree are excluded from sex check.
+            # NB: Both Unknown samples in pedigree and Unknown
+            # samples in the predicted_sex are precluded from
+            # failing the sex check.
+            if (
+                sex_check_lookup[sample_id] == Sex.UNKNOWN  # noqa: PLR1714
+                or family.samples[sample_id].sex == Sex.UNKNOWN
+            ):
+                logger.info(
+                    f'Encountered sample with Unknown sex excluded from sex check: {sample_id}',
+                )
+                continue
+
+            if family.samples[sample_id].sex != sex_check_lookup[sample_id]:
                 failed_families[family].append(
                     f'Sample {sample_id} has pedigree sex {family.samples[sample_id].sex.value} but imputed sex {sex_check_lookup[sample_id].value}',
                 )
diff --git a/v03_pipeline/lib/misc/family_loading_failures_test.py b/v03_pipeline/lib/misc/family_loading_failures_test.py
index 3f3dbda4e..1a970a8c8 100644
--- a/v03_pipeline/lib/misc/family_loading_failures_test.py
+++ b/v03_pipeline/lib/misc/family_loading_failures_test.py
@@ -56,12 +56,12 @@ def test_build_relatedness_check_lookup(self):
     def test_build_sex_check_lookup(self):
         ht = hl.Table.parallelize(
             [
-                {'s': 'remapped_id', 'predicted_sex': 'M'},
-                {'s': 'ROS_006_18Y03227_D1', 'predicted_sex': 'M'},
-                {'s': 'ROS_006_18Y03228_D1', 'predicted_sex': 'M'},
-                {'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'M'},
-                {'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'F'},
-                {'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'M'},
+                {'s': 'ROS_006_18Y03226_D1', 'predicted_sex': 'F'},
+                {'s': 'ROS_006_18Y03227_D1', 'predicted_sex': 'F'},
+                {'s': 'ROS_006_18Y03228_D1', 'predicted_sex': 'F'},
+                {'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'F'},
+                {'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'M'},
+                {'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'U'},
             ],
             hl.tstruct(
                 s=hl.tstr,
@@ -72,12 +72,12 @@ def test_build_sex_check_lookup(self):
         self.assertEqual(
             build_sex_check_lookup(ht, hl.dict({'ROS_006_18Y03226_D1': 'remapped_id'})),
             {
-                'remapped_id': Sex.MALE,
-                'ROS_006_18Y03227_D1': Sex.MALE,
-                'ROS_006_18Y03228_D1': Sex.MALE,
-                'ROS_007_19Y05919_D1': Sex.MALE,
-                'ROS_007_19Y05939_D1': Sex.FEMALE,
-                'ROS_007_19Y05987_D1': Sex.MALE,
+                'remapped_id': Sex.FEMALE,
+                'ROS_006_18Y03227_D1': Sex.FEMALE,
+                'ROS_006_18Y03228_D1': Sex.FEMALE,
+                'ROS_007_19Y05919_D1': Sex.FEMALE,
+                'ROS_007_19Y05939_D1': Sex.MALE,
+                'ROS_007_19Y05987_D1': Sex.UNKNOWN,
             },
         )
 
@@ -178,12 +178,12 @@ def test_all_relatedness_checks(self):
     def test_get_families_failed_sex_check(self):
         sex_check_ht = hl.Table.parallelize(
             [
-                {'s': 'ROS_006_18Y03226_D1', 'predicted_sex': 'M'},
-                {'s': 'ROS_006_18Y03227_D1', 'predicted_sex': 'F'},
+                {'s': 'ROS_006_18Y03226_D1', 'predicted_sex': 'F'},
+                {'s': 'ROS_006_18Y03227_D1', 'predicted_sex': 'F'},  # Pedigree Sex U
                 {'s': 'ROS_006_18Y03228_D1', 'predicted_sex': 'F'},
                 {'s': 'ROS_007_19Y05919_D1', 'predicted_sex': 'F'},
-                {'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'F'},
-                {'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'F'},
+                {'s': 'ROS_007_19Y05939_D1', 'predicted_sex': 'M'},
+                {'s': 'ROS_007_19Y05987_D1', 'predicted_sex': 'U'},  # Pedigree Sex F
             ],
             hl.tstruct(
                 s=hl.tstr,
@@ -201,7 +201,7 @@ def test_get_families_failed_sex_check(self):
             failed_families.values(),
             [
                 [
-                    'Sample ROS_006_18Y03226_D1 has pedigree sex F but imputed sex M',
+                    'Sample ROS_007_19Y05939_D1 has pedigree sex F but imputed sex M',
                 ],
             ],
         )
diff --git a/v03_pipeline/lib/misc/io.py b/v03_pipeline/lib/misc/io.py
index ef2b26ecd..bea1b9bc5 100644
--- a/v03_pipeline/lib/misc/io.py
+++ b/v03_pipeline/lib/misc/io.py
@@ -1,13 +1,17 @@
 import hashlib
 import math
 import os
+import re
 import uuid
+from collections.abc import Callable
+from string import Template
 
 import hail as hl
 import hailtop.fs as hfs
 
 from v03_pipeline.lib.misc.gcnv import parse_gcnv_genes
 from v03_pipeline.lib.misc.nested_field import parse_nested_field
+from v03_pipeline.lib.misc.validation import SeqrValidationError
 from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome, Sex
 
 BIALLELIC = 2
@@ -15,8 +19,55 @@
 MB_PER_PARTITION = 128
 MAX_SAMPLES_SPLIT_MULTI_SHUFFLE = 100
 
-MALE = 'Male'
-FEMALE = 'Female'
+
+def validated_hl_function(
+    regex_to_msg: dict[str, str | Template],
+) -> Callable[[Callable], Callable]:
+    """Build a decorator that turns known hail errors into SeqrValidationError.
+
+    The decorated function's result is passed through ``checkpoint`` inside
+    the ``try`` block, and the first element of the returned pair is handed
+    back to the caller.  If anything raises, ``regex_to_msg`` is scanned in
+    insertion order; the first regex that ``re.search`` finds in ``str(e)``
+    selects the message, and a ``SeqrValidationError`` chained from the
+    original exception is raised.  If no regex matches, the original
+    exception propagates unchanged.
+
+    Args:
+        regex_to_msg: maps a regex to a plain message, or to a ``Template``
+            whose ``$match`` placeholder is filled from the regex match
+            (capture group 1, or the whole match when the regex defines no
+            capture group).
+
+    Returns:
+        A decorator; the function it produces keeps the wrapped function's
+        ``__name__`` and ``__doc__``.
+    """
+    import functools  # function-scope so no top-of-file import is needed
+
+    def decorator(fn: Callable) -> Callable:
+        @functools.wraps(fn)
+        def wrapper(*args, **kwargs) -> hl.Table | hl.MatrixTable:
+            try:
+                t, _ = checkpoint(fn(*args, **kwargs))
+            except Exception as e:
+                for regex, msg in regex_to_msg.items():
+                    match = re.search(regex, str(e))
+                    if not match:
+                        continue
+                    if isinstance(msg, Template):
+                        # group(1) raises IndexError when the regex has no
+                        # capture group, which would hide the hail error.
+                        captured = match.group(1 if match.re.groups else 0)
+                        msg = msg.substitute(match=captured)  # noqa: PLW2901
+                    raise SeqrValidationError(msg) from e
+                raise
+            else:
+                return t
+
+        return wrapper
+
+    return decorator
 
 
 def does_file_exist(path: str) -> bool:
@@ -49,7 +73,15 @@ def compute_hail_n_partitions(file_size_b: int) -> int:
     return math.ceil(file_size_b / B_PER_MB / MB_PER_PARTITION)
 
 
-def split_multi_hts(mt: hl.MatrixTable) -> hl.MatrixTable:
+@validated_hl_function(
+    {
+        'RVD error! Keys found out of order': 'Your callset failed while attempting to split multiallelic sites.  This error can occur if the dataset contains both multiallelic variants and duplicated loci.',
+    },
+)
+def split_multi_hts(
+    mt: hl.MatrixTable,
+    max_samples_split_multi_shuffle=MAX_SAMPLES_SPLIT_MULTI_SHUFFLE,
+) -> hl.MatrixTable:
     bi = mt.filter_rows(hl.len(mt.alleles) == BIALLELIC)
     # split_multi_hts filters star alleles by default, but we
     # need that behavior for bi-allelic variants in addition to
@@ -59,7 +91,7 @@ def split_multi_hts(mt: hl.MatrixTable) -> hl.MatrixTable:
     multi = mt.filter_rows(hl.len(mt.alleles) > BIALLELIC)
     split = hl.split_multi_hts(
         multi,
-        permit_shuffle=mt.count()[1] < MAX_SAMPLES_SPLIT_MULTI_SHUFFLE,
+        permit_shuffle=mt.count()[1] < max_samples_split_multi_shuffle,
     )
     mt = split.union_rows(bi)
     return mt.distinct_by_row()
@@ -103,6 +135,15 @@ def import_gcnv_bed_file(callset_path: str) -> hl.MatrixTable:
     return mt.unfilter_entries()
 
 
+@validated_hl_function(
+    {
+        '.*FileNotFoundException|GoogleJsonResponseException: 403 Forbidden|arguments refer to no files.*': 'Unable to access the VCF in cloud storage.',
+        # NB: ?: is non-capturing group.
+        '.*(?:InvalidHeader|VCFParseError): (.*)$': Template(
+            'VCF failed file format validation: $match',
+        ),
+    },
+)
 def import_vcf(
     callset_path: str,
     reference_genome: ReferenceGenome,
@@ -139,6 +180,13 @@ def import_callset(
     return mt.key_rows_by(*dataset_type.table_key_type(reference_genome).fields)
 
 
+@validated_hl_function(
+    {
+        'instance has no field (.*)': Template(
+            'Your callset is missing a required field: $match',
+        ),
+    },
+)
 def select_relevant_fields(
     mt: hl.MatrixTable,
     dataset_type: DatasetType,
@@ -165,12 +213,17 @@ def select_relevant_fields(
 
 def import_imputed_sex(imputed_sex_path: str) -> hl.Table:
     ht = hl.import_table(imputed_sex_path)
+    imputed_sex_lookup = hl.dict(
+        {s.imputed_sex_value: s.value for s in Sex},
+    )
     ht = ht.select(
         s=ht.collaborator_sample_id,
         predicted_sex=(
             hl.case()
-            .when(ht.predicted_sex == FEMALE, Sex.FEMALE.value)
-            .when(ht.predicted_sex == MALE, Sex.MALE.value)
+            .when(
+                imputed_sex_lookup.contains(ht.predicted_sex),
+                imputed_sex_lookup[ht.predicted_sex],
+            )
             .or_error(
                 hl.format(
                     'Found unexpected value %s in imputed sex file',
diff --git a/v03_pipeline/lib/misc/io_test.py b/v03_pipeline/lib/misc/io_test.py
index ab0638d8c..24792755a 100644
--- a/v03_pipeline/lib/misc/io_test.py
+++ b/v03_pipeline/lib/misc/io_test.py
@@ -1,4 +1,5 @@
 import unittest
+from unittest import mock
 
 import hail as hl
 
@@ -6,13 +7,19 @@
     compute_hail_n_partitions,
     file_size_bytes,
     import_imputed_sex,
+    import_vcf,
     remap_pedigree_hash,
+    select_relevant_fields,
+    split_multi_hts,
 )
+from v03_pipeline.lib.misc.validation import SeqrValidationError
+from v03_pipeline.lib.model import DatasetType, ReferenceGenome
 
 TEST_IMPUTED_SEX = 'v03_pipeline/var/test/sex_check/test_imputed_sex.tsv'
 TEST_IMPUTED_SEX_UNEXPECTED_VALUE = (
     'v03_pipeline/var/test/sex_check/test_imputed_sex_unexpected_value.tsv'
 )
+TEST_INVALID_VCF = 'v03_pipeline/var/test/callsets/improperly_formatted.vcf'
 TEST_PEDIGREE_3 = 'v03_pipeline/var/test/pedigrees/test_pedigree_3.tsv'
 TEST_MITO_MT = 'v03_pipeline/var/test/callsets/mito_1.mt'
 TEST_REMAP = 'v03_pipeline/var/test/remaps/test_remap_1.tsv'
@@ -38,7 +45,7 @@ def test_import_imputed_sex(self) -> None:
             [
                 hl.Struct(s='abc_1', predicted_sex='M'),
                 hl.Struct(s='abc_2', predicted_sex='F'),
-                hl.Struct(s='abc_3', predicted_sex='M'),
+                hl.Struct(s='abc_3', predicted_sex='U'),
             ],
         )
 
@@ -46,7 +53,7 @@ def test_import_imputed_sex_unexpected_value(self) -> None:
         ht = import_imputed_sex(TEST_IMPUTED_SEX_UNEXPECTED_VALUE)
         self.assertRaisesRegex(
             hl.utils.java.HailUserError,
-            'Found unexpected value Unknown in imputed sex file',
+            'Found unexpected value UNKNOWN in imputed sex file',
             ht.collect,
         )
 
@@ -60,3 +67,108 @@ def test_remap_pedigree_hash(self) -> None:
             ),
             -560434714,
         )
+
+    def test_import_vcf(self) -> None:
+        self.assertRaisesRegex(
+            TypeError,
+            'missing 1 required positional argument',
+            import_vcf,
+            'abc',
+        )
+        self.assertRaisesRegex(
+            SeqrValidationError,
+            'Unable to access the VCF in cloud storage',
+            import_vcf,
+            'bad.vcf',
+            ReferenceGenome.GRCh38,
+        )
+        with mock.patch('v03_pipeline.lib.misc.io.hl.read_table') as mock_read_table:
+            mock_read_table.side_effect = hl.utils.java.FatalError(
+                'GoogleJsonResponseException: 403 Forbidden',
+            )
+            self.assertRaisesRegex(
+                SeqrValidationError,
+                'Unable to access the VCF in cloud storage',
+                import_vcf,
+                'abc123/bad.vcf',
+                ReferenceGenome.GRCh38,
+            )
+        self.assertRaisesRegex(
+            SeqrValidationError,
+            'VCF failed file format validation: Your input file has a malformed header: We never saw the required CHROM header line \\(starting with one #\\) for the input VCF file',
+            import_vcf,
+            TEST_PEDIGREE_3,
+            ReferenceGenome.GRCh38,
+        )
+        self.assertRaisesRegex(
+            SeqrValidationError,
+            "VCF failed file format validation: invalid character 'N' in integer literal",
+            import_vcf,
+            TEST_INVALID_VCF,
+            ReferenceGenome.GRCh38,
+        )
+
+    def test_select_missing_field(self) -> None:
+        self.assertRaisesRegex(
+            SeqrValidationError,
+            "Your callset is missing a required field: 'a magic field'",
+            select_relevant_fields,
+            hl.MatrixTable.from_parts(
+                rows={
+                    'locus': [
+                        hl.Locus(
+                            contig='chr1',
+                            position=1,
+                            reference_genome='GRCh38',
+                        ),
+                    ],
+                    'alleles': [
+                        ['A', 'C'],
+                    ],
+                    'rsid': ['rs1233'],
+                    'filters': [{'PASS'}],
+                },
+                cols={'s': ['sample_1']},
+                entries={
+                    'GT': [[hl.Call([0, 0])]],
+                    'AD': [[[0, 20]]],
+                    'GQ': [[99]],
+                },
+            ).key_rows_by('locus', 'alleles'),
+            DatasetType.SNV_INDEL,
+            {'a magic field': hl.tint32},
+        )
+
+    def test_split_multi_failure(self) -> None:
+        self.assertRaisesRegex(
+            SeqrValidationError,
+            'Your callset failed while attempting to split multiallelic sites.  This error can occur if the dataset contains both multiallelic variants and duplicated loci.',
+            split_multi_hts,
+            hl.MatrixTable.from_parts(
+                rows={
+                    'locus': [
+                        hl.Locus(
+                            contig='chr1',
+                            position=1,
+                            reference_genome='GRCh38',
+                        ),
+                        hl.Locus(
+                            contig='chr1',
+                            position=1,
+                            reference_genome='GRCh38',
+                        ),
+                    ],
+                    'alleles': [
+                        ['A', 'G', 'AC'],
+                        ['A', 'AT', 'C', 'G'],
+                    ],
+                },
+                cols={'s': ['sample_1']},
+                entries={
+                    'GQ': [[99], [98]],
+                },
+            )
+            .key_rows_by('locus', 'alleles')
+            .repartition(1),
+            1,
+        )
diff --git a/v03_pipeline/lib/model/definitions.py b/v03_pipeline/lib/model/definitions.py
index da2ab08c9..1bad09e28 100644
--- a/v03_pipeline/lib/model/definitions.py
+++ b/v03_pipeline/lib/model/definitions.py
@@ -13,6 +13,19 @@ class Sex(str, Enum):
     MALE = 'M'
     UNKNOWN = 'U'
 
+    @property
+    def imputed_sex_value(self):
+        """Label for this sex as written in imputed sex files.
+
+        Raises KeyError for a member without an entry in the mapping.
+        """
+        label_by_sex = {
+            Sex.MALE: 'Male',
+            Sex.FEMALE: 'Female',
+            Sex.UNKNOWN: 'Unknown',
+        }
+        return label_by_sex[self]
+
 
 class PipelineVersion(str, Enum):
     V02 = 'v02'
diff --git a/v03_pipeline/lib/model/environment.py b/v03_pipeline/lib/model/environment.py
index 91e69a6a2..7b5a9792d 100644
--- a/v03_pipeline/lib/model/environment.py
+++ b/v03_pipeline/lib/model/environment.py
@@ -10,7 +10,7 @@
     'GRCH38_TO_GRCH37_LIFTOVER_REF_PATH',
     'gs://hail-common/references/grch38_to_grch37.over.chain.gz',
 )
-HAIL_TMP_DIR = os.environ.get('HAIL_TMP_DIR', '/seqr/tmp')
+HAIL_TMP_DIR = os.environ.get('HAIL_TMP_DIR', '/tmp')  # noqa: S108
 HAIL_SEARCH_DATA_DIR = os.environ.get('HAIL_SEARCH_DATA_DIR', '/seqr/hail-search-data')
 LOADING_DATASETS_DIR = os.environ.get('LOADING_DATASETS_DIR', '/seqr/seqr-loading-temp')
 PRIVATE_REFERENCE_DATASETS_DIR_DIR = os.environ.get(
@@ -21,6 +21,10 @@
     'REFERENCE_DATASETS_DIR',
     '/seqr/seqr-reference-data',
 )
+VEP_REFERENCE_DATASETS_DIR = os.environ.get(
+    'VEP_REFERENCE_DATASETS_DIR',
+    '/seqr/vep-reference-data',
+)
 
 # Allele registry secrets :/
 ALLELE_REGISTRY_SECRET_NAME = os.environ.get('ALLELE_REGISTRY_SECRET_NAME', None)
@@ -50,3 +54,4 @@ class Env:
     PROJECT_ID: str | None = PROJECT_ID
     REFERENCE_DATASETS_DIR: str = REFERENCE_DATASETS_DIR
     SHOULD_REGISTER_ALLELES: bool = SHOULD_REGISTER_ALLELES
+    VEP_REFERENCE_DATASETS_DIR: str = VEP_REFERENCE_DATASETS_DIR
diff --git a/v03_pipeline/lib/vep.py b/v03_pipeline/lib/vep.py
index 6f84d4646..bc9befd32 100644
--- a/v03_pipeline/lib/vep.py
+++ b/v03_pipeline/lib/vep.py
@@ -2,10 +2,10 @@
 
 import hail as hl
 
-from v03_pipeline.lib.model import DatasetType, ReferenceGenome
+from v03_pipeline.lib.model import DatasetType, Env, ReferenceGenome
 
 VEP_CONFIG_URI = Template(
-    'file:///seqr/vep_data/$reference_genome/vep-$reference_genome.json',
+    'file://$vep_reference_datasets_dir/$reference_genome/vep-$reference_genome.json',
 )
 
 
@@ -18,7 +18,10 @@ def run_vep(
         return ht
     return hl.vep(
         ht,
-        config=VEP_CONFIG_URI.substitute(reference_genome=reference_genome.value),
+        config=VEP_CONFIG_URI.substitute(
+            vep_reference_datasets_dir=Env.VEP_REFERENCE_DATASETS_DIR,
+            reference_genome=reference_genome.value,
+        ),
         name='vep',
         block_size=1000,
         tolerate_parse_error=True,
diff --git a/v03_pipeline/var/test/callsets/improperly_formatted.vcf b/v03_pipeline/var/test/callsets/improperly_formatted.vcf
new file mode 100644
index 000000000..3d834dc5f
--- /dev/null
+++ b/v03_pipeline/var/test/callsets/improperly_formatted.vcf
@@ -0,0 +1,128 @@
+##fileformat=VCFv4.2
+##hailversion=0.2.8-70304a52d33d
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="">
+##FORMAT=<ID=PL,Number=.,Type=Integer,Description="">
+##INFO=<ID=AC,Number=.,Type=Integer,Description="">
+##INFO=<ID=AF,Number=.,Type=Float,Description="">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="">
+##INFO=<ID=END,Number=1,Type=Integer,Description="">
+##INFO=<ID=FS,Number=1,Type=Float,Description="">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="">
+##INFO=<ID=MLEAC,Number=.,Type=Integer,Description="">
+##INFO=<ID=MLEAF,Number=.,Type=Float,Description="">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="">
+##INFO=<ID=QD,Number=1,Type=Float,Description="">
+##INFO=<ID=RPA,Number=.,Type=Integer,Description="">
+##INFO=<ID=RU,Number=1,Type=String,Description="">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="">
+##INFO=<ID=SNPEFF_AMINO_ACID_CHANGE,Number=1,Type=String,Description="">
+##INFO=<ID=SNPEFF_CODON_CHANGE,Number=1,Type=String,Description="">
+##INFO=<ID=SNPEFF_EFFECT,Number=1,Type=String,Description="">
+##INFO=<ID=SNPEFF_EXON_ID,Number=1,Type=String,Description="">
+##INFO=<ID=SNPEFF_FUNCTIONAL_CLASS,Number=1,Type=String,Description="">
+##INFO=<ID=SNPEFF_GENE_BIOTYPE,Number=1,Type=String,Description="">
+##INFO=<ID=SNPEFF_GENE_NAME,Number=1,Type=String,Description="">
+##INFO=<ID=SNPEFF_IMPACT,Number=1,Type=String,Description="">
+##INFO=<ID=SNPEFF_TRANSCRIPT_ID,Number=1,Type=String,Description="">
+##INFO=<ID=STR,Number=0,Type=Flag,Description="">
+##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="">
+##INFO=<ID=culprit,Number=1,Type=String,Description="">
+##INFO=<ID=set,Number=1,Type=String,Description="">
+##INFO=<ID=CSQ,Number=.,Type=String,Description="">
+##contig=<ID=1,length=249250621,assembly=GRCh37>
+##contig=<ID=2,length=243199373,assembly=GRCh37>
+##contig=<ID=3,length=198022430,assembly=GRCh37>
+##contig=<ID=4,length=191154276,assembly=GRCh37>
+##contig=<ID=5,length=180915260,assembly=GRCh37>
+##contig=<ID=6,length=171115067,assembly=GRCh37>
+##contig=<ID=7,length=159138663,assembly=GRCh37>
+##contig=<ID=8,length=146364022,assembly=GRCh37>
+##contig=<ID=9,length=141213431,assembly=GRCh37>
+##contig=<ID=10,length=135534747,assembly=GRCh37>
+##contig=<ID=11,length=135006516,assembly=GRCh37>
+##contig=<ID=12,length=133851895,assembly=GRCh37>
+##contig=<ID=13,length=115169878,assembly=GRCh37>
+##contig=<ID=14,length=107349540,assembly=GRCh37>
+##contig=<ID=15,length=102531392,assembly=GRCh37>
+##contig=<ID=16,length=90354753,assembly=GRCh37>
+##contig=<ID=17,length=81195210,assembly=GRCh37>
+##contig=<ID=18,length=78077248,assembly=GRCh37>
+##contig=<ID=19,length=59128983,assembly=GRCh37>
+##contig=<ID=20,length=63025520,assembly=GRCh37>
+##contig=<ID=21,length=48129895,assembly=GRCh37>
+##contig=<ID=22,length=51304566,assembly=GRCh37>
+##contig=<ID=X,length=155270560,assembly=GRCh37>
+##contig=<ID=Y,length=59373566,assembly=GRCh37>
+##contig=<ID=MT,length=16569,assembly=GRCh37>
+##contig=<ID=GL000207.1,length=4262,assembly=GRCh37>
+##contig=<ID=GL000226.1,length=15008,assembly=GRCh37>
+##contig=<ID=GL000229.1,length=19913,assembly=GRCh37>
+##contig=<ID=GL000231.1,length=27386,assembly=GRCh37>
+##contig=<ID=GL000210.1,length=27682,assembly=GRCh37>
+##contig=<ID=GL000239.1,length=33824,assembly=GRCh37>
+##contig=<ID=GL000235.1,length=34474,assembly=GRCh37>
+##contig=<ID=GL000201.1,length=36148,assembly=GRCh37>
+##contig=<ID=GL000247.1,length=36422,assembly=GRCh37>
+##contig=<ID=GL000245.1,length=36651,assembly=GRCh37>
+##contig=<ID=GL000197.1,length=37175,assembly=GRCh37>
+##contig=<ID=GL000203.1,length=37498,assembly=GRCh37>
+##contig=<ID=GL000246.1,length=38154,assembly=GRCh37>
+##contig=<ID=GL000249.1,length=38502,assembly=GRCh37>
+##contig=<ID=GL000196.1,length=38914,assembly=GRCh37>
+##contig=<ID=GL000248.1,length=39786,assembly=GRCh37>
+##contig=<ID=GL000244.1,length=39929,assembly=GRCh37>
+##contig=<ID=GL000238.1,length=39939,assembly=GRCh37>
+##contig=<ID=GL000202.1,length=40103,assembly=GRCh37>
+##contig=<ID=GL000234.1,length=40531,assembly=GRCh37>
+##contig=<ID=GL000232.1,length=40652,assembly=GRCh37>
+##contig=<ID=GL000206.1,length=41001,assembly=GRCh37>
+##contig=<ID=GL000240.1,length=41933,assembly=GRCh37>
+##contig=<ID=GL000236.1,length=41934,assembly=GRCh37>
+##contig=<ID=GL000241.1,length=42152,assembly=GRCh37>
+##contig=<ID=GL000243.1,length=43341,assembly=GRCh37>
+##contig=<ID=GL000242.1,length=43523,assembly=GRCh37>
+##contig=<ID=GL000230.1,length=43691,assembly=GRCh37>
+##contig=<ID=GL000237.1,length=45867,assembly=GRCh37>
+##contig=<ID=GL000233.1,length=45941,assembly=GRCh37>
+##contig=<ID=GL000204.1,length=81310,assembly=GRCh37>
+##contig=<ID=GL000198.1,length=90085,assembly=GRCh37>
+##contig=<ID=GL000208.1,length=92689,assembly=GRCh37>
+##contig=<ID=GL000191.1,length=106433,assembly=GRCh37>
+##contig=<ID=GL000227.1,length=128374,assembly=GRCh37>
+##contig=<ID=GL000228.1,length=129120,assembly=GRCh37>
+##contig=<ID=GL000214.1,length=137718,assembly=GRCh37>
+##contig=<ID=GL000221.1,length=155397,assembly=GRCh37>
+##contig=<ID=GL000209.1,length=159169,assembly=GRCh37>
+##contig=<ID=GL000218.1,length=161147,assembly=GRCh37>
+##contig=<ID=GL000220.1,length=161802,assembly=GRCh37>
+##contig=<ID=GL000213.1,length=164239,assembly=GRCh37>
+##contig=<ID=GL000211.1,length=166566,assembly=GRCh37>
+##contig=<ID=GL000199.1,length=169874,assembly=GRCh37>
+##contig=<ID=GL000217.1,length=172149,assembly=GRCh37>
+##contig=<ID=GL000216.1,length=172294,assembly=GRCh37>
+##contig=<ID=GL000215.1,length=172545,assembly=GRCh37>
+##contig=<ID=GL000205.1,length=174588,assembly=GRCh37>
+##contig=<ID=GL000219.1,length=179198,assembly=GRCh37>
+##contig=<ID=GL000224.1,length=179693,assembly=GRCh37>
+##contig=<ID=GL000223.1,length=180455,assembly=GRCh37>
+##contig=<ID=GL000195.1,length=182896,assembly=GRCh37>
+##contig=<ID=GL000212.1,length=186858,assembly=GRCh37>
+##contig=<ID=GL000222.1,length=186861,assembly=GRCh37>
+##contig=<ID=GL000200.1,length=187035,assembly=GRCh37>
+##contig=<ID=GL000193.1,length=189789,assembly=GRCh37>
+##contig=<ID=GL000194.1,length=191469,assembly=GRCh37>
+##contig=<ID=GL000225.1,length=211173,assembly=GRCh37>
+##contig=<ID=GL000192.1,length=547496,assembly=GRCh37>
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG00731	HG00732	HG00733	NA19675	NA19678	NA19679	NA20870	NA20872	NA20874	NA20875	NA20876	NA20877	NA20878	NA20881	NA20885	NA20888
+1	871269	.	A	C	368.47	PASS	AC=1;AF=3.10000e-02;AN=32;BaseQRankSum=-1.74060e+01;DP=351;Dels=0.00000e+00;FS=9.28030e+01;HaplotypeScore=5.13800e-01;InbreedingCoeff=-3.32000e-02;MQ=5.93000e+01;MQ0=0;MQRankSum=2.37300e+00;QD=4.80000e-01;ReadPosRankSum=-1.17980e+01;SNPEFF_AMINO_ACID_CHANGE=R141;SNPEFF_CODON_CHANGE=cgA/cgC;SNPEFF_EFFECT=SYNONYMOUS_CODING;SNPEFF_EXON_ID=exon_1_871152_871276;SNPEFF_FUNCTIONAL_CLASS=SILENT;SNPEFF_GENE_BIOTYPE=protein_coding;SNPEFF_GENE_NAME=SAMD11;SNPEFF_IMPACT=LOW;SNPEFF_TRANSCRIPT_ID=ENST00000342066;VQSLOD=-8.16700e-01;culprit=QD;CSQ=C|ENSG00000187634|ENST00000455979|Transcript|upstream_gene_variant|||||||1|3386|1|SAMD11|HGNC|28706|protein_coding||||ENSP00000412228|||UPI000155D479||||||||||||||||||||||||EXON_INTRON_UNDEF|||||||,C|ENSG00000187634|ENST00000420190|Transcript|synonymous_variant|512|423|141|R|cgA/cgC||1||1|SAMD11|HGNC|28706|protein_coding||||ENSP00000411579||Q5SV95_HUMAN&I7FV93_HUMAN&A6PWC8_HUMAN|UPI000155D47C|||5/7|||ENST00000420190.1:c.423N>C|ENST00000420190.1:c.423N>C(p.%3D)|||||||||||||||POSITION:0.787709497206704||NON_CAN_SPLICE_SURR|||||||,C|ENSG00000268179|ENST00000598827|Transcript|upstream_gene_variant|||||||1|4824|-1|AL645608.1|Clone_based_ensembl_gene||protein_coding|YES|||ENSP00000471152||M0R0C9_HUMAN|UPI0000D61E05||||||||||||||||||||||||EXON_INTRON_UNDEF|||||||,C|ENSG00000187634|ENST00000437963|Transcript|downstream_gene_variant|||||||1|96|1|SAMD11|HGNC|28706|protein_coding||||ENSP00000393181||Q5SV95_HUMAN&I7FV93_HUMAN|UPI000155D47B||||||||||||||||||||||||EXON_INTRON_UNDEF|||||||,C|ENSG00000187634|ENST00000478729|Transcript|upstream_gene_variant|||||||1|4457|1|SAMD11|HGNC|28706|processed_transcript||||||||||||||||||||||||||||||||||||||,C|ENSG00000187634|ENST00000342066|Transcript|synonymous_variant|506|423|141|R|cgA/cgC||1||1|SAMD11|HGNC|28706|protein_coding|YES||CCDS2.2|ENSP00000342313|SAM11_HUMAN|Q5SV95_HUMAN&I7FV93_HUMAN&A6PWC8_HUMAN|UPI0000D61E04|||5/14|||ENST00000342066.3:c.423N>C|ENST00000342066.3:c.423N>C(p.
%3D)|||||||||||||||POSITION:0.206744868035191||NON_CAN_SPLICE_SURR|||||||,C|ENSG00000187634|ENST00000341065|Transcript|synonymous_variant|194|195|65|R|cgA/cgC||1||1|SAMD11|HGNC|28706|protein_coding||||ENSP00000349216|||UPI000155D47A|||3/12|||ENST00000341065.4:c.194N>C|ENST00000341065.4:c.194N>C(p.%3D)|||||||||||||||POSITION:0.110231769361221||NON_CAN_SPLICE_SURR|||||||,C||ENSR00000528855|RegulatoryFeature|regulatory_region_variant|||||||1||||||regulatory_region||||||||||||||||||||||||||||||||||||||	GT:AD:DP:GQ:PL	0/0:34,0:34:99:0,102,1073	0/0:34,0:34:99:0,102,1064	0/0:37,0:37:99:0,108,1155	0/0:8,3:11:24:0,24,226	0/1:11,4:16:32:32,0,300	0/0:10,0:10:30:0,30,306	0/0:13,0:13:39:0,39,410	0/0:11,0:11:33:0,33,323	0/0:21,3:23:12:0,12,434	0/0:19,0:19:57:0,57,581	0/0:25,4:28:27:0,27,553	0/0:17,1:18:51:0,51,524	0/0:25,0:25:75:0,75,759	0/0:21,0:21:63:0,63,687	0/0:23,4:27:69:0,69,709	0/0:22,2:24:60:0,60,NABC
diff --git a/v03_pipeline/var/test/sex_check/test_imputed_sex.tsv b/v03_pipeline/var/test/sex_check/test_imputed_sex.tsv
index 5d5aea039..e745fbddd 100644
--- a/v03_pipeline/var/test/sex_check/test_imputed_sex.tsv
+++ b/v03_pipeline/var/test/sex_check/test_imputed_sex.tsv
@@ -1,4 +1,4 @@
 entity:sample_id	collaborator_participant_id	collaborator_sample_id	contamination_rate	coverage_region_1_metrics_file	crai_path	cram_md5_path	cram_path	datarepo_row_id	dragen_version	import:snapshot_id	import:timestamp	mapped_percentage	mapping_metrics_file	material_type	mean_coverage	original_material_type	participant_id	pass_fail_value	pdo	percent_bases_at_20x	percent_callability	predicted_sex	product	receipt_date	reported_sex	research_project	single_sample_vcf_index_path	single_sample_vcf_md5_path	single_sample_vcf_path	total_bases	variant_calling_metrics_file
 SM-DM66X	abc_1	abc_1	0E+00	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/6f30a41f-1d91-44d1-915c-5c10c6d87fcd/WAL_LIS6100_LIS6101.qc-coverage-region-1_coverage_metrics.csv	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/3e204a66-f044-4bdc-ade4-1671a0269214/WAL_LIS6100_LIS6101.cram.crai	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/a6ed4850-6a69-412e-a071-bf8cce04fca0/WAL_LIS6100_LIS6101.cram.md5sum	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/c51bbfd6-42f0-40ca-aa0c-b5eece935516/WAL_LIS6100_LIS6101.cram	8a07ce00-16a1-40f4-8666-c4cfaad1bbe1	07.021.604.3.7.8	cc9d9ed9-785a-407d-910e-d9bd46936fa6	2024-04-17T14:58:10	98.450000000	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/9e745b1d-2c00-44ce-bbfb-31c44369f4fe/WAL_LIS6100_LIS6101.mapping_metrics.csv	DNA:DNA Genomic	35.730000000	Whole Blood:Whole Blood	PT-24FB4	Pass	PDO-32851	96.140000000	97.850000000	Male	P-WG-0139	2017-03-15 04:00:00	Male	RP-3071	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/360ec721-0af8-4085-a677-38c018069559/WAL_LIS6100_LIS6101.vcf.gz.tbi	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/8da8cda2-497f-4a8b-a642-af4a4ad28aac/WAL_LIS6100_LIS6101.vcf.gz.md5sum	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/0a2d93fb-8837-4b6f-ac68-a6b9701f9a08/WAL_LIS6100_LIS6101.vcf.gz	134324623400.000000000	gs://datarepo-9cafeffd-bucket/f511b131-3f0d-4eb7-a7f0-b2b3d73dca3a/f7b62337-1339-4c2e-8280-281c48604e07/WAL_LIS6100_LIS6101.vc_metrics.csv
 SM-DM69X	abc_2	abc_2	0E+00	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c4c07edf-7735-4aa7-9283-7cb2607b60a2/GLE-5774-3-3.qc-coverage-region-1_coverage_metrics.csv	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/dcd4c271-0249-47f1-8e91-81f74735c5a1/GLE-5774-3-3.cram.crai	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/ec41ec06-673f-4fe2-a063-23dc5fe1dcce/GLE-5774-3-3.cram.md5sum	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/aad0e270-2ad5-4f39-b968-9b4beafeb5cc/GLE-5774-3-3.cram	a4b04a39-9234-4028-a155-442c4acf12a0	07.021.604.3.7.8	ce74d94c-c33d-49d7-85c9-5f3cbd08aff7	2024-04-17T15:02:46	99.800000000	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c3a9e6f2-4c68-410b-823d-46ca406e5061/GLE-5774-3-3.mapping_metrics.csv	DNA:DNA Genomic	35.300000000	Whole Blood:Whole Blood	PT-24OHM	Pass	PDO-32755	96.320000000	97.340000000	Female	P-WG-0139	2017-04-12 04:00:00	Female	RP-3061	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c71cd2a1-c789-4715-9ebc-dbfc40d9f2e2/GLE-5774-3-3.vcf.gz.tbi	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/957a99cb-c9a9-4fc5-a0ec-53f9e461469e/GLE-5774-3-3.vcf.gz.md5sum	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/df520949-5f2b-4976-9d46-80d1cc299813/GLE-5774-3-3.vcf.gz	133253714921.000000000	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/2e98e51b-9394-4e64-977f-e9010a4e16dc/GLE-5774-3-3.vc_metrics.csv
-SM-DPB5G	abc_3	abc_3	0E+00	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram	ffb50687-165e-425a-a545-c3797d3a28d4	07.021.604.3.7.8	55729ba9-3ce4-47b3-9c3b-1148737ae40f	2024-04-17T15:07:57	99.670000000	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv	DNA:DNA Genomic	41.910000000	Whole Blood:Whole Blood	PT-25BR5	Pass	PDO-32756	92.920000000	97.990000000	Male	P-WG-0139	2017-05-19 04:00:00	Male	RP-3062	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz	156149580126.000000000	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv
+SM-DPB5G	abc_3	abc_3	0E+00	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/432f8354-77e0-4381-9bb5-dfdc0633b5b2/PIE_OGI1433_002628_1.qc-coverage-region-1_coverage_metrics.csv	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/3dc623fa-2a45-4b3d-a0f8-fcdec09f9418/PIE_OGI1433_002628_1.cram.crai	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/895966ef-c705-4c18-952d-03863243a184/PIE_OGI1433_002628_1.cram.md5sum	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/96ca6d5f-fb23-4102-bb5e-c7bbfd194e1c/PIE_OGI1433_002628_1.cram	ffb50687-165e-425a-a545-c3797d3a28d4	07.021.604.3.7.8	55729ba9-3ce4-47b3-9c3b-1148737ae40f	2024-04-17T15:07:57	99.670000000	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/30f8e208-5d2d-4ce8-b835-695b5ed673f4/PIE_OGI1433_002628_1.mapping_metrics.csv	DNA:DNA Genomic	41.910000000	Whole Blood:Whole Blood	PT-25BR5	Pass	PDO-32756	92.920000000	97.990000000	Unknown	P-WG-0139	2017-05-19 04:00:00	Unknown	RP-3062	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/1641d1b2-1035-4cc3-9c8b-0c8cb430f56b/PIE_OGI1433_002628_1.vcf.gz.tbi	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/f5ba2708-899e-42e8-b287-fdf72c2e404d/PIE_OGI1433_002628_1.vcf.gz.md5sum	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/e925ee5d-a75e-471f-adfd-2756c8690069/PIE_OGI1433_002628_1.vcf.gz	156149580126.000000000	gs://datarepo-c41dc160-bucket/907593be-8862-4945-9e70-f758b6448b8d/df076bc5-9db8-44f0-a3fe-f693370634cc/PIE_OGI1433_002628_1.vc_metrics.csv
diff --git a/v03_pipeline/var/test/sex_check/test_imputed_sex_unexpected_value.tsv b/v03_pipeline/var/test/sex_check/test_imputed_sex_unexpected_value.tsv
index 7ab98ac61..03a5f120e 100644
--- a/v03_pipeline/var/test/sex_check/test_imputed_sex_unexpected_value.tsv
+++ b/v03_pipeline/var/test/sex_check/test_imputed_sex_unexpected_value.tsv
@@ -1,2 +1,2 @@
 entity:sample_id	collaborator_participant_id	collaborator_sample_id	contamination_rate	coverage_region_1_metrics_file	crai_path	cram_md5_path	cram_path	datarepo_row_id	dragen_version	import:snapshot_id	import:timestamp	mapped_percentage	mapping_metrics_file	material_type	mean_coverage	original_material_type	participant_id	pass_fail_value	pdo	percent_bases_at_20x	percent_callability	predicted_sex	product	receipt_date	reported_sex	research_project	single_sample_vcf_index_path	single_sample_vcf_md5_path	single_sample_vcf_path	total_bases	variant_calling_metrics_file
-SM-DM69X	abc_2	abc_2	0E+00	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c4c07edf-7735-4aa7-9283-7cb2607b60a2/GLE-5774-3-3.qc-coverage-region-1_coverage_metrics.csv	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/dcd4c271-0249-47f1-8e91-81f74735c5a1/GLE-5774-3-3.cram.crai	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/ec41ec06-673f-4fe2-a063-23dc5fe1dcce/GLE-5774-3-3.cram.md5sum	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/aad0e270-2ad5-4f39-b968-9b4beafeb5cc/GLE-5774-3-3.cram	a4b04a39-9234-4028-a155-442c4acf12a0	07.021.604.3.7.8	ce74d94c-c33d-49d7-85c9-5f3cbd08aff7	2024-04-17T15:02:46	99.800000000	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c3a9e6f2-4c68-410b-823d-46ca406e5061/GLE-5774-3-3.mapping_metrics.csv	DNA:DNA Genomic	35.300000000	Whole Blood:Whole Blood	PT-24OHM	Pass	PDO-32755	96.320000000	97.340000000	Unknown	P-WG-0139	2017-04-12 04:00:00	Unknown	RP-3061	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c71cd2a1-c789-4715-9ebc-dbfc40d9f2e2/GLE-5774-3-3.vcf.gz.tbi	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/957a99cb-c9a9-4fc5-a0ec-53f9e461469e/GLE-5774-3-3.vcf.gz.md5sum	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/df520949-5f2b-4976-9d46-80d1cc299813/GLE-5774-3-3.vcf.gz	133253714921.000000000	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/2e98e51b-9394-4e64-977f-e9010a4e16dc/GLE-5774-3-3.vc_metrics.csv
+SM-DM69X	abc_2	abc_2	0E+00	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c4c07edf-7735-4aa7-9283-7cb2607b60a2/GLE-5774-3-3.qc-coverage-region-1_coverage_metrics.csv	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/dcd4c271-0249-47f1-8e91-81f74735c5a1/GLE-5774-3-3.cram.crai	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/ec41ec06-673f-4fe2-a063-23dc5fe1dcce/GLE-5774-3-3.cram.md5sum	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/aad0e270-2ad5-4f39-b968-9b4beafeb5cc/GLE-5774-3-3.cram	a4b04a39-9234-4028-a155-442c4acf12a0	07.021.604.3.7.8	ce74d94c-c33d-49d7-85c9-5f3cbd08aff7	2024-04-17T15:02:46	99.800000000	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c3a9e6f2-4c68-410b-823d-46ca406e5061/GLE-5774-3-3.mapping_metrics.csv	DNA:DNA Genomic	35.300000000	Whole Blood:Whole Blood	PT-24OHM	Pass	PDO-32755	96.320000000	97.340000000	UNKNOWN	P-WG-0139	2017-04-12 04:00:00	UNKNOWN	RP-3061	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/c71cd2a1-c789-4715-9ebc-dbfc40d9f2e2/GLE-5774-3-3.vcf.gz.tbi	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/957a99cb-c9a9-4fc5-a0ec-53f9e461469e/GLE-5774-3-3.vcf.gz.md5sum	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/df520949-5f2b-4976-9d46-80d1cc299813/GLE-5774-3-3.vcf.gz	133253714921.000000000	gs://datarepo-556a9c15-bucket/2a4202b0-93f5-4ebe-8d2b-fd4cfb2b881d/2e98e51b-9394-4e64-977f-e9010a4e16dc/GLE-5774-3-3.vc_metrics.csv