diff --git a/pyproject.toml b/pyproject.toml
index 73826b005..2ffbdfe04 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,6 @@ extend-exclude = [
     'luigi_pipeline/lib/*',
     'luigi_pipeline/seqr*.py',
     'luigi_pipeline/tests/data/*',
-    'v03_pipeline/lib/reference_data/gencode/*',
 ]
 ignore = [
     # Individual Rules
diff --git a/requirements.in b/requirements.in
index 3cff99f79..af19fd655 100644
--- a/requirements.in
+++ b/requirements.in
@@ -3,6 +3,5 @@ google-api-python-client>=1.8.0
 hail==0.2.132
 luigi>=3.4.0
 gnomad==0.6.4
-google-cloud-storage>=2.14.0
 aiofiles==24.1.0
 pydantic==2.8.2
diff --git a/requirements.txt b/requirements.txt
index 4083931f8..a565a7c41 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,9 +8,9 @@ aiodns==2.0.0
     # via hail
 aiofiles==24.1.0
     # via -r requirements.in
-aiohappyeyeballs==2.3.5
+aiohappyeyeballs==2.4.3
     # via aiohttp
-aiohttp==3.10.2
+aiohttp==3.10.10
     # via
     #   hail
     #   slackclient
@@ -22,122 +22,105 @@ asttokens==2.4.1
     # via stack-data
 async-timeout==4.0.3
     # via aiohttp
-attrs==23.1.0
+attrs==24.2.0
     # via aiohttp
 avro==1.11.3
     # via hail
 azure-common==1.1.28
     # via azure-mgmt-storage
-azure-core==1.29.5
+azure-core==1.31.0
     # via
     #   azure-identity
     #   azure-mgmt-core
     #   azure-storage-blob
     #   msrest
-azure-identity==1.16.1
+azure-identity==1.19.0
     # via hail
 azure-mgmt-core==1.4.0
     # via azure-mgmt-storage
 azure-mgmt-storage==20.1.0
     # via hail
-azure-storage-blob==12.19.0
+azure-storage-blob==12.23.1
     # via hail
-bokeh==3.3.1
+bokeh==3.3.4
     # via hail
-boto3==1.33.1
+boto3==1.35.48
     # via hail
-botocore==1.33.1
+botocore==1.35.48
     # via
     #   boto3
     #   hail
     #   s3transfer
-cachetools==5.3.2
+cachetools==5.5.0
     # via google-auth
-certifi==2023.11.17
+certifi==2024.8.30
     # via
     #   elasticsearch
     #   msrest
     #   requests
-cffi==1.16.0
+cffi==1.17.1
     # via
     #   cryptography
     #   pycares
-charset-normalizer==3.3.2
+charset-normalizer==3.4.0
     # via requests
 click==8.1.7
     # via typer
-comm==0.2.0
+comm==0.2.2
     # via ipywidgets
 commonmark==0.9.1
     # via rich
-contourpy==1.2.0
+contourpy==1.3.0
     # via bokeh
-cryptography==43.0.1
+cryptography==43.0.3
     # via
     #   azure-identity
     #   azure-storage-blob
     #   msal
     #   pyjwt
-cython==0.29.36
-    # via hdbscan
 decorator==4.4.2
     # via
     #   hail
     #   ipython
 deprecated==1.2.14
     # via hail
-dill==0.3.7
+dill==0.3.9
     # via hail
-docutils==0.20.1
+docutils==0.21.2
     # via python-daemon
 elasticsearch==7.9.1
     # via -r requirements.in
-exceptiongroup==1.2.0
+exceptiongroup==1.2.2
     # via ipython
-executing==2.0.1
+executing==2.1.0
     # via stack-data
-frozenlist==1.4.0
+frozenlist==1.5.0
     # via
     #   aiohttp
     #   aiosignal
     #   hail
 gnomad==0.6.4
     # via -r requirements.in
-google-api-core==2.14.0
-    # via
-    #   google-api-python-client
-    #   google-cloud-core
-    #   google-cloud-storage
-google-api-python-client==2.108.0
+google-api-core==2.21.0
+    # via google-api-python-client
+google-api-python-client==2.149.0
     # via -r requirements.in
-google-auth==2.23.4
+google-auth==2.35.0
     # via
     #   google-api-core
     #   google-api-python-client
     #   google-auth-httplib2
     #   google-auth-oauthlib
-    #   google-cloud-core
-    #   google-cloud-storage
     #   hail
-google-auth-httplib2==0.1.1
+google-auth-httplib2==0.2.0
     # via google-api-python-client
 google-auth-oauthlib==0.8.0
     # via hail
-google-cloud-core==2.4.1
-    # via google-cloud-storage
-google-cloud-storage==2.14.0
-    # via -r requirements.in
-google-crc32c==1.5.0
-    # via
-    #   google-cloud-storage
-    #   google-resumable-media
-google-resumable-media==2.7.0
-    # via google-cloud-storage
-googleapis-common-protos==1.61.0
+googleapis-common-protos==1.65.0
     # via google-api-core
 hail==0.2.132
     # via -r requirements.in
-hdbscan==0.8.33
+hdbscan==0.8.39
     # via gnomad
 httplib2==0.22.0
     # via
@@ -145,15 +128,15 @@ httplib2==0.22.0
     #   google-auth-httplib2
 humanize==1.1.0
     # via hail
-idna==3.6
+idna==3.10
     # via
     #   requests
     #   yarl
-ipython==8.18.1
+ipython==8.28.0
     # via ipywidgets
-ipywidgets==8.1.1
+ipywidgets==8.1.5
     # via gnomad
-isodate==0.6.1
+isodate==0.7.2
     # via
     #   azure-storage-blob
     #   msrest
@@ -161,43 +144,43 @@ janus==1.0.0
     # via hail
 jedi==0.19.1
     # via ipython
-jinja2==3.1.3
+jinja2==3.1.4
     # via bokeh
 jmespath==1.0.1
     # via
     #   boto3
     #   botocore
-joblib==1.3.2
+joblib==1.4.2
     # via
     #   hdbscan
     #   scikit-learn
-jproperties==2.1.1
+jproperties==2.1.2
     # via hail
-jupyterlab-widgets==3.0.9
+jupyterlab-widgets==3.0.13
     # via ipywidgets
 lockfile==0.12.2
     # via python-daemon
-luigi==3.4.0
+luigi==3.5.2
     # via -r requirements.in
-markupsafe==2.1.3
+markupsafe==3.0.2
     # via jinja2
-matplotlib-inline==0.1.6
+matplotlib-inline==0.1.7
     # via ipython
-msal==1.28.0
+msal==1.31.0
     # via
     #   azure-identity
     #   msal-extensions
-msal-extensions==1.0.0
+msal-extensions==1.2.0
     # via azure-identity
 msrest==0.7.1
     # via azure-mgmt-storage
-multidict==6.0.4
+multidict==6.1.0
     # via
     #   aiohttp
     #   yarl
-nest-asyncio==1.5.8
+nest-asyncio==1.6.0
     # via hail
-numpy==1.26.2
+numpy==1.26.4
     # via
     #   bokeh
     #   contourpy
@@ -208,102 +191,108 @@ numpy==1.26.2
     #   scipy
 oauthlib==3.2.2
     # via requests-oauthlib
-orjson==3.10.6
+orjson==3.10.10
     # via hail
-packaging==23.2
+packaging==24.1
     # via
     #   bokeh
     #   plotly
-pandas==2.1.3
+pandas==2.2.3
     # via
     #   bokeh
     #   hail
 parsimonious==0.10.0
     # via hail
-parso==0.8.3
+parso==0.8.4
     # via jedi
 pexpect==4.9.0
     # via ipython
-pillow==10.3.0
+pillow==11.0.0
     # via bokeh
-plotly==5.18.0
+plotly==5.24.1
     # via hail
-portalocker==2.8.2
+portalocker==2.10.1
     # via msal-extensions
-prompt-toolkit==3.0.41
+prompt-toolkit==3.0.48
     # via ipython
+propcache==0.2.0
+    # via yarl
+proto-plus==1.25.0
+    # via google-api-core
 protobuf==3.20.2
     # via
     #   google-api-core
     #   googleapis-common-protos
     #   hail
+    #   proto-plus
 ptyprocess==0.7.0
     # via pexpect
-pure-eval==0.2.2
+pure-eval==0.2.3
     # via stack-data
 py4j==0.10.9.7
     # via pyspark
-pyasn1==0.5.1
+pyasn1==0.6.1
     # via
     #   pyasn1-modules
     #   rsa
-pyasn1-modules==0.3.0
+pyasn1-modules==0.4.1
     # via google-auth
 pycares==4.4.0
     # via aiodns
-pycparser==2.21
+pycparser==2.22
     # via cffi
 pydantic==2.8.2
     # via -r requirements.in
 pydantic-core==2.20.1
     # via pydantic
-pygments==2.17.2
+pygments==2.18.0
     # via
     #   ipython
     #   rich
-pyjwt[crypto]==2.8.0
+pyjwt[crypto]==2.9.0
     # via msal
-pyparsing==3.1.1
+pyparsing==3.2.0
     # via httplib2
-pyspark==3.5.1
+pyspark==3.5.3
     # via hail
 python-daemon==3.0.1
     # via luigi
-python-dateutil==2.8.2
+python-dateutil==2.9.0.post0
     # via
     #   botocore
     #   luigi
     #   pandas
 python-json-logger==2.0.7
     # via hail
-pytz==2023.3.post1
+pytz==2024.2
     # via pandas
-pyyaml==6.0.1
+pyyaml==6.0.2
     # via
     #   bokeh
     #   hail
-regex==2023.10.3
+regex==2024.9.11
     # via parsimonious
-requests==2.31.0
+requests==2.32.3
     # via
     #   azure-core
     #   google-api-core
-    #   google-cloud-storage
     #   hail
     #   msal
     #   msrest
     #   requests-oauthlib
-requests-oauthlib==1.3.1
+requests-oauthlib==2.0.0
     # via
     #   google-auth-oauthlib
     #   msrest
 rich==12.6.0
-    # via hail
+    # via
+    #   hail
+    #   typer
 rsa==4.9
     # via google-auth
-s3transfer==0.8.0
+s3transfer==0.10.3
     # via boto3
-scikit-learn==1.5.0
+scikit-learn==1.5.2
     # via
     #   gnomad
     #   hdbscan
@@ -312,11 +301,12 @@ scipy==1.11.4
     #   hail
     #   hdbscan
     #   scikit-learn
+shellingham==1.5.4
+    # via typer
 six==1.16.0
     # via
     #   asttokens
     #   azure-core
-    #   isodate
     #   jproperties
     #   python-dateutil
 slackclient==2.5.0
@@ -327,52 +317,55 @@ stack-data==0.6.3
     # via ipython
 tabulate==0.9.0
     # via hail
-tenacity==8.2.3
+tenacity==8.5.0
     # via
     #   luigi
     #   plotly
-threadpoolctl==3.2.0
+threadpoolctl==3.5.0
     # via scikit-learn
 tornado==6.4.1
     # via
     #   bokeh
     #   luigi
-traitlets==5.14.0
+traitlets==5.14.3
     # via
     #   comm
     #   ipython
     #   ipywidgets
     #   matplotlib-inline
-typer==0.9.0
+typer==0.12.5
     # via hail
-typing-extensions==4.8.0
+typing-extensions==4.12.2
     # via
     #   azure-core
+    #   azure-identity
     #   azure-storage-blob
+    #   ipython
     #   janus
+    #   multidict
     #   pydantic
     #   pydantic-core
     #   typer
-tzdata==2023.3
+tzdata==2024.2
     # via pandas
 uritemplate==4.1.1
     # via google-api-python-client
-urllib3==2.0.7
+urllib3==2.2.3
     # via
     #   botocore
     #   elasticsearch
     #   requests
-uvloop==0.19.0
+uvloop==0.21.0
     # via hail
-wcwidth==0.2.12
+wcwidth==0.2.13
     # via prompt-toolkit
-widgetsnbextension==4.0.9
+widgetsnbextension==4.0.13
     # via ipywidgets
 wrapt==1.16.0
     # via deprecated
-xyzservices==2023.10.1
+xyzservices==2024.9.0
     # via bokeh
-yarl==1.9.3
+yarl==1.16.0
     # via aiohttp
 
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/v03_pipeline/bin/rsync_reference_data.bash b/v03_pipeline/bin/rsync_reference_data.bash
index 9dfc91d74..825c583e5 100755
--- a/v03_pipeline/bin/rsync_reference_data.bash
+++ b/v03_pipeline/bin/rsync_reference_data.bash
@@ -16,12 +16,34 @@ case $REFERENCE_GENOME in
     exit 1
 esac
 
-mkdir -p $REFERENCE_DATASETS_DIR/$REFERENCE_GENOME;
+case $REFERENCE_DATASETS_DIR in
+  "gs://seqr-reference-data")
+    echo "Cannot rsync to the authoritative source"
+    exit 1
+    ;;
+    *)
+    ;;
+esac
 
-if [ -f "$REFERENCE_DATASETS_DIR"/"$REFERENCE_GENOME"/_SUCCESS ]; then
-   echo "Skipping rsync because already successful"
-   exit 0;
+if ! [[ "$REFERENCE_DATASETS_DIR" =~ ^gs:// ]]; then  # anchored: only a leading gs:// scheme means GCS
+  mkdir -p "$REFERENCE_DATASETS_DIR/$REFERENCE_GENOME"
+  if [ -f "$REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/_SUCCESS" ]; then
+    echo "Skipping rsync because already successful"
+    exit 0
+  fi
+else
+  # gsutil stat exits non-zero when the marker object is absent, so test its exit status directly.
+  if gsutil -q stat "$REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/_SUCCESS"; then
+    echo "Skipping rsync because already successful"
+    exit 0
+  fi
 fi
 
 gsutil -m rsync -rd "gs://seqr-reference-data/v03/$REFERENCE_GENOME" $REFERENCE_DATASETS_DIR/$REFERENCE_GENOME
-touch "$REFERENCE_DATASETS_DIR"/"$REFERENCE_GENOME"/_SUCCESS
+if ! [[ "$REFERENCE_DATASETS_DIR" =~ ^gs:// ]]; then
+  touch "$REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/_SUCCESS"
+else
+  # No touch on GCS: upload an empty temp file as the marker (avoids clobbering ./_SUCCESS), then remove it.
+  marker=$(mktemp) && gsutil cp "$marker" "$REFERENCE_DATASETS_DIR/$REFERENCE_GENOME/_SUCCESS"
+  rm -f "$marker"
+fi
diff --git a/v03_pipeline/lib/paths.py b/v03_pipeline/lib/paths.py
index 2295951f3..0ae158866 100644
--- a/v03_pipeline/lib/paths.py
+++ b/v03_pipeline/lib/paths.py
@@ -187,6 +187,22 @@ def relatedness_check_table_path(
     )
 
 
+def relatedness_check_tsv_path(
+    reference_genome: ReferenceGenome,
+    dataset_type: DatasetType,
+    callset_path: str,
+) -> str:
+    """Return the relatedness-check TSV path for the given callset."""
+    prefix = _pipeline_prefix(
+        Env.LOADING_DATASETS_DIR,
+        reference_genome,
+        dataset_type,
+    )
+    # The file name is the SHA-256 hex digest of the callset path, not the path itself.
+    callset_digest = hashlib.sha256(callset_path.encode('utf8')).hexdigest()
+    return os.path.join(prefix, 'relatedness_check', f'{callset_digest}.tsv')
+
+
 def remapped_and_subsetted_callset_path(
     reference_genome: ReferenceGenome,
     dataset_type: DatasetType,
diff --git a/v03_pipeline/lib/reference_data/clinvar.py b/v03_pipeline/lib/reference_data/clinvar.py
index fd5d47561..3e482e0b6 100644
--- a/v03_pipeline/lib/reference_data/clinvar.py
+++ b/v03_pipeline/lib/reference_data/clinvar.py
@@ -120,6 +120,7 @@ def download_and_import_latest_clinvar_vcf(
     clinvar_url: str,
     reference_genome: ReferenceGenome,
 ) -> hl.Table:
+    version = parse_clinvar_release_date(clinvar_url)
     with tempfile.NamedTemporaryFile(suffix='.vcf.gz', delete=False) as tmp_file:
         urllib.request.urlretrieve(clinvar_url, tmp_file.name)  # noqa: S310
         cached_tmp_file_name = os.path.join(
@@ -139,27 +140,20 @@ def download_and_import_latest_clinvar_vcf(
             min_partitions=MIN_HT_PARTITIONS,
             force_bgz=True,
         )
-        mt = mt.annotate_globals(version=_parse_clinvar_release_date(tmp_file.name))
+        mt = mt.annotate_globals(version=version)
         return join_to_submission_summary_ht(mt.rows())
 
 
-def _parse_clinvar_release_date(local_vcf_path: str) -> str:
-    """Parse clinvar release date from the VCF header.
-
-    Args:
-        local_vcf_path (str): clinvar vcf path on the local file system.
-
-    Returns:
-        str: return VCF release date as string, or None if release date not found in header.
-    """
-    with gzip.open(local_vcf_path, 'rt') as f:
-        for line in f:
-            if line.startswith('##fileDate='):
-                return line.split('=')[-1].strip()
-
-            if not line.startswith('#'):
-                return None
-
+def parse_clinvar_release_date(clinvar_url: str) -> str | None:
+    """Stream the gzipped VCF header and return its ##fileDate value, or None."""
+    with requests.get(clinvar_url, stream=True, timeout=10) as response:
+        response.raise_for_status()  # fail on HTTP errors before gunzipping the body
+        for byte_line in gzip.GzipFile(fileobj=response.raw):
+            line = byte_line.decode('ascii').strip()
+            if line.startswith('##fileDate='):
+                return line.split('=')[-1].strip()
+            if line and not line.startswith('#'):  # first record: header is over
+                return None
     return None
 
 
diff --git a/v03_pipeline/lib/reference_data/clinvar_test.py b/v03_pipeline/lib/reference_data/clinvar_test.py
index 8e1b509ff..fd8d4e832 100644
--- a/v03_pipeline/lib/reference_data/clinvar_test.py
+++ b/v03_pipeline/lib/reference_data/clinvar_test.py
@@ -1,17 +1,43 @@
+import gzip
 import unittest
 from unittest import mock
 
 import hail as hl
+import responses
 
 from v03_pipeline.lib.reference_data.clinvar import (
     import_submission_table,
     join_to_submission_summary_ht,
+    parse_clinvar_release_date,
     parsed_and_mapped_clnsigconf,
     parsed_clnsig,
 )
 
+CLINVAR_VCF_DATA = b"""
+##fileformat=VCFv4.1
+##fileDate=2024-10-27
+##source=ClinVar
+##reference=GRCh37
+##ID=<Description="ClinVar Variation ID">
+##INFO=<ID=AF_ESP,Number=1,Type=Float,Description="allele frequencies from GO-ESP">
+"""
+
 
 class ClinvarTest(unittest.TestCase):
+    @responses.activate  # serve the registered mock instead of hitting the network
+    def test_parse_clinvar_release_date(self):
+        clinvar_url = (
+            'https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz'
+        )
+        responses.get(
+            clinvar_url,
+            body=gzip.compress(CLINVAR_VCF_DATA),  # the parser gunzips the response body
+        )
+        self.assertEqual(
+            parse_clinvar_release_date(clinvar_url),
+            '2024-10-27',  # the ##fileDate value in CLINVAR_VCF_DATA
+        )
+
     def test_parsed_clnsig(self):
         ht = hl.Table.parallelize(
             [
diff --git a/v03_pipeline/lib/reference_data/compare_globals.py b/v03_pipeline/lib/reference_data/compare_globals.py
index 1feb0ac12..c295b3a35 100644
--- a/v03_pipeline/lib/reference_data/compare_globals.py
+++ b/v03_pipeline/lib/reference_data/compare_globals.py
@@ -4,8 +4,10 @@
 
 from v03_pipeline.lib.logger import get_logger
 from v03_pipeline.lib.model import (
+    DatasetType,
     ReferenceGenome,
 )
+from v03_pipeline.lib.reference_data.clinvar import parse_clinvar_release_date
 from v03_pipeline.lib.reference_data.config import CONFIG
 from v03_pipeline.lib.reference_data.dataset_table_operations import (
     get_all_select_fields,
@@ -16,6 +18,17 @@
 logger = get_logger(__name__)
 
 
+def clinvar_versions_equal(
+    ht: hl.Table,
+    reference_genome: ReferenceGenome,
+    dataset_type: DatasetType,
+) -> bool:
+    dataset = 'clinvar' if dataset_type != DatasetType.MITO else 'clinvar_mito'
+    loaded = hl.eval(ht.globals.versions[dataset])  # version recorded on the table
+    url = CONFIG[dataset][reference_genome.v02_value]
+    return loaded == parse_clinvar_release_date(url)  # release date read from that URL
+
+
 @dataclasses.dataclass
 class Globals:
     paths: dict[str, str]
diff --git a/v03_pipeline/lib/reference_data/gencode/download_utils.py b/v03_pipeline/lib/reference_data/gencode/download_utils.py
deleted file mode 100644
index 420e860b9..000000000
--- a/v03_pipeline/lib/reference_data/gencode/download_utils.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import logging
-import os
-import tempfile
-from contextlib import contextmanager
-
-import hail as hl
-import requests
-from google.cloud import storage
-
-logger = logging.getLogger(__name__)
-
-
-def parse_gs_path_to_bucket(gs_path):
-    bucket_name = gs_path.replace('gs://', '').split('/')[0]
-    file_name = gs_path.split(bucket_name)[-1].lstrip('/')
-
-    storage_client = storage.Client()
-    bucket = storage_client.bucket(bucket_name)
-
-    return bucket, file_name
-
-
-def stream_gs_file(gs_path, raw_download=False):
-    logger.info(f'Stream from GCS: {gs_path}')
-    bucket, file_name = parse_gs_path_to_bucket(gs_path)
-
-    blob = bucket.get_blob(file_name)
-
-    return blob and blob.download_as_string(raw_download=raw_download)
-
-
-@contextmanager
-def file_writer(file_path, get_existing_size=False):
-    bucket = None
-    size = None
-    if is_gs_path(file_path):
-        local_file_path = os.path.join(
-            tempfile.gettempdir(),
-            os.path.basename(file_path),
-        )
-        bucket, file_name = parse_gs_path_to_bucket(file_path)
-        if get_existing_size:
-            blob = bucket.get_blob(file_name)
-            size = blob and blob.size
-    else:
-        local_file_path = file_path
-        if get_existing_size:
-            size = os.path.isfile(local_file_path) and os.path.getsize(local_file_path)
-
-    local_file = open(local_file_path, 'wb')
-
-    yield local_file, size
-
-    local_file.close()
-
-    if bucket:
-        blob = bucket.blob(file_name)
-        blob.upload_from_filename(local_file_path)
-
-
-def is_gs_path(path):
-    return path.startswith('gs://')
-
-
-def path_exists(path):
-    is_gs = is_gs_path(path)
-    return (is_gs and hl.hadoop_exists(path)) or (not is_gs and os.path.exists(path))
-
-
-DEFAULT_TO_DIR = tempfile.gettempdir()
-
-
-def download_file(url, to_dir=None, verbose=True):
-    """Download the given file and returns its local path.
-    Args:
-       url (string): HTTP or FTP url
-       to_dir: optional save to directory
-       verbose: display verbose information
-    Returns:
-       string: local file path
-    """
-    if to_dir is None:
-        to_dir = DEFAULT_TO_DIR
-
-    if not (url and url.startswith(('http://', 'https://'))):
-        msg = f'Invalid url: {url}'
-        raise ValueError(msg)
-    remote_file_size = _get_remote_file_size(url)
-
-    file_path = os.path.join(to_dir, os.path.basename(url))
-    with file_writer(file_path, get_existing_size=True) as fw:
-        f, file_size = fw
-        if file_size and file_size == remote_file_size:
-            logger.info(
-                f'Re-using {file_path} previously downloaded from {url}',
-            )
-            return file_path
-
-        is_gz = url.endswith('.gz')
-        response = requests.get(url, stream=is_gz)
-        input_iter = response if is_gz else response.iter_content()
-        if verbose:
-            logger.info(f'Downloading {url} to {file_path}')
-
-        f.writelines(input_iter)
-        input_iter.close()
-
-    return file_path
-
-
-def _get_remote_file_size(url):
-    return int(requests.head(url).headers.get('Content-Length', '0'))
diff --git a/v03_pipeline/lib/reference_data/gencode/download_utils_test.py b/v03_pipeline/lib/reference_data/gencode/download_utils_test.py
deleted file mode 100644
index d990bf6f6..000000000
--- a/v03_pipeline/lib/reference_data/gencode/download_utils_test.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import unittest
-from unittest import mock
-
-import responses
-
-from v03_pipeline.lib.reference_data.gencode.download_utils import download_file
-
-DEFAULT_TEST_DIR = 'default_test/dir'
-TEST_DIR = 'test/dir'
-GS_TEST_DIR = 'gs://test-bucket/test/dir'
-TEST_TXT_FILE = 'test_file.txt'
-TEST_GZ_FILE = 'test_file.gz'
-TXT_DATA_URL = 'https://mock_url/test_file.txt'
-GZ_DATA_URL = 'https://mock_url/test_file.gz'
-GZ_DATA = b'test data\nanother line\n'
-
-
-class DownloadUtilsTest(unittest.TestCase):
-    @responses.activate
-    @mock.patch(
-        'v03_pipeline.lib.reference_data.gencode.download_utils.DEFAULT_TO_DIR',
-        DEFAULT_TEST_DIR,
-    )
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.download_utils.logger')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.download_utils.os.path.isfile')
-    @mock.patch(
-        'v03_pipeline.lib.reference_data.gencode.download_utils.os.path.getsize',
-    )
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.download_utils.open')
-    @mock.patch(
-        'v03_pipeline.lib.reference_data.gencode.download_utils.tempfile.gettempdir',
-    )
-    @mock.patch(
-        'v03_pipeline.lib.reference_data.gencode.download_utils.parse_gs_path_to_bucket',
-    )
-    def test_download_file(
-        self,
-        mock_get_bucket,
-        mock_gettempdir,
-        mock_open,
-        mock_getsize,
-        mock_isfile,
-        mock_logger,
-    ):
-        responses.add(
-            responses.HEAD,
-            GZ_DATA_URL,
-            headers={'Content-Length': '1024'},
-            status=200,
-            body=b' ' * 1024,
-        )
-        responses.add(responses.GET, GZ_DATA_URL, body=GZ_DATA)
-        responses.add(
-            responses.HEAD,
-            TXT_DATA_URL,
-            headers={'Content-Length': '1024'},
-            status=200,
-            body=b' ' * 1024,
-        )
-        responses.add(responses.GET, TXT_DATA_URL, body='test data\nanother line\n')
-
-        # Test bad url
-        with self.assertRaises(ValueError) as ve:
-            download_file('bad_url')
-        self.assertEqual(str(ve.exception), 'Invalid url: bad_url')
-
-        # Test already downloaded
-        mock_isfile.return_value = True
-        mock_getsize.return_value = 1024
-        result = download_file(GZ_DATA_URL)
-        self.assertEqual(result, 'default_test/dir/test_file.gz')
-        mock_open.assert_called_with('default_test/dir/test_file.gz', 'wb')
-        mock_isfile.assert_called_with('default_test/dir/test_file.gz')
-        mock_getsize.assert_called_with('default_test/dir/test_file.gz')
-        mock_logger.info.assert_called_with(
-            f'Re-using default_test/dir/test_file.gz previously downloaded from {GZ_DATA_URL}',
-        )
-
-        # Test download, .gz file format, verbose
-        mock_isfile.reset_mock()
-        mock_getsize.reset_mock()
-        mock_logger.reset_mock()
-        mock_open.reset_mock()
-        mock_isfile.return_value = False
-        result = download_file(GZ_DATA_URL, TEST_DIR)
-        self.assertEqual(result, 'test/dir/test_file.gz')
-        mock_isfile.assert_called_with('test/dir/test_file.gz')
-        mock_getsize.assert_not_called()
-        mock_open.assert_called_with('test/dir/test_file.gz', 'wb')
-        mock_logger.info.assert_called_with(
-            f'Downloading {GZ_DATA_URL} to test/dir/test_file.gz',
-        )
-
-        # Test download, non-.gz file format, non-verbose
-        mock_isfile.reset_mock()
-        mock_logger.reset_mock()
-        mock_open.reset_mock()
-        mock_isfile.return_value = False
-        result = download_file(TXT_DATA_URL, TEST_DIR, verbose=False)
-        self.assertEqual(result, 'test/dir/test_file.txt')
-        mock_isfile.assert_called_with('test/dir/test_file.txt')
-        mock_getsize.assert_not_called()
-        mock_open.assert_called_with('test/dir/test_file.txt', 'wb')
-        mock_open.return_value.writelines.assert_called_once()
-        mock_logger.info.assert_not_called()
-
-        mock_gettempdir.assert_not_called()
-        mock_get_bucket.assert_not_called()
-
-        # Test using Google Storage
-        mock_isfile.reset_mock()
-        mock_logger.reset_mock()
-        mock_open.reset_mock()
-        mock_gettempdir.return_value = TEST_DIR
-        mock_bucket = mock.MagicMock()
-        mock_get_bucket.return_value = mock_bucket, 'test/dir/test_file.gz'
-        result = download_file(GZ_DATA_URL, GS_TEST_DIR)
-        self.assertEqual(result, 'gs://test-bucket/test/dir/test_file.gz')
-        mock_gettempdir.assert_called_once()
-        mock_isfile.assert_not_called()
-        mock_getsize.assert_not_called()
-        mock_open.assert_called_with('test/dir/test_file.gz', 'wb')
-        mock_logger.info.assert_called_with(
-            f'Downloading {GZ_DATA_URL} to gs://test-bucket/test/dir/test_file.gz',
-        )
-        mock_bucket.get_blob.assert_called_with('test/dir/test_file.gz')
-        mock_bucket.blob.assert_called_with('test/dir/test_file.gz')
-        mock_bucket.blob.return_value.upload_from_filename.assert_called_with(
-            'test/dir/test_file.gz',
-        )
diff --git a/v03_pipeline/lib/reference_data/gencode/mapping_gene_ids.py b/v03_pipeline/lib/reference_data/gencode/mapping_gene_ids.py
index fed40301f..96597f815 100644
--- a/v03_pipeline/lib/reference_data/gencode/mapping_gene_ids.py
+++ b/v03_pipeline/lib/reference_data/gencode/mapping_gene_ids.py
@@ -1,21 +1,8 @@
 import gzip
 import logging
-import os
-import pickle
 
 import requests
 
-from v03_pipeline.lib.reference_data.gencode.download_utils import (
-    download_file,
-    file_writer,
-    is_gs_path,
-    path_exists,
-    stream_gs_file,
-)
-
-GENOME_VERSION_GRCh37 = '37'
-GENOME_VERSION_GRCh38 = '38'
-
 logger = logging.getLogger(__name__)
 
 GENCODE_GTF_URL = 'http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_{gencode_release}/gencode.v{gencode_release}.annotation.gtf.gz'
@@ -33,115 +20,43 @@
     'phase',
     'info',
 ]
+EXPECTED_ENSEMBLE_TO_REFSEQ_FIELDS = 3
 
 
-def _get_pickle_file(path):
-    root, ext = os.path.splitext(path)
-    return root + '.pickle'
-
-
-def _load_parsed_data_or_download(gencode_release, download_path):
-    gene_id_mapping = {}
+def load_gencode_gene_symbol_to_gene_id(gencode_release: int) -> dict[str, str]:
     url = GENCODE_GTF_URL.format(gencode_release=gencode_release)
-    gencode_gtf_path = os.path.join(download_path, os.path.basename(url))
-    pickle_file = _get_pickle_file(gencode_gtf_path)
-    if path_exists(pickle_file):
-        logger.info(
-            'Use the existing pickle file {}.\nIf you want to reload the data, please delete it and re-run the data loading.'.format(
-                pickle_file,
-            ),
-        )
-        if is_gs_path(pickle_file):
-            p = pickle.loads(stream_gs_file(pickle_file))
-        else:
-            with open(pickle_file, 'rb') as handle:
-                p = pickle.load(handle)
-        gene_id_mapping.update(p)
-    elif not path_exists(gencode_gtf_path):
-        gencode_gtf_path = download_file(url, to_dir=download_path)
-        logger.info(f'Downloaded to {gencode_gtf_path}')
-    else:
-        logger.info(
-            'Use the existing downloaded file {}.\nIf you want to re-download it, please delete the file and re-run the pipeline.'.format(
-                gencode_gtf_path,
-            ),
-        )
-
-    return gene_id_mapping, gencode_gtf_path
-
-
-def _parse_gtf_data(gencode_gtf_path):
-    gene_id_mapping = {}
-    logger.info(f'Loading {gencode_gtf_path}')
-    is_gs = is_gs_path(gencode_gtf_path)
-    gencode_file = (
-        gzip.decompress(stream_gs_file(gencode_gtf_path, raw_download=True))
-        .decode()
-        .split('\n')
-        if is_gs
-        else gzip.open(gencode_gtf_path, 'rt')
-    )
-    for i, line in enumerate(gencode_file):
-        line = line.rstrip('\r\n')
+    response = requests.get(url, stream=True, timeout=10)
+    gene_symbol_to_gene_id = {}
+    for line in gzip.GzipFile(fileobj=response.raw):  # yields bytes lines, newline included
+        line = line.decode('ascii').strip()  # noqa: PLW2901
         if not line or line.startswith('#'):
             continue
-        fields = line.split('\t')
-
+        fields = line.strip().split('\t')
         if len(fields) != len(GENCODE_FILE_HEADER):
+            msg = f'Unexpected number of fields: {fields}'
             raise ValueError(
-                'Unexpected number of fields on line #%s: %s' % (i, fields),
+                msg,
             )
-
-        record = dict(zip(GENCODE_FILE_HEADER, fields))
-
+        record = dict(zip(GENCODE_FILE_HEADER, fields, strict=False))
         if record['feature_type'] != 'gene':
             continue
-
         # parse info field
         info_fields = [x.strip().split() for x in record['info'].split(';') if x != '']
         info_fields = {k: v.strip('"') for k, v in info_fields}
+        gene_symbol_to_gene_id[info_fields['gene_name']] = info_fields['gene_id'].split(
+            '.',
+        )[0]
+    return gene_symbol_to_gene_id
 
-        gene_id_mapping[info_fields['gene_name']] = info_fields['gene_id'].split('.')[0]
-
-    if not is_gs:
-        gencode_file.close()
-
-    pickle_file = _get_pickle_file(gencode_gtf_path)
-    logger.info(f'Saving to pickle {pickle_file}')
-    with file_writer(pickle_file) as fw:
-        f, _ = fw
-        pickle.dump(gene_id_mapping, f, protocol=pickle.HIGHEST_PROTOCOL)
-
-    return gene_id_mapping
-
-
-def load_gencode_gene_symbol_to_gene_id(gencode_release, download_path=''):
-    """Load Gencode to create a gene symbols to gene ids mapping table.
-
-    Args:
-        gencode_release (int): the gencode release to load (eg. 25)
-        download_path (str): The path for downloaded data
-    """
-    gene_id_mapping, gencode_gtf_path = _load_parsed_data_or_download(
-        gencode_release,
-        download_path,
-    )
-
-    if not gene_id_mapping:
-        gene_id_mapping = _parse_gtf_data(gencode_gtf_path)
-
-    logger.info(f'Got {len(gene_id_mapping)} gene id mapping records')
-    return gene_id_mapping
 
 def load_gencode_ensembl_to_refseq_id(gencode_release: int):
     url = GENCODE_ENSEMBL_TO_REFSEQ_URL.format(gencode_release=gencode_release)
-    response = requests.get(url, stream=True)
+    response = requests.get(url, stream=True, timeout=10)
     ensembl_to_refseq_ids = {}
     for line in gzip.GzipFile(fileobj=response.raw):
-        line = line.decode('ascii').strip().split('\t')
-        if len(line) > 3:
-            raise ValueError(
-                'Unexpected number of fields on line in ensemble_to_refseq mapping',
-            )
-        ensembl_to_refseq_ids[line[0].split('.')[0]] = line[1]
+        fields = line.decode('ascii').strip().split('\t')
+        if not 1 < len(fields) <= EXPECTED_ENSEMBLE_TO_REFSEQ_FIELDS:  # need both ids
+            msg = 'Unexpected number of fields on line in ensemble_to_refseq mapping'
+            raise ValueError(msg)
+        ensembl_to_refseq_ids[fields[0].split('.')[0]] = fields[1]
     return ensembl_to_refseq_ids
diff --git a/v03_pipeline/lib/reference_data/gencode/mapping_gene_ids_tests.py b/v03_pipeline/lib/reference_data/gencode/mapping_gene_ids_tests.py
index 585278a7b..58c037048 100644
--- a/v03_pipeline/lib/reference_data/gencode/mapping_gene_ids_tests.py
+++ b/v03_pipeline/lib/reference_data/gencode/mapping_gene_ids_tests.py
@@ -1,162 +1,52 @@
 import gzip
 import unittest
-from unittest import mock
 
 import responses
 
-from v03_pipeline.lib.reference_data.gencode.mapping_gene_ids import load_gencode_ensembl_to_refseq_id, load_gencode_gene_symbol_to_gene_id, GENCODE_ENSEMBL_TO_REFSEQ_URL
+from v03_pipeline.lib.reference_data.gencode.mapping_gene_ids import (
+    GENCODE_ENSEMBL_TO_REFSEQ_URL,
+    GENCODE_GTF_URL,
+    load_gencode_ensembl_to_refseq_id,
+    load_gencode_gene_symbol_to_gene_id,
+)
 
-DOWNLOAD_PATH = 'test/path'
-GS_DOWNLOAD_PATH ='gs://test-bucket/test/path'
-DOWNLOAD_FILE = 'test/path/gencode.v29.annotation.gtf.gz'
-PICKLE_FILE = 'test/path/gencode.v29.annotation.gtf.pickle'
-PICKLE_FILE_HANDLE = 'handle'
 GTF_DATA = [
-    '#description: evidence-based annotation of the human genome, version 31 (Ensembl 97), mapped to GRCh37 with gencode-backmap\n',
-    'chr1	HAVANA	gene	11869	14409	.	+	.	gene_id "ENSG00000223972.5_2"; gene_type "transcribed_unprocessed_pseudogene"; gene_name "DDX11L1"; level 2; hgnc_id "HGNC:37102"; havana_gene "OTTHUMG00000000961.2_2"; remap_status "full_contig"; remap_num_mappings 1; remap_target_status "overlap";\n',
-    'chr1	HAVANA	gene	621059	622053	.	-	.	gene_id "ENSG00000284662.1_2"; gene_type "protein_coding"; gene_name "OR4F16"; level 2; hgnc_id "HGNC:15079"; havana_gene "OTTHUMG00000002581.3_2"; remap_status "full_contig"; remap_num_mappings 1; remap_target_status "overlap";\n',
-    'GL000193.1	HAVANA	gene	77815	78162	.	+	.	gene_id "ENSG00000279783.1_5"; gene_type "processed_pseudogene"; gene_name "AC018692.2"; level 2; havana_gene "OTTHUMG00000189459.1_5"; remap_status "full_contig"; remap_num_mappings 1; remap_target_status "new";\n',
+    '#description: evidence-based annotation of the human genome, version 31 (Ensembl 97), mapped to GRCh37 with gencode-backmap',
+    'chr1	HAVANA	gene	11869	14409	.	+	.	gene_id "ENSG00000223972.5_2"; gene_type "transcribed_unprocessed_pseudogene"; gene_name "DDX11L1"; level 2; hgnc_id "HGNC:37102"; havana_gene "OTTHUMG00000000961.2_2"; remap_status "full_contig"; remap_num_mappings 1; remap_target_status "overlap";',
+    'chr1	HAVANA	gene	621059	622053	.	-	.	gene_id "ENSG00000284662.1_2"; gene_type "protein_coding"; gene_name "OR4F16"; level 2; hgnc_id "HGNC:15079"; havana_gene "OTTHUMG00000002581.3_2"; remap_status "full_contig"; remap_num_mappings 1; remap_target_status "overlap";',
+    'GL000193.1	HAVANA	gene	77815	78162	.	+	.	gene_id "ENSG00000279783.1_5"; gene_type "processed_pseudogene"; gene_name "AC018692.2"; level 2; havana_gene "OTTHUMG00000189459.1_5"; remap_status "full_contig"; remap_num_mappings 1; remap_target_status "new";',
 ]
-GENE_ID_MAPPING = {"DDX11L1": "ENSG00000223972", "OR4F16": "ENSG00000284662", "AC018692.2": "ENSG00000279783"}
+GENE_ID_MAPPING = {
+    'DDX11L1': 'ENSG00000223972',
+    'OR4F16': 'ENSG00000284662',
+    'AC018692.2': 'ENSG00000279783',
+}
 
 
-ENSEMBL_TO_REFSEQ_DATA = b'''ENST00000424215.1\tNR_121638.1
+ENSEMBL_TO_REFSEQ_DATA = b"""ENST00000424215.1\tNR_121638.1
 ENST00000378391.6\tNM_199454.3\tNP_955533.2
 ENST00000270722.10\tNM_022114.4\tNP_071397.3
-ENST00000288774.8\tNM_001374425.1\tNP_001361354.1'''
+ENST00000288774.8\tNM_001374425.1\tNP_001361354.1"""
 
-class LoadGencodeTestCase(unittest.TestCase):
-
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.logger')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.path_exists')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.pickle')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.open')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.gzip.open')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.file_writer')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.download_file')
-    def test_load_gencode_local(self, mock_download_file, mock_file_writer, mock_gopen, mock_open, mock_pickle,
-                                mock_path_exists, mock_logger):
-        # test using saved file
-        mock_path_exists.side_effect = [True]
-        mock_pickle.load.return_value = GENE_ID_MAPPING
-        gene_id_mapping = load_gencode_gene_symbol_to_gene_id(23, download_path=DOWNLOAD_PATH)
-        mock_file_writer.assert_not_called()
-        mock_download_file.assert_not_called()
-        mock_gopen.assert_not_called()
-        mock_open.assert_called_with('test/path/gencode.v23.annotation.gtf.pickle', 'rb')
-        mock_pickle.load.assert_called_with(mock_open.return_value.__enter__.return_value)
-        mock_path_exists.assert_called_with('test/path/gencode.v23.annotation.gtf.pickle')
-        mock_logger.info.assert_has_calls([
-            mock.call('Use the existing pickle file test/path/gencode.v23.annotation.gtf.pickle.\nIf you want to reload the data, please delete it and re-run the data loading.'),
-            mock.call('Got 3 gene id mapping records'),
-        ])
-        self.assertEqual(gene_id_mapping, GENE_ID_MAPPING)
 
-        # test downloading and parsing gtf data
-        mock_path_exists.reset_mock()
-        mock_logger.reset_mock()
-        mock_pickle.reset_mock()
-        mock_open.reset_mock()
-        mock_path_exists.side_effect = [False, False]
-        mock_download_file.return_value = 'test/path/gencode.v24.annotation.gtf.gz'
-        mock_gopen.return_value.__iter__.return_value = GTF_DATA
-        mock_f = mock.MagicMock()
-        mock_file_writer.return_value.__enter__.return_value = mock_f, None
-        gene_id_mapping = load_gencode_gene_symbol_to_gene_id(24, download_path=DOWNLOAD_PATH)
-        self.assertEqual(gene_id_mapping, GENE_ID_MAPPING)
-        mock_path_exists.assert_has_calls([
-            mock.call('test/path/gencode.v24.annotation.gtf.pickle'),
-            mock.call('test/path/gencode.v24.annotation.gtf.gz'),
-        ])
-        mock_download_file.assert_called_with(
-            'http://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_24/gencode.v24.annotation.gtf.gz',
-            to_dir='test/path',
+class LoadGencodeTestCase(unittest.TestCase):
+    @responses.activate
+    def test_load_gencode_gene_symbol_to_gene_id(self):
+        url = GENCODE_GTF_URL.format(gencode_release=12)
+        responses.add(
+            responses.GET,
+            url,
+            body=gzip.compress(('\n'.join(GTF_DATA)).encode()),
+        )
+        mapping = load_gencode_gene_symbol_to_gene_id(12)
+        self.assertDictEqual(
+            mapping,
+            {
+                'AC018692.2': 'ENSG00000279783',
+                'DDX11L1': 'ENSG00000223972',
+                'OR4F16': 'ENSG00000284662',
+            },
         )
-        mock_file_writer.assert_called_with('test/path/gencode.v24.annotation.gtf.pickle')
-        mock_pickle.dump.assert_called_with(GENE_ID_MAPPING, mock_f, protocol=mock.ANY)
-        mock_gopen.assert_called_with('test/path/gencode.v24.annotation.gtf.gz', 'rt')
-        mock_open.assert_not_called()
-        mock_logger.info.assert_has_calls([
-            mock.call('Downloaded to test/path/gencode.v24.annotation.gtf.gz'),
-            mock.call('Loading test/path/gencode.v24.annotation.gtf.gz'),
-            mock.call('Saving to pickle test/path/gencode.v24.annotation.gtf.pickle'),
-            mock.call('Got 3 gene id mapping records')
-        ])
-        mock_pickle.load.assert_not_called()
-
-        # test using downloaded file
-        mock_path_exists.reset_mock()
-        mock_logger.reset_mock()
-        mock_download_file.reset_mock()
-        mock_pickle.reset_mock()
-        mock_path_exists.side_effect = [False, True]
-        mock_gopen.return_value.__iter__.return_value = GTF_DATA
-        gene_id_mapping = load_gencode_gene_symbol_to_gene_id(24, download_path=DOWNLOAD_PATH)
-        self.assertEqual(gene_id_mapping, GENE_ID_MAPPING)
-        mock_path_exists.assert_has_calls([
-            mock.call('test/path/gencode.v24.annotation.gtf.pickle'),
-            mock.call('test/path/gencode.v24.annotation.gtf.gz'),
-        ])
-        mock_gopen.assert_called_with('test/path/gencode.v24.annotation.gtf.gz', 'rt')
-        mock_download_file.assert_not_called()
-        mock_file_writer.assert_called_with('test/path/gencode.v24.annotation.gtf.pickle')
-        mock_pickle.dump.assert_called_with(GENE_ID_MAPPING, mock_f, protocol=mock.ANY)
-        mock_open.assert_not_called()
-        mock_logger.info.assert_has_calls([
-            mock.call('Use the existing downloaded file test/path/gencode.v24.annotation.gtf.gz.\nIf you want to re-download it, please delete the file and re-run the pipeline.'),
-            mock.call('Loading test/path/gencode.v24.annotation.gtf.gz'),
-            mock.call('Saving to pickle test/path/gencode.v24.annotation.gtf.pickle'),
-            mock.call('Got 3 gene id mapping records')
-        ])
-        mock_pickle.load.assert_not_called()
-
-        # bad gtf data test
-        mock_path_exists.side_effect = [False, False]
-        mock_gopen.return_value.__iter__.return_value = ['bad data']
-        with self.assertRaises(ValueError) as ve:
-            _ = load_gencode_gene_symbol_to_gene_id(24, download_path=DOWNLOAD_PATH)
-        self.assertEqual(str(ve.exception), "Unexpected number of fields on line #0: ['bad data']")
-
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.gzip')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.logger')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.path_exists')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.pickle')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.stream_gs_file')
-    @mock.patch('v03_pipeline.lib.reference_data.gencode.mapping_gene_ids.file_writer')
-    def test_load_gencode_using_gs(self, mock_file_writer, mock_stream_gs_file, mock_pickle, mock_path_exists,
-                                   mock_logger, mock_gzip):
-
-        # test using saved file.
-        mock_path_exists.side_effect = [True]
-        mock_pickle.loads.return_value = GENE_ID_MAPPING
-        gene_id_mapping = load_gencode_gene_symbol_to_gene_id(25, download_path=GS_DOWNLOAD_PATH)
-        self.assertEqual(gene_id_mapping, GENE_ID_MAPPING)
-        mock_path_exists.assert_called_with('gs://test-bucket/test/path/gencode.v25.annotation.gtf.pickle')
-        mock_logger.info.assert_has_calls([
-            mock.call('Use the existing pickle file gs://test-bucket/test/path/gencode.v25.annotation.gtf.pickle.\n'
-                      'If you want to reload the data, please delete it and re-run the data loading.'),
-            mock.call('Got 3 gene id mapping records')
-        ])
-        mock_stream_gs_file.assert_called_with('gs://test-bucket/test/path/gencode.v25.annotation.gtf.pickle')
-        mock_pickle.dump.assert_not_called()
-        mock_file_writer.assert_not_called()
-
-        # test using downloaded file.
-        mock_path_exists.side_effect = [False, True]
-        mock_gzip.decompress.return_value = ''.join(GTF_DATA).encode()
-        mock_f = mock.MagicMock()
-        mock_file_writer.return_value.__enter__.return_value = mock_f, None
-        gene_id_mapping = load_gencode_gene_symbol_to_gene_id(25, download_path=GS_DOWNLOAD_PATH)
-        self.assertEqual(gene_id_mapping, GENE_ID_MAPPING)
-        mock_path_exists.assert_has_calls([
-            mock.call('gs://test-bucket/test/path/gencode.v25.annotation.gtf.pickle'),
-            mock.call('gs://test-bucket/test/path/gencode.v25.annotation.gtf.gz'),
-        ])
-        mock_stream_gs_file.assert_called_with('gs://test-bucket/test/path/gencode.v25.annotation.gtf.gz', raw_download=True)
-        mock_gzip.decompress.assert_called_with(mock_stream_gs_file.return_value)
-        mock_file_writer.assert_called_with('gs://test-bucket/test/path/gencode.v25.annotation.gtf.pickle')
-        mock_pickle.dump.assert_called_with(GENE_ID_MAPPING, mock_f, protocol=mock.ANY)
-
 
     @responses.activate
     def test_load_gencode_ensembl_to_refseq_id(self):
@@ -170,6 +60,5 @@ def test_load_gencode_ensembl_to_refseq_id(self):
                 'ENST00000378391': 'NM_199454.3',
                 'ENST00000270722': 'NM_022114.4',
                 'ENST00000288774': 'NM_001374425.1',
-            }
+            },
         )
-
diff --git a/v03_pipeline/lib/tasks/base/base_loading_run_params.py b/v03_pipeline/lib/tasks/base/base_loading_run_params.py
index cde621c4f..7c79b00d6 100644
--- a/v03_pipeline/lib/tasks/base/base_loading_run_params.py
+++ b/v03_pipeline/lib/tasks/base/base_loading_run_params.py
@@ -19,6 +19,9 @@ class BaseLoadingRunParams(luigi.Task):
     run_id = luigi.Parameter()
     sample_type = luigi.EnumParameter(enum=SampleType)
     callset_path = luigi.Parameter()
+    project_guids = luigi.ListParameter(default=[])
+    project_remap_paths = luigi.ListParameter(default=[])
+    project_pedigree_paths = luigi.ListParameter(default=[])
     ignore_missing_samples_when_remapping = luigi.BoolParameter(
         default=False,
         parsing=luigi.BoolParameter.EXPLICIT_PARSING,
diff --git a/v03_pipeline/lib/tasks/base/base_project_info_params.py b/v03_pipeline/lib/tasks/base/base_project_info_params.py
deleted file mode 100644
index 3bb5f5873..000000000
--- a/v03_pipeline/lib/tasks/base/base_project_info_params.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import luigi
-import luigi.util
-
-from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
-
-
-@luigi.util.inherits(BaseLoadingRunParams)
-class BaseLoadingRunWithProjectInfoParams(luigi.Task):
-    project_guids = luigi.ListParameter()
-    project_remap_paths = luigi.ListParameter()
-    project_pedigree_paths = luigi.ListParameter()
diff --git a/v03_pipeline/lib/tasks/base/base_update_project_table.py b/v03_pipeline/lib/tasks/base/base_update_project_table.py
deleted file mode 100644
index 473a31bc2..000000000
--- a/v03_pipeline/lib/tasks/base/base_update_project_table.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import hail as hl
-import luigi
-
-from v03_pipeline.lib.model import SampleType
-from v03_pipeline.lib.paths import project_table_path
-from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
-from v03_pipeline.lib.tasks.files import GCSorLocalTarget
-
-
-class BaseUpdateProjectTableTask(BaseUpdateTask):
-    sample_type = luigi.EnumParameter(enum=SampleType)
-    project_guid = luigi.Parameter()
-
-    def output(self) -> luigi.Target:
-        return GCSorLocalTarget(
-            project_table_path(
-                self.reference_genome,
-                self.dataset_type,
-                self.sample_type,
-                self.project_guid,
-            ),
-        )
-
-    def initialize_table(self) -> hl.Table:
-        key_type = self.dataset_type.table_key_type(self.reference_genome)
-        return hl.Table.parallelize(
-            [],
-            hl.tstruct(
-                **key_type,
-                filters=hl.tset(hl.tstr),
-                # NB: entries is missing here because it is untyped
-                # until we read the type off of the first callset aggregation.
-            ),
-            key=key_type.fields,
-            globals=hl.Struct(
-                family_guids=hl.empty_array(hl.tstr),
-                family_samples=hl.empty_dict(hl.tstr, hl.tarray(hl.tstr)),
-                updates=hl.empty_set(
-                    hl.tstruct(callset=hl.tstr, remap_pedigree_hash=hl.tint32),
-                ),
-            ),
-        )
diff --git a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py
index 21b0253b3..31c718034 100644
--- a/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py
+++ b/v03_pipeline/lib/tasks/base/base_update_variant_annotations_table.py
@@ -36,16 +36,12 @@ def output(self) -> luigi.Target:
 
     def requires(self) -> list[luigi.Task]:
         requirements = [
-            UpdateCachedReferenceDatasetQueries(
-                reference_genome=self.reference_genome,
-                dataset_type=self.dataset_type,
-            ),
+            self.clone(UpdateCachedReferenceDatasetQueries),
         ]
         requirements.extend(
-            UpdatedReferenceDatasetCollectionTask(
-                self.reference_genome,
-                self.dataset_type,
-                rdc,
+            self.clone(
+                UpdatedReferenceDatasetCollectionTask,
+                reference_dataset_collection=rdc,
             )
             for rdc in ReferenceDatasetCollection.for_reference_genome_dataset_type(
                 self.reference_genome,
diff --git a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py
index 58e2cae18..dc9c2a17e 100644
--- a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py
+++ b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries.py
@@ -4,15 +4,15 @@
 from v03_pipeline.lib.model import (
     CachedReferenceDatasetQuery,
 )
-from v03_pipeline.lib.tasks.base.base_loading_pipeline_params import (
-    BaseLoadingPipelineParams,
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
 )
 from v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query import (
     UpdatedCachedReferenceDatasetQuery,
 )
 
 
-@luigi.util.inherits(BaseLoadingPipelineParams)
+@luigi.util.inherits(BaseLoadingRunParams)
 class UpdateCachedReferenceDatasetQueries(luigi.Task):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
diff --git a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py
index 794a77897..d6bf33d36 100644
--- a/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py
+++ b/v03_pipeline/lib/tasks/reference_data/update_cached_reference_dataset_queries_test.py
@@ -7,6 +7,7 @@
     CachedReferenceDatasetQuery,
     DatasetType,
     ReferenceGenome,
+    SampleType,
 )
 from v03_pipeline.lib.tasks.reference_data.update_cached_reference_dataset_queries import (
     UpdateCachedReferenceDatasetQueries,
@@ -21,99 +22,100 @@ class UpdateCachedReferenceDatasetQueriesTest(unittest.TestCase):
     def test_37_snv_indel(self, mock_crdq_task):
         mock_crdq_task.return_value = MockCompleteTask()
         worker = luigi.worker.Worker()
+        kwargs = {
+            'sample_type': SampleType.WGS,
+            'callset_path': '',
+            'project_guids': [],
+            'project_remap_paths': [],
+            'project_pedigree_paths': [],
+            'skip_validation': True,
+            'run_id': '1',
+        }
         task = UpdateCachedReferenceDatasetQueries(
             reference_genome=ReferenceGenome.GRCh37,
             dataset_type=DatasetType.SNV_INDEL,
+            **kwargs,
         )
         worker.add(task)
         worker.run()
         self.assertTrue(task.complete())
-        mock_crdq_task.assert_has_calls(
-            [
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh37,
-                    dataset_type=DatasetType.SNV_INDEL,
-                    crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
-                ),
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh37,
-                    dataset_type=DatasetType.SNV_INDEL,
-                    crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
-                ),
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh37,
-                    dataset_type=DatasetType.SNV_INDEL,
-                    crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
-                ),
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh37,
-                    dataset_type=DatasetType.SNV_INDEL,
-                    crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
-                ),
-            ],
+        call_args_list = mock_crdq_task.call_args_list
+        self.assertEqual(len(call_args_list), 4)
+        self.assertEqual(
+            [x.kwargs['crdq'] for x in call_args_list],
+            list(CachedReferenceDatasetQuery),
         )
 
     def test_38_snv_indel(self, mock_crdq_task):
         mock_crdq_task.return_value = MockCompleteTask()
         worker = luigi.worker.Worker()
+        kwargs = {
+            'sample_type': SampleType.WGS,
+            'callset_path': '',
+            'project_guids': [],
+            'project_remap_paths': [],
+            'project_pedigree_paths': [],
+            'skip_validation': True,
+            'run_id': '2',
+        }
         task = UpdateCachedReferenceDatasetQueries(
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.SNV_INDEL,
+            **kwargs,
         )
         worker.add(task)
         worker.run()
         self.assertTrue(task.complete())
-        mock_crdq_task.assert_has_calls(
-            [
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh38,
-                    dataset_type=DatasetType.SNV_INDEL,
-                    crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
-                ),
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh38,
-                    dataset_type=DatasetType.SNV_INDEL,
-                    crdq=CachedReferenceDatasetQuery.GNOMAD_CODING_AND_NONCODING_VARIANTS,
-                ),
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh38,
-                    dataset_type=DatasetType.SNV_INDEL,
-                    crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
-                ),
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh38,
-                    dataset_type=DatasetType.SNV_INDEL,
-                    crdq=CachedReferenceDatasetQuery.HIGH_AF_VARIANTS,
-                ),
-            ],
+        call_args_list = mock_crdq_task.call_args_list
+        self.assertEqual(len(call_args_list), 4)
+        self.assertEqual(
+            [x.kwargs['crdq'] for x in call_args_list],
+            list(CachedReferenceDatasetQuery),
         )
 
     def test_38_mito(self, mock_crdq_task):
         mock_crdq_task.return_value = MockCompleteTask()
         worker = luigi.worker.Worker()
+        kwargs = {
+            'sample_type': SampleType.WGS,
+            'callset_path': '',
+            'project_guids': [],
+            'project_remap_paths': [],
+            'project_pedigree_paths': [],
+            'skip_validation': True,
+            'run_id': '3',
+        }
         task = UpdateCachedReferenceDatasetQueries(
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.MITO,
+            **kwargs,
         )
         worker.add(task)
         worker.run()
         self.assertTrue(task.complete())
-        mock_crdq_task.assert_has_calls(
-            [
-                mock.call(
-                    reference_genome=ReferenceGenome.GRCh38,
-                    dataset_type=DatasetType.MITO,
-                    crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
-                ),
-            ],
+        call_args_list = mock_crdq_task.call_args_list
+        self.assertEqual(len(call_args_list), 1)
+        self.assertEqual(
+            next(x.kwargs['crdq'] for x in call_args_list),
+            CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
         )
 
     def test_38_sv(self, mock_crdq_task):
         mock_crdq_task.return_value = MockCompleteTask()
         worker = luigi.worker.Worker()
+        kwargs = {
+            'sample_type': SampleType.WGS,
+            'callset_path': '',
+            'project_guids': [],
+            'project_remap_paths': [],
+            'project_pedigree_paths': [],
+            'skip_validation': True,
+            'run_id': '4',
+        }
         task = UpdateCachedReferenceDatasetQueries(
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.SV,
+            **kwargs,
         )
         worker.add(task)
         worker.run()
diff --git a/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset.py b/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset.py
index f03526c50..9a0aeca2d 100644
--- a/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset.py
+++ b/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset.py
@@ -1,13 +1,18 @@
 import hail as hl
+import luigi
 
 from v03_pipeline.lib.annotations.fields import get_fields
 from v03_pipeline.lib.logger import get_logger
 from v03_pipeline.lib.model import ReferenceDatasetCollection
 from v03_pipeline.lib.reference_data.compare_globals import (
     Globals,
+    clinvar_versions_equal,
     get_datasets_to_update,
 )
 from v03_pipeline.lib.reference_data.config import CONFIG
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
+)
 from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import (
     BaseUpdateVariantAnnotationsTableTask,
 )
@@ -15,6 +20,7 @@
 logger = get_logger(__name__)
 
 
+@luigi.util.inherits(BaseLoadingRunParams)
 class UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
     BaseUpdateVariantAnnotationsTableTask,
 ):
@@ -49,6 +55,17 @@ def complete(self) -> bool:
             for rdc in self.reference_dataset_collections
             for dataset in rdc.datasets(self.dataset_type)
         ]
+
+        # Substring match: the MITO clinvar dataset is named 'clinvar_mito'.
+        clinvar_datasets = [d for d in datasets_to_check if 'clinvar' in d]
+        if clinvar_datasets and not clinvar_versions_equal(
+            hl.read_table(self.output().path),
+            self.reference_genome,
+            self.dataset_type,
+        ):
+            for clinvar_dataset in clinvar_datasets:
+                datasets_to_check.remove(clinvar_dataset)
+                self._datasets_to_update.add(clinvar_dataset)
         annotations_ht_globals = Globals.from_ht(
             hl.read_table(self.output().path),
             datasets_to_check,
diff --git a/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py b/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py
index b5a5ced2f..5c30630e9 100644
--- a/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py
+++ b/v03_pipeline/lib/tasks/reference_data/update_variant_annotations_table_with_updated_reference_dataset_test.py
@@ -19,6 +19,7 @@
     DatasetType,
     ReferenceDatasetCollection,
     ReferenceGenome,
+    SampleType,
 )
 from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
 from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS
@@ -37,6 +38,8 @@
 TEST_INTERVAL_MITO_1 = 'v03_pipeline/var/test/reference_data/test_interval_mito_1.ht'
 TEST_COMBINED_37 = 'v03_pipeline/var/test/reference_data/test_combined_37.ht'
 TEST_HGMD_37 = 'v03_pipeline/var/test/reference_data/test_hgmd_37.ht'
+TEST_SNV_INDEL_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf'
+TEST_MITO_MT = 'v03_pipeline/var/test/callsets/mito_1.mt'
 
 
 MOCK_CADD_CONFIG = {
@@ -58,7 +61,7 @@
 }
 MOCK_CLINVAR_CONFIG = {
     **CONFIG['clinvar']['38'],
-    'source_path': 'ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
+    'source_path': 'https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
     'custom_import': lambda *_: hl.Table.parallelize(
         [],
         hl.tstruct(
@@ -483,7 +486,7 @@
     'clinvar_mito': {
         '38': {
             **CONFIG['clinvar_mito']['38'],
-            'source_path': 'ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
+            'source_path': 'https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
             'custom_import': lambda *_: hl.Table.parallelize(
                 [],
                 hl.tstruct(
@@ -719,12 +722,17 @@ def setUp(self) -> None:
         'v03_pipeline.lib.reference_data.compare_globals.CONFIG',
         MOCK_CONFIG,
     )
+    @mock.patch(
+        'v03_pipeline.lib.tasks.reference_data.update_variant_annotations_table_with_updated_reference_dataset.clinvar_versions_equal',
+    )
     def test_update_vat_with_updated_rdc_snv_indel_38(
         self,
+        mock_clinvar_versions_equal,
         mock_initialize_table,
         mock_update_crdqs_task,
         mock_update_rdc_task,
     ):
+        mock_clinvar_versions_equal.return_value = True
         mock_update_rdc_task.return_value = MockCompleteTask()
         mock_update_crdqs_task.return_value = MockCompleteTask()
         mock_initialize_table.return_value = hl.Table.parallelize(
@@ -754,6 +762,13 @@ def test_update_vat_with_updated_rdc_snv_indel_38(
         task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.SNV_INDEL,
+            sample_type=SampleType.WGS,
+            callset_path=TEST_SNV_INDEL_VCF,
+            project_guids=[],
+            project_remap_paths=[],
+            project_pedigree_paths=[],
+            skip_validation=True,
+            run_id='3',
         )
         worker = luigi.worker.Worker()
         worker.add(task)
@@ -830,7 +845,7 @@ def test_update_vat_with_updated_rdc_snv_indel_38(
                 hl.Struct(
                     paths=hl.Struct(
                         cadd='gs://seqr-reference-data/GRCh37/CADD/CADD_snvs_and_indels.v1.6.ht',
-                        clinvar='ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
+                        clinvar='https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
                         dbnsfp='gs://seqr-reference-data/GRCh37/dbNSFP/v2.9.3/dbNSFP2.9.3_variant.ht',
                         eigen='gs://seqr-reference-data/GRCh37/eigen/EIGEN_coding_noncoding.grch37.ht',
                         exac='gs://seqr-reference-data/GRCh37/gnomad/ExAC.r1.sites.vep.ht',
@@ -929,12 +944,17 @@ def test_update_vat_with_updated_rdc_snv_indel_38(
         'v03_pipeline.lib.reference_data.compare_globals.CONFIG',
         MOCK_CONFIG_MITO,
     )
+    @mock.patch(
+        'v03_pipeline.lib.tasks.reference_data.update_variant_annotations_table_with_updated_reference_dataset.clinvar_versions_equal',
+    )
     def test_update_vat_with_updated_rdc_mito_38(
         self,
+        mock_clinvar_versions_equal,
         mock_initialize_table,
         mock_update_crdqs_task,
         mock_update_rdc_task,
     ):
+        mock_clinvar_versions_equal.return_value = True
         mock_update_rdc_task.return_value = MockCompleteTask()
         mock_update_crdqs_task.return_value = MockCompleteTask()
         mock_initialize_table.return_value = hl.Table.parallelize(
@@ -964,6 +984,13 @@ def test_update_vat_with_updated_rdc_mito_38(
         task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.MITO,
+            sample_type=SampleType.WGS,
+            callset_path=TEST_MITO_MT,
+            project_guids=[],
+            project_remap_paths=[],
+            project_pedigree_paths=[],
+            skip_validation=True,
+            run_id='1',
         )
         worker = luigi.worker.Worker()
         worker.add(task)
@@ -982,7 +1009,7 @@ def test_update_vat_with_updated_rdc_mito_38(
                         hmtvar='gs://seqr-reference-data/GRCh38/mitochondrial/HmtVar/HmtVar%20Jan.%2010%202022.ht',
                         mitomap='gs://seqr-reference-data/GRCh38/mitochondrial/MITOMAP/mitomap-confirmed-mutations-2022-02-04.ht',
                         mitimpact='gs://seqr-reference-data/GRCh38/mitochondrial/MitImpact/MitImpact_db_3.0.7.ht',
-                        clinvar_mito='ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
+                        clinvar_mito='https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
                         dbnsfp_mito='gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.with_new_scores.ht',
                         high_constraint_region_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix high constraint intervals Feb-15-2022.tsv',
                         local_constraint_mito='gs://seqr-reference-data/GRCh38/mitochondrial/local_constraint.tsv',
@@ -1079,12 +1106,17 @@ def test_update_vat_with_updated_rdc_mito_38(
         'v03_pipeline.lib.reference_data.compare_globals.CONFIG',
         MOCK_CONFIG,
     )
+    @mock.patch(
+        'v03_pipeline.lib.tasks.reference_data.update_variant_annotations_table_with_updated_reference_dataset.clinvar_versions_equal',
+    )
     def test_update_vat_with_updated_rdc_snv_indel_37(
         self,
+        mock_clinvar_versions_equal,
         mock_initialize_table,
         mock_update_crdqs_task,
         mock_update_rdc_task,
     ):
+        mock_clinvar_versions_equal.return_value = True
         mock_update_rdc_task.return_value = MockCompleteTask()
         mock_update_crdqs_task.return_value = MockCompleteTask()
         mock_initialize_table.return_value = hl.Table.parallelize(
@@ -1114,6 +1146,13 @@ def test_update_vat_with_updated_rdc_snv_indel_37(
         task = UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
             reference_genome=ReferenceGenome.GRCh37,
             dataset_type=DatasetType.SNV_INDEL,
+            sample_type=SampleType.WGS,
+            callset_path=TEST_SNV_INDEL_VCF,
+            project_guids=[],
+            project_remap_paths=[],
+            project_pedigree_paths=[],
+            skip_validation=True,
+            run_id='2',
         )
         worker = luigi.worker.Worker()
         worker.add(task)
@@ -1128,7 +1167,7 @@ def test_update_vat_with_updated_rdc_snv_indel_37(
                 hl.Struct(
                     paths=hl.Struct(
                         cadd='gs://seqr-reference-data/GRCh37/CADD/CADD_snvs_and_indels.v1.6.ht',
-                        clinvar='ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
+                        clinvar='https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
                         dbnsfp='gs://seqr-reference-data/GRCh37/dbNSFP/v2.9.3/dbNSFP2.9.3_variant.ht',
                         eigen='gs://seqr-reference-data/GRCh37/eigen/EIGEN_coding_noncoding.grch37.ht',
                         exac='gs://seqr-reference-data/GRCh37/gnomad/ExAC.r1.sites.vep.ht',
diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py
index 9aa4a3a74..57d163146 100644
--- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py
+++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query.py
@@ -12,6 +12,7 @@
 )
 from v03_pipeline.lib.reference_data.compare_globals import (
     Globals,
+    clinvar_versions_equal,
     get_datasets_to_update,
 )
 from v03_pipeline.lib.reference_data.config import CONFIG
@@ -19,15 +20,16 @@
     get_ht_path,
     import_ht_from_config_path,
 )
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
+)
 from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
 from v03_pipeline.lib.tasks.files import GCSorLocalTarget, HailTableTask
-from v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection import (
-    UpdatedReferenceDatasetCollectionTask,
-)
 
 logger = get_logger(__name__)
 
 
+@luigi.util.inherits(BaseLoadingRunParams)
 class UpdatedCachedReferenceDatasetQuery(BaseWriteTask):
     crdq = luigi.EnumParameter(enum=CachedReferenceDatasetQuery)
 
@@ -38,14 +40,21 @@ def complete(self) -> bool:
             )
             return False
 
-        datasets_to_check = [self.crdq.dataset(self.dataset_type)]
+        dataset = self.crdq.dataset(self.dataset_type)
+        if 'clinvar' in dataset and not clinvar_versions_equal(
+            hl.read_table(self.output().path),
+            self.reference_genome,
+            self.dataset_type,
+        ):
+            return False
+
         crdq_globals = Globals.from_ht(
             hl.read_table(self.output().path),
-            datasets_to_check,
+            [dataset],
         )
         dataset_config_globals = Globals.from_dataset_configs(
             self.reference_genome,
-            datasets_to_check,
+            [dataset],
         )
         return not get_datasets_to_update(
             crdq_globals,
@@ -71,6 +80,16 @@ def requires(self) -> luigi.Task:
                     ],
                 ),
             )
+        # Special nested import to avoid a circular dependency issue
+        # (ValidateCallset -> this file -> UpdatedReferenceDatasetCollection -> ValidateCallset)
+        # The specific CRDQ referenced in ValidateCallset will never reach
+        # this line due to it being a "query_raw_dataset".  In theory this
+        # would be fixed by splitting the CRDQ into raw_dataset and non-raw_dataset
+        # queries.
+        from v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection import (
+            UpdatedReferenceDatasetCollectionTask,
+        )
+
         return UpdatedReferenceDatasetCollectionTask(
             self.reference_genome,
             self.dataset_type,
diff --git a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py
index 210a8cc8a..566337f2e 100644
--- a/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py
+++ b/v03_pipeline/lib/tasks/reference_data/updated_cached_reference_dataset_query_test.py
@@ -5,12 +5,14 @@
 import hail as hl
 import luigi
 
+import v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection
 from v03_pipeline.lib.annotations.enums import CLINVAR_PATHOGENICITIES
 from v03_pipeline.lib.model import (
     CachedReferenceDatasetQuery,
     DatasetType,
     ReferenceDatasetCollection,
     ReferenceGenome,
+    SampleType,
 )
 from v03_pipeline.lib.paths import (
     cached_reference_dataset_query_path,
@@ -28,6 +30,7 @@
 CLINVAR_CRDQ_PATH = (
     'v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht'
 )
+TEST_SNV_INDEL_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf'
 
 MOCK_CONFIG = {
     'gnomad_qc': {
@@ -57,7 +60,7 @@
     'clinvar': {
         '38': {
             **CONFIG['clinvar']['38'],
-            'source_path': 'ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
+            'source_path': 'https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
             'custom_import': lambda *_: hl.Table.parallelize(
                 [],
                 hl.tstruct(
@@ -109,6 +112,13 @@ def test_gnomad_qc(
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.SNV_INDEL,
             crdq=CachedReferenceDatasetQuery.GNOMAD_QC,
+            sample_type=SampleType.WGS,
+            callset_path=TEST_SNV_INDEL_VCF,
+            project_guids=[],
+            project_remap_paths=[],
+            project_pedigree_paths=[],
+            skip_validation=True,
+            run_id='1',
         )
         worker.add(task)
         worker.run()
@@ -143,14 +153,19 @@ def test_gnomad_qc(
         'v03_pipeline.lib.reference_data.compare_globals.CONFIG',
         MOCK_CONFIG,
     )
-    @mock.patch(
-        'v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query.UpdatedReferenceDatasetCollectionTask',
+    @mock.patch.object(
+        v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection,
+        'UpdatedReferenceDatasetCollectionTask',
     )
     @mock.patch(
         'v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query.CachedReferenceDatasetQuery.query',
     )
+    @mock.patch(
+        'v03_pipeline.lib.tasks.reference_data.updated_cached_reference_dataset_query.clinvar_versions_equal',
+    )
     def test_clinvar(
         self,
+        mock_clinvar_versions_equal,
         mock_crdq_query,
         mock_updated_rdc_task,
     ) -> None:
@@ -158,6 +173,8 @@ def test_clinvar(
         Given a crdq task where there exists a clinvar crdq table and a clinvar rdc table,
         expect task to replace the clinvar crdq table with new version.
         """
+        mock_clinvar_versions_equal.return_value = True
+
         # rdc dependency exists
         mock_updated_rdc_task.return_value = MockCompleteTask()
 
@@ -198,6 +215,13 @@ def _clinvar_path_variants(table, **_: Any):
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.SNV_INDEL,
             crdq=CachedReferenceDatasetQuery.CLINVAR_PATH_VARIANTS,
+            sample_type=SampleType.WGS,
+            callset_path=TEST_SNV_INDEL_VCF,
+            project_guids=[],
+            project_remap_paths=[],
+            project_pedigree_paths=[],
+            skip_validation=True,
+            run_id='2',
         )
         worker.add(task)
         worker.run()
diff --git a/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection.py b/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection.py
index 253e2f526..af2144839 100644
--- a/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection.py
+++ b/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection.py
@@ -6,17 +6,23 @@
 from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
 from v03_pipeline.lib.reference_data.compare_globals import (
     Globals,
+    clinvar_versions_equal,
     get_datasets_to_update,
 )
 from v03_pipeline.lib.reference_data.dataset_table_operations import (
     update_or_create_joined_ht,
 )
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
+)
 from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
 from v03_pipeline.lib.tasks.files import GCSorLocalTarget
+from v03_pipeline.lib.tasks.validate_callset import ValidateCallsetTask
 
 logger = get_logger(__name__)
 
 
+@luigi.util.inherits(BaseLoadingRunParams)
 class UpdatedReferenceDatasetCollectionTask(BaseUpdateTask):
     reference_dataset_collection = luigi.EnumParameter(enum=ReferenceDatasetCollection)
 
@@ -24,6 +30,17 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self._datasets_to_update = []
 
+    def requires(self) -> luigi.Task:
+        # Though there is no explicit functional dependency between
+        # validating the callset and updating the reference data, it's
+        # a more user-friendly experience for the callset validation
+        # to fail/succeed prior to attempting any
+        # compute-intensive work.
+        #
+        # Note that if validation is disabled or skipped, the task
+        # still runs but is a no-op.
+        return self.clone(ValidateCallsetTask)
+
     def complete(self) -> bool:
         self._datasets_to_update = []
         datasets = self.reference_dataset_collection.datasets(self.dataset_type)
@@ -37,6 +54,14 @@ def complete(self) -> bool:
             )
             return False
 
+        if any('clinvar' in d for d in datasets) and not clinvar_versions_equal(
+            hl.read_table(self.output().path),
+            self.reference_genome,
+            self.dataset_type,
+        ):  # clinvar dataset names vary by dataset type (e.g. 'clinvar_mito')
+            self._datasets_to_update.extend(d for d in datasets if 'clinvar' in d)
+            datasets = [d for d in datasets if 'clinvar' not in d]
+
         joined_ht_globals = Globals.from_ht(
             hl.read_table(self.output().path),
             datasets,
diff --git a/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py b/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py
index b3fdde4bb..bc19d39d5 100644
--- a/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py
+++ b/v03_pipeline/lib/tasks/reference_data/updated_reference_dataset_collection_test.py
@@ -10,6 +10,7 @@
     DatasetType,
     ReferenceDatasetCollection,
     ReferenceGenome,
+    SampleType,
 )
 from v03_pipeline.lib.paths import valid_reference_dataset_collection_path
 from v03_pipeline.lib.reference_data.clinvar import CLINVAR_ASSERTIONS
@@ -20,6 +21,7 @@
 from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
 
 COMBINED_2_PATH = 'v03_pipeline/var/test/reference_data/test_combined_2.ht'
+TEST_SNV_INDEL_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf'
 
 MOCK_PRIMATE_AI_DATASET_HT = hl.Table.parallelize(
     [
@@ -156,20 +158,32 @@ class UpdatedReferenceDatasetCollectionTaskTest(MockedDatarootTestCase):
         MOCK_CONFIG,
     )
     @mock.patch.object(ReferenceDatasetCollection, 'datasets')
+    @mock.patch(
+        'v03_pipeline.lib.tasks.reference_data.updated_reference_dataset_collection.clinvar_versions_equal',
+    )
     def test_update_task_with_empty_reference_data_table(
         self,
+        mock_clinvar_versions_equal,
         mock_rdc_datasets,
     ) -> None:
         """
         Given a new task with no existing reference dataset collection table,
         expect the task to create a new reference dataset collection table for all datasets in the collection.
         """
+        mock_clinvar_versions_equal.return_value = True
         mock_rdc_datasets.return_value = ['cadd', 'primate_ai', 'clinvar']
         worker = luigi.worker.Worker()
         task = UpdatedReferenceDatasetCollectionTask(
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.SNV_INDEL,
             reference_dataset_collection=ReferenceDatasetCollection.COMBINED,
+            sample_type=SampleType.WGS,
+            callset_path=TEST_SNV_INDEL_VCF,
+            project_guids=[],
+            project_remap_paths=[],
+            project_pedigree_paths=[],
+            skip_validation=True,
+            run_id='2',
         )
         worker.add(task)
         worker.run()
@@ -279,6 +293,13 @@ def test_update_task_with_existing_reference_dataset_collection_table(
             reference_genome=ReferenceGenome.GRCh38,
             dataset_type=DatasetType.SNV_INDEL,
             reference_dataset_collection=ReferenceDatasetCollection.COMBINED,
+            sample_type=SampleType.WGS,
+            callset_path=TEST_SNV_INDEL_VCF,
+            project_guids=[],
+            project_remap_paths=[],
+            project_pedigree_paths=[],
+            skip_validation=True,
+            run_id='2',
         )
         worker.add(task)
         worker.run()
diff --git a/v03_pipeline/lib/tasks/trigger_hail_backend_reload.py b/v03_pipeline/lib/tasks/trigger_hail_backend_reload.py
index 427ba23fd..f4e8d36fb 100644
--- a/v03_pipeline/lib/tasks/trigger_hail_backend_reload.py
+++ b/v03_pipeline/lib/tasks/trigger_hail_backend_reload.py
@@ -4,15 +4,15 @@
 
 from v03_pipeline.lib.logger import get_logger
 from v03_pipeline.lib.model import Env
-from v03_pipeline.lib.tasks.base.base_project_info_params import (
-    BaseLoadingRunWithProjectInfoParams,
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
 )
 from v03_pipeline.lib.tasks.write_success_file import WriteSuccessFileTask
 
 logger = get_logger(__name__)
 
 
-@luigi.util.inherits(BaseLoadingRunWithProjectInfoParams)
+@luigi.util.inherits(BaseLoadingRunParams)
 class TriggerHailBackendReload(luigi.Task):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
diff --git a/v03_pipeline/lib/tasks/update_lookup_table.py b/v03_pipeline/lib/tasks/update_lookup_table.py
index 1dd0b746a..fad438f57 100644
--- a/v03_pipeline/lib/tasks/update_lookup_table.py
+++ b/v03_pipeline/lib/tasks/update_lookup_table.py
@@ -10,8 +10,8 @@
 )
 from v03_pipeline.lib.model.constants import PROJECTS_EXCLUDED_FROM_LOOKUP
 from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path
-from v03_pipeline.lib.tasks.base.base_project_info_params import (
-    BaseLoadingRunWithProjectInfoParams,
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
 )
 from v03_pipeline.lib.tasks.base.base_update_lookup_table import (
     BaseUpdateLookupTableTask,
@@ -21,7 +21,7 @@
 )
 
 
-@luigi.util.inherits(BaseLoadingRunWithProjectInfoParams)
+@luigi.util.inherits(BaseLoadingRunParams)
 class UpdateLookupTableTask(BaseUpdateLookupTableTask):
     def complete(self) -> bool:
         return super().complete() and hl.eval(
diff --git a/v03_pipeline/lib/tasks/update_project_table.py b/v03_pipeline/lib/tasks/update_project_table.py
index cd582009f..6c723ffde 100644
--- a/v03_pipeline/lib/tasks/update_project_table.py
+++ b/v03_pipeline/lib/tasks/update_project_table.py
@@ -9,19 +9,30 @@
     remove_family_guids,
 )
 from v03_pipeline.lib.misc.io import remap_pedigree_hash
+from v03_pipeline.lib.paths import project_table_path
 from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
-from v03_pipeline.lib.tasks.base.base_update_project_table import (
-    BaseUpdateProjectTableTask,
+from v03_pipeline.lib.tasks.base.base_update import (
+    BaseUpdateTask,
 )
+from v03_pipeline.lib.tasks.files import GCSorLocalTarget
 from v03_pipeline.lib.tasks.write_remapped_and_subsetted_callset import (
     WriteRemappedAndSubsettedCallsetTask,
 )
 
 
 @luigi.util.inherits(BaseLoadingRunParams)
-class UpdateProjectTableTask(BaseUpdateProjectTableTask):
-    project_remap_path = luigi.Parameter()
-    project_pedigree_path = luigi.Parameter()
+class UpdateProjectTableTask(BaseUpdateTask):
+    project_i = luigi.IntParameter()
+
+    def output(self) -> luigi.Target:
+        return GCSorLocalTarget(
+            project_table_path(
+                self.reference_genome,
+                self.dataset_type,
+                self.sample_type,
+                self.project_guids[self.project_i],
+            ),
+        )
 
     def complete(self) -> bool:
         return super().complete() and hl.eval(
@@ -29,8 +40,8 @@ def complete(self) -> bool:
                 hl.Struct(
                     callset=self.callset_path,
                     remap_pedigree_hash=remap_pedigree_hash(
-                        self.project_remap_path,
-                        self.project_pedigree_path,
+                        self.project_remap_paths[self.project_i],
+                        self.project_pedigree_paths[self.project_i],
                     ),
                 ),
             ),
@@ -39,6 +50,26 @@ def complete(self) -> bool:
     def requires(self) -> luigi.Task:
         return self.clone(WriteRemappedAndSubsettedCallsetTask)
 
+    def initialize_table(self) -> hl.Table:
+        key_type = self.dataset_type.table_key_type(self.reference_genome)
+        return hl.Table.parallelize(
+            [],
+            hl.tstruct(
+                **key_type,
+                filters=hl.tset(hl.tstr),
+                # NB: entries is missing here because it is untyped
+                # until we read the type off of the first callset aggregation.
+            ),
+            key=key_type.fields,
+            globals=hl.Struct(
+                family_guids=hl.empty_array(hl.tstr),
+                family_samples=hl.empty_dict(hl.tstr, hl.tarray(hl.tstr)),
+                updates=hl.empty_set(
+                    hl.tstruct(callset=hl.tstr, remap_pedigree_hash=hl.tint32),
+                ),
+            ),
+        )
+
     def update_table(self, ht: hl.Table) -> hl.Table:
         callset_mt = hl.read_matrix_table(self.input().path)
         callset_ht = compute_callset_family_entries_ht(
@@ -69,8 +100,8 @@ def update_table(self, ht: hl.Table) -> hl.Table:
                 hl.Struct(
                     callset=self.callset_path,
                     remap_pedigree_hash=remap_pedigree_hash(
-                        self.project_remap_path,
-                        self.project_pedigree_path,
+                        self.project_remap_paths[self.project_i],
+                        self.project_pedigree_paths[self.project_i],
                     ),
                 ),
             ),
diff --git a/v03_pipeline/lib/tasks/update_project_table_test.py b/v03_pipeline/lib/tasks/update_project_table_test.py
index 7e6ab67f9..0daad72e0 100644
--- a/v03_pipeline/lib/tasks/update_project_table_test.py
+++ b/v03_pipeline/lib/tasks/update_project_table_test.py
@@ -25,9 +25,10 @@ def test_update_project_table_task(self) -> None:
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_VCF,
-            project_guid='R0113_test_project',
-            project_remap_path=TEST_REMAP,
-            project_pedigree_path=TEST_PEDIGREE_3,
+            project_guids=['R0113_test_project'],
+            project_remap_paths=[TEST_REMAP],
+            project_pedigree_paths=[TEST_PEDIGREE_3],
+            project_i=0,
             skip_validation=True,
         )
         worker.add(upt_task)
@@ -134,9 +135,10 @@ def test_update_project_table_task_different_pedigree(self) -> None:
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_VCF,
-            project_guid='R0113_test_project',
-            project_remap_path=TEST_REMAP,
-            project_pedigree_path=TEST_PEDIGREE_3,
+            project_guids=['R0113_test_project'],
+            project_remap_paths=[TEST_REMAP],
+            project_pedigree_paths=[TEST_PEDIGREE_3],
+            project_i=0,
             skip_validation=True,
         )
         worker.add(upt_task)
@@ -147,9 +149,10 @@ def test_update_project_table_task_different_pedigree(self) -> None:
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_VCF,
-            project_guid='R0113_test_project',
-            project_remap_path=TEST_REMAP,
-            project_pedigree_path=TEST_PEDIGREE_3_DIFFERENT_FAMILIES,
+            project_guids=['R0113_test_project'],
+            project_remap_paths=[TEST_REMAP],
+            project_pedigree_paths=[TEST_PEDIGREE_3_DIFFERENT_FAMILIES],
+            project_i=0,
             skip_validation=True,
         )
         worker.add(upt_task)
diff --git a/v03_pipeline/lib/tasks/update_project_table_with_deleted_families.py b/v03_pipeline/lib/tasks/update_project_table_with_deleted_families.py
index 90f1937dc..56277f34b 100644
--- a/v03_pipeline/lib/tasks/update_project_table_with_deleted_families.py
+++ b/v03_pipeline/lib/tasks/update_project_table_with_deleted_families.py
@@ -2,14 +2,27 @@
 import luigi
 
 from v03_pipeline.lib.misc.family_entries import remove_family_guids
-from v03_pipeline.lib.tasks.base.base_update_project_table import (
-    BaseUpdateProjectTableTask,
-)
+from v03_pipeline.lib.model import SampleType
+from v03_pipeline.lib.paths import project_table_path
+from v03_pipeline.lib.tasks.base.base_update import BaseUpdateTask
+from v03_pipeline.lib.tasks.files import GCSorLocalTarget
 
 
-class UpdateProjectTableWithDeletedFamiliesTask(BaseUpdateProjectTableTask):
+class UpdateProjectTableWithDeletedFamiliesTask(BaseUpdateTask):
+    sample_type = luigi.EnumParameter(enum=SampleType)
+    project_guid = luigi.Parameter()
     family_guids = luigi.ListParameter()
 
+    def output(self) -> luigi.Target:
+        return GCSorLocalTarget(
+            project_table_path(
+                self.reference_genome,
+                self.dataset_type,
+                self.sample_type,
+                self.project_guid,
+            ),
+        )
+
     def complete(self) -> bool:
         return super().complete() and hl.eval(
             hl.bind(
@@ -26,6 +39,26 @@ def complete(self) -> bool:
             ),
         )
 
+    def initialize_table(self) -> hl.Table:
+        key_type = self.dataset_type.table_key_type(self.reference_genome)
+        return hl.Table.parallelize(
+            [],
+            hl.tstruct(
+                **key_type,
+                filters=hl.tset(hl.tstr),
+                # NB: entries is missing here because it is untyped
+                # until we read the type off of the first callset aggregation.
+            ),
+            key=key_type.fields,
+            globals=hl.Struct(
+                family_guids=hl.empty_array(hl.tstr),
+                family_samples=hl.empty_dict(hl.tstr, hl.tarray(hl.tstr)),
+                updates=hl.empty_set(
+                    hl.tstruct(callset=hl.tstr, remap_pedigree_hash=hl.tint32),
+                ),
+            ),
+        )
+
     def update_table(self, ht: hl.Table) -> hl.Table:
         return remove_family_guids(
             ht,
diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py
index 96ded8491..739247770 100644
--- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py
+++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples.py
@@ -9,8 +9,8 @@
     lookup_table_path,
     new_variants_table_path,
 )
-from v03_pipeline.lib.tasks.base.base_project_info_params import (
-    BaseLoadingRunWithProjectInfoParams,
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
 )
 from v03_pipeline.lib.tasks.base.base_update_variant_annotations_table import (
     BaseUpdateVariantAnnotationsTableTask,
@@ -18,7 +18,7 @@
 from v03_pipeline.lib.tasks.write_new_variants_table import WriteNewVariantsTableTask
 
 
-@luigi.util.inherits(BaseLoadingRunWithProjectInfoParams)
+@luigi.util.inherits(BaseLoadingRunParams)
 class UpdateVariantAnnotationsTableWithNewSamplesTask(
     BaseUpdateVariantAnnotationsTableTask,
 ):
@@ -74,6 +74,12 @@ def update_table(self, ht: hl.Table) -> hl.Table:
             # and either present or not present in the existing annotations table.
             callset_variants_ht = ht.semi_join(callset_ht)
             ht = ht.anti_join(callset_ht)
+            lookup_ht = hl.read_table(
+                lookup_table_path(
+                    self.reference_genome,
+                    self.dataset_type,
+                ),
+            )
             callset_variants_ht = callset_variants_ht.annotate(
                 **get_fields(
                     callset_variants_ht,
@@ -89,6 +95,11 @@ def update_table(self, ht: hl.Table) -> hl.Table:
             )
             ht = ht.union(callset_variants_ht, unify=True)
 
+            # Variants may have fallen out of the callset and
+            # have been removed from the lookup table during modification.
+            # Ensure we don't proceed with those variants.
+            ht = ht.semi_join(lookup_ht)
+
         # Fix up the globals and mark the table as updated with these callset/project pairs.
         ht = self.annotate_globals(ht)
         return ht.annotate_globals(
diff --git a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py
index 0bc94f473..b5290e88c 100644
--- a/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py
+++ b/v03_pipeline/lib/tasks/update_variant_annotations_table_with_new_samples_test.py
@@ -585,7 +585,7 @@ def test_multiple_update_vat(
                     },
                     paths=hl.Struct(
                         cadd='gs://seqr-reference-data/GRCh37/CADD/CADD_snvs_and_indels.v1.6.ht',
-                        clinvar='ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
+                        clinvar='https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
                         dbnsfp='gs://seqr-reference-data/GRCh37/dbNSFP/v2.9.3/dbNSFP2.9.3_variant.ht',
                         eigen='gs://seqr-reference-data/GRCh37/eigen/EIGEN_coding_noncoding.grch37.ht',
                         exac='gs://seqr-reference-data/GRCh37/gnomad/ExAC.r1.sites.vep.ht',
@@ -724,7 +724,7 @@ def test_update_vat_grch37(
             [
                 hl.Struct(
                     cadd='gs://seqr-reference-data/GRCh37/CADD/CADD_snvs_and_indels.v1.6.ht',
-                    clinvar='ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
+                    clinvar='https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh37/clinvar.vcf.gz',
                     dbnsfp='gs://seqr-reference-data/GRCh37/dbNSFP/v2.9.3/dbNSFP2.9.3_variant.ht',
                     eigen='gs://seqr-reference-data/GRCh37/eigen/EIGEN_coding_noncoding.grch37.ht',
                     exac='gs://seqr-reference-data/GRCh37/gnomad/ExAC.r1.sites.vep.ht',
@@ -965,7 +965,7 @@ def test_mito_update_vat(
                 hl.Struct(
                     paths=hl.Struct(
                         high_constraint_region_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix high constraint intervals Feb-15-2022.tsv',
-                        clinvar_mito='ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
+                        clinvar_mito='https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz',
                         dbnsfp_mito='gs://seqr-reference-data/GRCh38/dbNSFP/v4.2/dbNSFP4.2a_variant.with_new_scores.ht',
                         gnomad_mito='gs://gcp-public-data--gnomad/release/3.1/ht/genomes/gnomad.genomes.v3.1.sites.chrM.ht',
                         helix_mito='gs://seqr-reference-data/GRCh38/mitochondrial/Helix/HelixMTdb_20200327.ht',
diff --git a/v03_pipeline/lib/tasks/validate_callset_test.py b/v03_pipeline/lib/tasks/validate_callset_test.py
index f00e5f125..991412824 100644
--- a/v03_pipeline/lib/tasks/validate_callset_test.py
+++ b/v03_pipeline/lib/tasks/validate_callset_test.py
@@ -62,6 +62,7 @@ def test_validate_callset_multiple_exceptions(
             # a NON_REF allele type at position chr1: 902024, missing
             # all contigs but chr1, and contains non-coding variants.
             callset_path=MULTIPLE_VALIDATION_EXCEPTIONS_VCF,
+            project_guids=['project_a'],
             skip_validation=False,
             run_id=TEST_RUN_ID,
         )
@@ -74,6 +75,7 @@ def test_validate_callset_multiple_exceptions(
             dataset_type=DatasetType.SNV_INDEL,
             sample_type=SampleType.WES,
             callset_path=MULTIPLE_VALIDATION_EXCEPTIONS_VCF,
+            project_guids=['project_a'],
             skip_validation=False,
             run_id=TEST_RUN_ID,
         )
@@ -82,6 +84,7 @@ def test_validate_callset_multiple_exceptions(
             self.assertDictEqual(
                 json.load(f),
                 {
+                    'project_guids': ['project_a'],
                     'error_messages': [
                         'Alleles with invalid allele <NON_REF> are present in the callset.  This appears to be a GVCF containing records for sites with no variants.',
                         "Variants are present multiple times in the callset: ['1-902088-G-A']",
diff --git a/v03_pipeline/lib/tasks/write_family_table.py b/v03_pipeline/lib/tasks/write_family_table.py
index 42715aff9..9ffbc5482 100644
--- a/v03_pipeline/lib/tasks/write_family_table.py
+++ b/v03_pipeline/lib/tasks/write_family_table.py
@@ -13,9 +13,7 @@
 
 @luigi.util.inherits(BaseLoadingRunParams)
 class WriteFamilyTableTask(BaseWriteTask):
-    project_guid = luigi.Parameter()
-    project_remap_path = luigi.Parameter()
-    project_pedigree_path = luigi.Parameter()
+    project_i = luigi.IntParameter()
     family_guid = luigi.Parameter()
 
     def output(self) -> luigi.Target:
diff --git a/v03_pipeline/lib/tasks/write_family_table_test.py b/v03_pipeline/lib/tasks/write_family_table_test.py
index 5c6995146..60d6f0e41 100644
--- a/v03_pipeline/lib/tasks/write_family_table_test.py
+++ b/v03_pipeline/lib/tasks/write_family_table_test.py
@@ -24,9 +24,10 @@ def test_snv_write_family_table_task(self) -> None:
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_SNV_INDEL_VCF,
-            project_guid='R0113_test_project',
-            project_remap_path=TEST_REMAP,
-            project_pedigree_path=TEST_PEDIGREE_3,
+            project_guids=['R0113_test_project'],
+            project_remap_paths=[TEST_REMAP],
+            project_pedigree_paths=[TEST_PEDIGREE_3],
+            project_i=0,
             family_guid='abc_1',
             skip_validation=True,
         )
@@ -162,9 +163,10 @@ def test_sv_write_family_table_task(self) -> None:
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_SV_VCF,
-            project_guid='R0115_test_project2',
-            project_remap_path='not_a_real_file',
-            project_pedigree_path=TEST_PEDIGREE_5,
+            project_guids=['R0115_test_project2'],
+            project_remap_paths=['not_a_real_file'],
+            project_pedigree_paths=[TEST_PEDIGREE_5],
+            project_i=0,
             family_guid='family_2_1',
             skip_validation=True,
         )
@@ -415,9 +417,10 @@ def test_gcnv_write_family_table_task(self) -> None:
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WES,
             callset_path=TEST_GCNV_BED_FILE,
-            project_guid='R0115_test_project2',
-            project_remap_path='not_a_real_file',
-            project_pedigree_path=TEST_PEDIGREE_5,
+            project_guids=['R0115_test_project2'],
+            project_remap_paths=['not_a_real_file'],
+            project_pedigree_paths=[TEST_PEDIGREE_5],
+            project_i=0,
             family_guid='family_2_1',
             skip_validation=True,
         )
diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run.py b/v03_pipeline/lib/tasks/write_metadata_for_run.py
index 3432dd891..cc012a926 100644
--- a/v03_pipeline/lib/tasks/write_metadata_for_run.py
+++ b/v03_pipeline/lib/tasks/write_metadata_for_run.py
@@ -4,9 +4,12 @@
 import luigi
 import luigi.util
 
-from v03_pipeline.lib.paths import metadata_for_run_path
-from v03_pipeline.lib.tasks.base.base_project_info_params import (
-    BaseLoadingRunWithProjectInfoParams,
+from v03_pipeline.lib.paths import (
+    metadata_for_run_path,
+    relatedness_check_tsv_path,
+)
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
 )
 from v03_pipeline.lib.tasks.files import GCSorLocalTarget
 from v03_pipeline.lib.tasks.write_remapped_and_subsetted_callset import (
@@ -14,7 +17,7 @@
 )
 
 
-@luigi.util.inherits(BaseLoadingRunWithProjectInfoParams)
+@luigi.util.inherits(BaseLoadingRunParams)
 class WriteMetadataForRunTask(luigi.Task):
     def output(self) -> luigi.Target:
         return GCSorLocalTarget(
@@ -29,16 +32,9 @@ def requires(self) -> list[luigi.Task]:
         return [
             self.clone(
                 WriteRemappedAndSubsettedCallsetTask,
-                project_guid=project_guid,
-                project_remap_path=project_remap_path,
-                project_pedigree_path=project_pedigree_path,
-            )
-            for (project_guid, project_remap_path, project_pedigree_path) in zip(
-                self.project_guids,
-                self.project_remap_paths,
-                self.project_pedigree_paths,
-                strict=True,
+                project_i=i,
             )
+            for i in range(len(self.project_guids))
         ]
 
     def run(self) -> None:
@@ -46,12 +42,18 @@ def run(self) -> None:
             'callsets': [self.callset_path],
             'run_id': self.run_id,
             'sample_type': self.sample_type.value,
+            'project_guids': self.project_guids,
             'family_samples': {},
             'failed_family_samples': {
                 'missing_samples': {},
                 'relatedness_check': {},
                 'sex_check': {},
             },
+            'relatedness_check_file_path': relatedness_check_tsv_path(
+                self.reference_genome,
+                self.dataset_type,
+                self.callset_path,
+            ),
         }
         for remapped_and_subsetted_callset in self.input():
             callset_mt = hl.read_matrix_table(remapped_and_subsetted_callset.path)
diff --git a/v03_pipeline/lib/tasks/write_metadata_for_run_test.py b/v03_pipeline/lib/tasks/write_metadata_for_run_test.py
index f5d733a79..dc007fbcb 100644
--- a/v03_pipeline/lib/tasks/write_metadata_for_run_test.py
+++ b/v03_pipeline/lib/tasks/write_metadata_for_run_test.py
@@ -3,6 +3,7 @@
 import luigi.worker
 
 from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
+from v03_pipeline.lib.paths import relatedness_check_tsv_path
 from v03_pipeline.lib.tasks.write_metadata_for_run import WriteMetadataForRunTask
 from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
 
@@ -37,6 +38,7 @@ def test_write_metadata_for_run_task(self) -> None:
                 json.load(f),
                 {
                     'callsets': [TEST_VCF],
+                    'project_guids': ['R0113_test_project', 'R0114_project4'],
                     'failed_family_samples': {
                         'missing_samples': {
                             'efg_1': {
@@ -70,5 +72,10 @@ def test_write_metadata_for_run_task(self) -> None:
                     },
                     'run_id': 'run_123456',
                     'sample_type': SampleType.WGS.value,
+                    'relatedness_check_file_path': relatedness_check_tsv_path(
+                        ReferenceGenome.GRCh38,
+                        DatasetType.SNV_INDEL,
+                        TEST_VCF,
+                    ),
                 },
             )
diff --git a/v03_pipeline/lib/tasks/write_new_variants_table.py b/v03_pipeline/lib/tasks/write_new_variants_table.py
index f8b1c570b..a312084b4 100644
--- a/v03_pipeline/lib/tasks/write_new_variants_table.py
+++ b/v03_pipeline/lib/tasks/write_new_variants_table.py
@@ -24,8 +24,8 @@
     load_gencode_ensembl_to_refseq_id,
     load_gencode_gene_symbol_to_gene_id,
 )
-from v03_pipeline.lib.tasks.base.base_project_info_params import (
-    BaseLoadingRunWithProjectInfoParams,
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
 )
 from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
 from v03_pipeline.lib.tasks.files import GCSorLocalTarget
@@ -45,7 +45,7 @@
 GENCODE_FOR_VEP_RELEASE = 44
 
 
-@luigi.util.inherits(BaseLoadingRunWithProjectInfoParams)
+@luigi.util.inherits(BaseLoadingRunParams)
 class WriteNewVariantsTableTask(BaseWriteTask):
     @property
     def annotation_dependencies(self) -> dict[str, hl.Table]:
@@ -58,7 +58,7 @@ def annotation_dependencies(self) -> dict[str, hl.Table]:
             )
         if self.dataset_type.has_gencode_gene_symbol_to_gene_id_mapping:
             deps['gencode_gene_symbol_to_gene_id_mapping'] = hl.literal(
-                load_gencode_gene_symbol_to_gene_id(GENCODE_RELEASE, ''),
+                load_gencode_gene_symbol_to_gene_id(GENCODE_RELEASE),
             )
         deps[
             'grch37_to_grch38_liftover_ref_path'
@@ -79,10 +79,7 @@ def output(self) -> luigi.Target:
 
     def requires(self) -> list[luigi.Task]:
         requirements = [
-            UpdateVariantAnnotationsTableWithUpdatedReferenceDataset(
-                self.reference_genome,
-                self.dataset_type,
-            ),
+            self.clone(UpdateVariantAnnotationsTableWithUpdatedReferenceDataset),
         ]
         if self.dataset_type.has_lookup_table:
             # NB: the lookup table task has remapped and subsetted callset tasks as dependencies.
diff --git a/v03_pipeline/lib/tasks/write_project_family_tables.py b/v03_pipeline/lib/tasks/write_project_family_tables.py
index f9b7df74f..7085a3aa1 100644
--- a/v03_pipeline/lib/tasks/write_project_family_tables.py
+++ b/v03_pipeline/lib/tasks/write_project_family_tables.py
@@ -2,19 +2,18 @@
 import luigi
 import luigi.util
 
-from v03_pipeline.lib.misc.io import import_pedigree
-from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families
+from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path
 from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
-from v03_pipeline.lib.tasks.files import RawFileTask
 from v03_pipeline.lib.tasks.update_project_table import UpdateProjectTableTask
 from v03_pipeline.lib.tasks.write_family_table import WriteFamilyTableTask
+from v03_pipeline.lib.tasks.write_remapped_and_subsetted_callset import (
+    WriteRemappedAndSubsettedCallsetTask,
+)
 
 
 @luigi.util.inherits(BaseLoadingRunParams)
 class WriteProjectFamilyTablesTask(luigi.Task):
-    project_guid = luigi.Parameter()
-    project_remap_path = luigi.Parameter()
-    project_pedigree_path = luigi.Parameter()
+    project_i = luigi.IntParameter()
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -26,27 +25,26 @@ def complete(self) -> bool:
             for write_family_table_task in self.dynamic_write_family_table_tasks
         )
 
-    def run(self):
-        # https://luigi.readthedocs.io/en/stable/tasks.html#dynamic-dependencies
-        # Fetch family guids from project table
-        update_project_table_task: luigi.Target = yield self.clone(
-            UpdateProjectTableTask,
-        )
-        project_ht = hl.read_table(update_project_table_task.path)
-        family_guids_in_project_table = set(hl.eval(project_ht.globals.family_guids))
+    def requires(self) -> list[luigi.Task]:
+        return [
+            self.clone(
+                WriteRemappedAndSubsettedCallsetTask,
+            ),
+            self.clone(
+                UpdateProjectTableTask,
+            ),
+        ]
 
-        # Fetch family guids from pedigree
-        pedigree_ht_task: luigi.Target = yield RawFileTask(self.project_pedigree_path)
-        pedigree_ht = import_pedigree(pedigree_ht_task.path)
-        families_guids_in_pedigree = {
-            f.family_guid for f in parse_pedigree_ht_to_families(pedigree_ht)
-        }
-
-        # Intersect them
-        family_guids_to_load = (
-            family_guids_in_project_table & families_guids_in_pedigree
+    def run(self):
+        ht = hl.read_matrix_table(
+            remapped_and_subsetted_callset_path(
+                self.reference_genome,
+                self.dataset_type,
+                self.callset_path,
+                self.project_guids[self.project_i],
+            ),
         )
-        for family_guid in family_guids_to_load:
+        for family_guid in set(hl.eval(ht.globals.family_samples).keys()):
             self.dynamic_write_family_table_tasks.add(
                 self.clone(WriteFamilyTableTask, family_guid=family_guid),
             )
diff --git a/v03_pipeline/lib/tasks/write_project_family_tables_test.py b/v03_pipeline/lib/tasks/write_project_family_tables_test.py
index 3d23e9b60..dd535f988 100644
--- a/v03_pipeline/lib/tasks/write_project_family_tables_test.py
+++ b/v03_pipeline/lib/tasks/write_project_family_tables_test.py
@@ -2,7 +2,10 @@
 import luigi.worker
 
 from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
-from v03_pipeline.lib.paths import project_table_path
+from v03_pipeline.lib.paths import (
+    project_table_path,
+    remapped_and_subsetted_callset_path,
+)
 from v03_pipeline.lib.tasks.write_project_family_tables import (
     WriteProjectFamilyTablesTask,
 )
@@ -25,9 +28,10 @@ def test_snv_write_project_family_tables_task(self) -> None:
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_SNV_INDEL_VCF,
-            project_guid='R0113_test_project',
-            project_remap_path=TEST_REMAP,
-            project_pedigree_path=TEST_PEDIGREE_4,
+            project_guids=['R0113_test_project'],
+            project_remap_paths=[TEST_REMAP],
+            project_pedigree_paths=[TEST_PEDIGREE_4],
+            project_i=0,
             skip_validation=True,
             skip_check_sex_and_relatedness=True,
         )
@@ -38,6 +42,33 @@ def test_snv_write_project_family_tables_task(self) -> None:
             hl.read_table(write_family_table_task.output().path)
             for write_family_table_task in write_project_family_tables.dynamic_write_family_table_tasks
         ]
+        # Validate remapped and subsetted callset families
+        remapped_and_subsetted_callset = hl.read_matrix_table(
+            remapped_and_subsetted_callset_path(
+                ReferenceGenome.GRCh38,
+                DatasetType.SNV_INDEL,
+                TEST_SNV_INDEL_VCF,
+                'R0113_test_project',
+            ),
+        )
+        self.assertCountEqual(
+            hl.eval(remapped_and_subsetted_callset.globals.family_samples.keys()),
+            {
+                '123_1',
+                '234_1',
+                '345_1',
+                '456_1',
+                '567_1',
+                '678_1',
+                '789_1',
+                '890_1',
+                '901_1',
+                'bcd_1',
+                'cde_1',
+                'def_1',
+                'efg_1',
+            },
+        )
         self.assertCountEqual(
             [ht.globals.sample_ids.collect() for ht in hts],
             [
@@ -63,9 +94,10 @@ def test_snv_write_project_family_tables_task(self) -> None:
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_SNV_INDEL_VCF,
-            project_guid='R0113_test_project',
-            project_remap_path=TEST_REMAP,
-            project_pedigree_path=TEST_PEDIGREE_4_SUBSET,
+            project_guids=['R0113_test_project'],
+            project_remap_paths=[TEST_REMAP],
+            project_pedigree_paths=[TEST_PEDIGREE_4_SUBSET],
+            project_i=0,
             skip_validation=True,
             skip_check_sex_and_relatedness=True,
         )
@@ -73,13 +105,39 @@ def test_snv_write_project_family_tables_task(self) -> None:
         worker.run()
         self.assertTrue(write_project_family_tables_subset.complete())
         hts = [
-            hl.read_table(write_family_table_task.output().path)
+            write_family_table_task.output().path
             for write_family_table_task in write_project_family_tables_subset.dynamic_write_family_table_tasks
         ]
-        # Only one family table written
-        self.assertEqual(
-            len(hts),
-            1,
+        self.assertTrue(len(hts))
+        self.assertTrue(
+            '123_1' in hts[0],
+        )
+        # Validate remapped and subsetted callset families
+        # (and that it was re-written)
+        remapped_and_subsetted_callset = hl.read_matrix_table(
+            remapped_and_subsetted_callset_path(
+                ReferenceGenome.GRCh38,
+                DatasetType.SNV_INDEL,
+                TEST_SNV_INDEL_VCF,
+                'R0113_test_project',
+            ),
+        )
+        self.assertCountEqual(
+            hl.eval(remapped_and_subsetted_callset.globals.family_samples.keys()),
+            {'123_1'},
+        )
+        self.assertCountEqual(
+            hl.eval(remapped_and_subsetted_callset.globals.failed_family_samples),
+            hl.Struct(
+                missing_samples={
+                    '234_1': {
+                        'reasons': ["Missing samples: {'NA19678_999'}"],
+                        'samples': ['NA19678_1', 'NA19678_999'],
+                    },
+                },
+                relatedness_check={},
+                sex_check={},
+            ),
         )
         # Project table still contains all family guids
         self.assertCountEqual(
diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_tsv.py b/v03_pipeline/lib/tasks/write_relatedness_check_tsv.py
new file mode 100644
index 000000000..bfe303a4a
--- /dev/null
+++ b/v03_pipeline/lib/tasks/write_relatedness_check_tsv.py
@@ -0,0 +1,35 @@
+import hail as hl
+import luigi
+import luigi.util
+
+from v03_pipeline.lib.paths import relatedness_check_tsv_path
+from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
+from v03_pipeline.lib.tasks.files import GCSorLocalTarget
+from v03_pipeline.lib.tasks.write_relatedness_check_table import (
+    WriteRelatednessCheckTableTask,
+)
+
+
+@luigi.util.inherits(BaseLoadingRunParams)
+class WriteRelatednessCheckTsvTask(luigi.Task):
+    """Export the callset's relatedness check Hail table to a TSV file."""
+
+    def output(self) -> luigi.Target:
+        """Return the target located at relatedness_check_tsv_path."""
+        tsv_path = relatedness_check_tsv_path(
+            self.reference_genome,
+            self.dataset_type,
+            self.callset_path,
+        )
+        return GCSorLocalTarget(tsv_path)
+
+    def requires(self):
+        """Depend on the task that writes the relatedness check table."""
+        upstream_table_task = self.clone(WriteRelatednessCheckTableTask)
+        return [upstream_table_task]
+
+    def run(self):
+        """Read the upstream table and export it to this task's output path."""
+        upstream_target = self.input()[0]
+        relatedness_check_ht = hl.read_table(upstream_target.path)
+        relatedness_check_ht.export(self.output().path)
diff --git a/v03_pipeline/lib/tasks/write_relatedness_check_tsv_test.py b/v03_pipeline/lib/tasks/write_relatedness_check_tsv_test.py
new file mode 100644
index 000000000..49f174340
--- /dev/null
+++ b/v03_pipeline/lib/tasks/write_relatedness_check_tsv_test.py
@@ -0,0 +1,58 @@
+import shutil
+
+import luigi.worker
+
+from v03_pipeline.lib.model import DatasetType, ReferenceGenome, SampleType
+from v03_pipeline.lib.paths import relatedness_check_table_path
+from v03_pipeline.lib.tasks.write_relatedness_check_tsv import (
+    WriteRelatednessCheckTsvTask,
+)
+from v03_pipeline.lib.test.mocked_dataroot_testcase import MockedDatarootTestCase
+
+TEST_RELATEDNESS_CHECK_1 = (
+    'v03_pipeline/var/test/relatedness_check/test_relatedness_check_1.ht'
+)
+TEST_VCF = 'v03_pipeline/var/test/callsets/1kg_30variants.vcf'
+TEST_RUN_ID = 'manual__2024-04-03'
+
+
+class WriteRelatednessCheckTsvTaskTest(MockedDatarootTestCase):
+    """Exercise WriteRelatednessCheckTsvTask against a fixture relatedness table."""
+
+    def setUp(self) -> None:
+        """Stage the fixture table at relatedness_check_table_path for TEST_VCF."""
+        super().setUp()
+        shutil.copytree(
+            TEST_RELATEDNESS_CHECK_1,
+            relatedness_check_table_path(
+                ReferenceGenome.GRCh38,
+                DatasetType.SNV_INDEL,
+                TEST_VCF,
+            ),
+        )
+
+    def test_write_relatedness_check_tsv_task(
+        self,
+    ) -> None:
+        """The task completes and its TSV output begins with the expected rows."""
+        worker = luigi.worker.Worker()
+        task = WriteRelatednessCheckTsvTask(
+            reference_genome=ReferenceGenome.GRCh38,
+            dataset_type=DatasetType.SNV_INDEL,
+            callset_path=TEST_VCF,
+            run_id=TEST_RUN_ID,
+            sample_type=SampleType.WES,
+        )
+        worker.add(task)
+        worker.run()
+        self.assertTrue(task.complete())
+        with task.output().open('r') as f:
+            lines = f.readlines()
+            expected_lines = [
+                'i\tj\tibd0\tibd1\tibd2\tpi_hat\n',
+                'HG00731_1\tHG00733_1\t0\t1\t0\t5.0000e-01\n',
+                'HG00732_1\tHG00733_1\t0\t1\t0\t5.0000e-01\n',
+            ]
+            # Compare a prefix slice rather than zip(..., strict=False): zip stops
+            # at the shorter input, so an empty or truncated file would pass.
+            self.assertEqual(expected_lines, lines[: len(expected_lines)])
diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py
index e3e0a0e4f..f4c934662 100644
--- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py
+++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset.py
@@ -17,13 +17,16 @@
 from v03_pipeline.lib.misc.pedigree import parse_pedigree_ht_to_families
 from v03_pipeline.lib.misc.sample_ids import remap_sample_ids, subset_samples
 from v03_pipeline.lib.model.environment import Env
-from v03_pipeline.lib.paths import remapped_and_subsetted_callset_path
+from v03_pipeline.lib.paths import (
+    relatedness_check_table_path,
+    remapped_and_subsetted_callset_path,
+)
 from v03_pipeline.lib.tasks.base.base_loading_run_params import BaseLoadingRunParams
 from v03_pipeline.lib.tasks.base.base_write import BaseWriteTask
 from v03_pipeline.lib.tasks.files import GCSorLocalTarget, RawFileTask
 from v03_pipeline.lib.tasks.validate_callset import ValidateCallsetTask
-from v03_pipeline.lib.tasks.write_relatedness_check_table import (
-    WriteRelatednessCheckTableTask,
+from v03_pipeline.lib.tasks.write_relatedness_check_tsv import (
+    WriteRelatednessCheckTsvTask,
 )
 from v03_pipeline.lib.tasks.write_sex_check_table import WriteSexCheckTableTask
 
@@ -32,16 +35,14 @@
 
 @luigi.util.inherits(BaseLoadingRunParams)
 class WriteRemappedAndSubsettedCallsetTask(BaseWriteTask):
-    project_guid = luigi.Parameter()
-    project_remap_path = luigi.Parameter()
-    project_pedigree_path = luigi.Parameter()
+    project_i = luigi.IntParameter()
 
     def complete(self) -> luigi.Target:
         return super().complete() and hl.eval(
             hl.read_matrix_table(self.output().path).globals.remap_pedigree_hash
             == remap_pedigree_hash(
-                self.project_remap_path,
-                self.project_pedigree_path,
+                self.project_remap_paths[self.project_i],
+                self.project_pedigree_paths[self.project_i],
             ),
         )
 
@@ -51,14 +52,14 @@ def output(self) -> luigi.Target:
                 self.reference_genome,
                 self.dataset_type,
                 self.callset_path,
-                self.project_guid,
+                self.project_guids[self.project_i],
             ),
         )
 
     def requires(self) -> list[luigi.Task]:
         requirements = [
             self.clone(ValidateCallsetTask),
-            RawFileTask(self.project_pedigree_path),
+            RawFileTask(self.project_pedigree_paths[self.project_i]),
         ]
         if (
             Env.CHECK_SEX_AND_RELATEDNESS
@@ -67,7 +68,7 @@ def requires(self) -> list[luigi.Task]:
         ):
             requirements = [
                 *requirements,
-                self.clone(WriteRelatednessCheckTableTask),
+                self.clone(WriteRelatednessCheckTsvTask),
                 self.clone(WriteSexCheckTableTask),
             ]
         return requirements
@@ -78,8 +79,8 @@ def create_table(self) -> hl.MatrixTable:
 
         # Remap, but only if the remap file is present!
         remap_lookup = hl.empty_dict(hl.tstr, hl.tstr)
-        if does_file_exist(self.project_remap_path):
-            project_remap_ht = import_remap(self.project_remap_path)
+        if does_file_exist(self.project_remap_paths[self.project_i]):
+            project_remap_ht = import_remap(self.project_remap_paths[self.project_i])
             callset_mt = remap_sample_ids(
                 callset_mt,
                 project_remap_ht,
@@ -101,7 +102,13 @@ def create_table(self) -> hl.MatrixTable:
             and self.dataset_type.check_sex_and_relatedness
             and not self.skip_check_sex_and_relatedness
         ):
-            relatedness_check_ht = hl.read_table(self.input()[2].path)
+            relatedness_check_ht = hl.read_table(
+                relatedness_check_table_path(
+                    self.reference_genome,
+                    self.dataset_type,
+                    self.callset_path,
+                ),
+            )
             sex_check_ht = hl.read_table(self.input()[3].path)
             families_failed_relatedness_check = get_families_failed_relatedness_check(
                 families - families_failed_missing_samples.keys(),
@@ -153,8 +160,8 @@ def create_table(self) -> hl.MatrixTable:
                 mt = mt.drop(field)
         return mt.select_globals(
             remap_pedigree_hash=remap_pedigree_hash(
-                self.project_remap_path,
-                self.project_pedigree_path,
+                self.project_remap_paths[self.project_i],
+                self.project_pedigree_paths[self.project_i],
             ),
             family_samples=(
                 {
diff --git a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py
index 1ed7550a6..4a0c84660 100644
--- a/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py
+++ b/v03_pipeline/lib/tasks/write_remapped_and_subsetted_callset_test.py
@@ -84,9 +84,10 @@ def test_write_remapped_and_subsetted_callset_task(
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_VCF,
-            project_guid='R0113_test_project',
-            project_remap_path=TEST_REMAP,
-            project_pedigree_path=TEST_PEDIGREE_3,
+            project_guids=['R0113_test_project'],
+            project_remap_paths=[TEST_REMAP],
+            project_pedigree_paths=[TEST_PEDIGREE_3],
+            project_i=0,
             skip_validation=True,
         )
         worker.add(wrsc_task)
@@ -127,9 +128,10 @@ def test_write_remapped_and_subsetted_callset_task_failed_sex_check_family(
             run_id=TEST_RUN_ID,
             sample_type=SampleType.WGS,
             callset_path=TEST_VCF,
-            project_guid='R0114_project4',
-            project_remap_path=TEST_REMAP,
-            project_pedigree_path=TEST_PEDIGREE_4,
+            project_guids=['R0114_project4'],
+            project_remap_paths=[TEST_REMAP],
+            project_pedigree_paths=[TEST_PEDIGREE_4],
+            project_i=0,
             skip_validation=True,
         )
         worker.add(wrsc_task)
diff --git a/v03_pipeline/lib/tasks/write_success_file.py b/v03_pipeline/lib/tasks/write_success_file.py
index 3576a8d33..3dc471063 100644
--- a/v03_pipeline/lib/tasks/write_success_file.py
+++ b/v03_pipeline/lib/tasks/write_success_file.py
@@ -3,8 +3,8 @@
 
 from v03_pipeline.lib.paths import pipeline_run_success_file_path
 from v03_pipeline.lib.tasks import WriteProjectFamilyTablesTask
-from v03_pipeline.lib.tasks.base.base_project_info_params import (
-    BaseLoadingRunWithProjectInfoParams,
+from v03_pipeline.lib.tasks.base.base_loading_run_params import (
+    BaseLoadingRunParams,
 )
 from v03_pipeline.lib.tasks.files import GCSorLocalTarget
 from v03_pipeline.lib.tasks.update_variant_annotations_table_with_new_samples import (
@@ -12,7 +12,7 @@
 )
 
 
-@luigi.util.inherits(BaseLoadingRunWithProjectInfoParams)
+@luigi.util.inherits(BaseLoadingRunParams)
 class WriteSuccessFileTask(luigi.Task):
     def output(self) -> luigi.Target:
         return GCSorLocalTarget(
@@ -32,9 +32,7 @@ def requires(self):
             *[
                 self.clone(
                     WriteProjectFamilyTablesTask,
-                    project_guid=self.project_guids[i],
-                    project_remap_path=self.project_remap_paths[i],
-                    project_pedigree_path=self.project_pedigree_paths[i],
+                    project_i=i,
                 )
                 for i in range(len(self.project_guids))
             ],
diff --git a/v03_pipeline/lib/tasks/write_validation_errors_for_run.py b/v03_pipeline/lib/tasks/write_validation_errors_for_run.py
index eaefb0e8c..9149f6158 100644
--- a/v03_pipeline/lib/tasks/write_validation_errors_for_run.py
+++ b/v03_pipeline/lib/tasks/write_validation_errors_for_run.py
@@ -10,6 +10,7 @@
 
 @luigi.util.inherits(BaseLoadingRunParams)
 class WriteValidationErrorsForRunTask(luigi.Task):
+    project_guids = luigi.ListParameter()
     error_messages = luigi.ListParameter(default=[])
 
     def to_single_error_message(self) -> str:
@@ -30,6 +31,7 @@ def output(self) -> luigi.Target:
 
     def run(self) -> None:
         validation_errors_json = {
+            'project_guids': self.project_guids,
             'error_messages': self.error_messages,
         }
         with self.output().open('w') as f:
diff --git a/v03_pipeline/var/test/pedigrees/test_pedigree_4_subset.tsv b/v03_pipeline/var/test/pedigrees/test_pedigree_4_subset.tsv
index 63e2addd8..dc022f159 100644
--- a/v03_pipeline/var/test/pedigrees/test_pedigree_4_subset.tsv
+++ b/v03_pipeline/var/test/pedigrees/test_pedigree_4_subset.tsv
@@ -1,2 +1,4 @@
 Project_GUID	Family_GUID	Family_ID	Individual_ID	Paternal_ID	Maternal_ID	Sex
 R0114_project4	123_1	123	NA19675_1			F
+R0114_project4	234_1	234	NA19678_1			M
+R0114_project4	234_1	234	NA19678_999			F
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/.README.txt.crc b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/.README.txt.crc
index 22d3757fc..add5a1942 100644
Binary files a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/.README.txt.crc and b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/.README.txt.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/.metadata.json.gz.crc
index 800ce4b09..3a7d8101c 100644
Binary files a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/README.txt b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/README.txt
index 7ed0c7ae4..9aea8fa4b 100644
--- a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/README.txt
+++ b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.120-f00f916faf78
-  Created at 2024/03/15 15:45:48
\ No newline at end of file
+  Written with version 0.2.133-4c60fddb171a
+  Created at 2024/11/02 13:12:12
\ No newline at end of file
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/globals/parts/.part-0.crc b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/globals/parts/.part-0.crc
index 7e41d08ed..c96ad70c9 100644
Binary files a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/globals/parts/.part-0.crc and b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/globals/parts/.part-0.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/globals/parts/part-0 b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/globals/parts/part-0
index 67904bd84..bb1d53943 100644
Binary files a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/globals/parts/part-0 and b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/globals/parts/part-0 differ
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.idx/.index.crc b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-9e75273d-7113-40e4-a327-453f3451dc8c.idx/.index.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.idx/.index.crc
rename to v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-9e75273d-7113-40e4-a327-453f3451dc8c.idx/.index.crc
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.idx/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-9e75273d-7113-40e4-a327-453f3451dc8c.idx/.metadata.json.gz.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.idx/.metadata.json.gz.crc
rename to v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-9e75273d-7113-40e4-a327-453f3451dc8c.idx/.metadata.json.gz.crc
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.idx/index b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-9e75273d-7113-40e4-a327-453f3451dc8c.idx/index
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.idx/index
rename to v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-9e75273d-7113-40e4-a327-453f3451dc8c.idx/index
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.idx/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-9e75273d-7113-40e4-a327-453f3451dc8c.idx/metadata.json.gz
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.idx/metadata.json.gz
rename to v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/index/part-0-9e75273d-7113-40e4-a327-453f3451dc8c.idx/metadata.json.gz
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/metadata.json.gz
index 213fc997c..5aed747bc 100644
Binary files a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/.metadata.json.gz.crc
index d581eec45..682fea6e7 100644
Binary files a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/metadata.json.gz
index 0b6e1772c..d37774da9 100644
Binary files a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/parts/.part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.crc b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/parts/.part-0-9e75273d-7113-40e4-a327-453f3451dc8c.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/parts/.part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a.crc
rename to v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/parts/.part-0-9e75273d-7113-40e4-a327-453f3451dc8c.crc
diff --git a/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/parts/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a b/v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/parts/part-0-9e75273d-7113-40e4-a327-453f3451dc8c
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/parts/part-0-d5c50933-cc72-4716-b930-1e885aa0ba7a
rename to v03_pipeline/var/test/reference_data/test_clinvar_path_variants_crdq.ht/rows/parts/part-0-9e75273d-7113-40e4-a327-453f3451dc8c
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/.README.txt.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/.README.txt.crc
new file mode 100644
index 000000000..e175e8da4
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/.README.txt.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/._SUCCESS.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/._SUCCESS.crc
new file mode 100644
index 000000000..3b7b04493
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/._SUCCESS.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/.metadata.json.gz.crc
new file mode 100644
index 000000000..5def68f7f
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/README.txt b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/README.txt
new file mode 100644
index 000000000..1b764aef2
--- /dev/null
+++ b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/README.txt
@@ -0,0 +1,3 @@
+This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
+  Written with version 0.2.133-4c60fddb171a
+  Created at 2024/11/02 13:13:26
\ No newline at end of file
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/_SUCCESS b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/_SUCCESS
new file mode 100644
index 000000000..e69de29bb
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/.metadata.json.gz.crc
new file mode 100644
index 000000000..92c2ee4f3
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/metadata.json.gz
new file mode 100644
index 000000000..26e678a01
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/parts/.part-0.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/parts/.part-0.crc
new file mode 100644
index 000000000..66c495184
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/parts/.part-0.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/parts/part-0 b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/parts/part-0
new file mode 100644
index 000000000..31232639d
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/globals/parts/part-0 differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.idx/.index.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/index/part-0-3569201c-d630-43c4-9056-cbace806fe8d.idx/.index.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.idx/.index.crc
rename to v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/index/part-0-3569201c-d630-43c4-9056-cbace806fe8d.idx/.index.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.idx/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/index/part-0-3569201c-d630-43c4-9056-cbace806fe8d.idx/.metadata.json.gz.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.idx/.metadata.json.gz.crc
rename to v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/index/part-0-3569201c-d630-43c4-9056-cbace806fe8d.idx/.metadata.json.gz.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.idx/index b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/index/part-0-3569201c-d630-43c4-9056-cbace806fe8d.idx/index
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.idx/index
rename to v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/index/part-0-3569201c-d630-43c4-9056-cbace806fe8d.idx/index
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.idx/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/index/part-0-3569201c-d630-43c4-9056-cbace806fe8d.idx/metadata.json.gz
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.idx/metadata.json.gz
rename to v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/index/part-0-3569201c-d630-43c4-9056-cbace806fe8d.idx/metadata.json.gz
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/metadata.json.gz
new file mode 100644
index 000000000..351b9c8a1
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/.metadata.json.gz.crc
new file mode 100644
index 000000000..edeb97082
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/metadata.json.gz
new file mode 100644
index 000000000..8ab2a9563
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/.part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/parts/.part-0-3569201c-d630-43c4-9056-cbace806fe8d.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/.part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad.crc
rename to v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/parts/.part-0-3569201c-d630-43c4-9056-cbace806fe8d.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad b/v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/parts/part-0-3569201c-d630-43c4-9056-cbace806fe8d
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/part-0-6a5a9d6a-4ded-424b-9735-922a5346e7ad
rename to v03_pipeline/var/test/reference_data/test_combined_1.ht.ht/rows/parts/part-0-3569201c-d630-43c4-9056-cbace806fe8d
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/.README.txt.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht/.README.txt.crc
index 1c47b9a3c..2796480e9 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/.README.txt.crc and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/.README.txt.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht/.metadata.json.gz.crc
index db7a7824c..5def68f7f 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/README.txt b/v03_pipeline/var/test/reference_data/test_combined_1.ht/README.txt
index e46de4296..9b284affa 100644
--- a/v03_pipeline/var/test/reference_data/test_combined_1.ht/README.txt
+++ b/v03_pipeline/var/test/reference_data/test_combined_1.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.130-bea04d9c79b5
-  Created at 2024/05/20 13:48:16
\ No newline at end of file
+  Written with version 0.2.133-4c60fddb171a
+  Created at 2024/11/02 15:22:20
\ No newline at end of file
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/.metadata.json.gz.crc
index b47637bf1..92c2ee4f3 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/metadata.json.gz
index 534d126c7..26e678a01 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/parts/.part-0.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/parts/.part-0.crc
index 808712b8f..66c495184 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/parts/.part-0.crc and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/parts/.part-0.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/parts/part-0 b/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/parts/part-0
index ef03f366e..31232639d 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/parts/part-0 and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/globals/parts/part-0 differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/.index.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/.index.crc
new file mode 100644
index 000000000..7cb9c5aaf
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/.index.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/.metadata.json.gz.crc
new file mode 100644
index 000000000..9af5fa925
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/index b/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/index
new file mode 100644
index 000000000..a979d82bf
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/index differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/metadata.json.gz
new file mode 100644
index 000000000..051d3e03d
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/index/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.idx/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_1.ht/metadata.json.gz
index d00565756..351b9c8a1 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/.metadata.json.gz.crc
index ddb5e7f25..e7c96acca 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/metadata.json.gz
index 19968eb85..d2c7ccb1c 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/.part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.crc b/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/.part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.crc
new file mode 100644
index 000000000..dd555f553
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/.part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2 b/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2
new file mode 100644
index 000000000..446fb5491
Binary files /dev/null and b/v03_pipeline/var/test/reference_data/test_combined_1.ht/rows/parts/part-0-1d126232-414b-4ffa-aa43-9ed52895fbf2 differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/.README.txt.crc b/v03_pipeline/var/test/reference_data/test_combined_37.ht/.README.txt.crc
index 1b96b5393..394adb99d 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_37.ht/.README.txt.crc and b/v03_pipeline/var/test/reference_data/test_combined_37.ht/.README.txt.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_37.ht/.metadata.json.gz.crc
index 82e0d4035..6b72fb1f0 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_37.ht/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_combined_37.ht/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/README.txt b/v03_pipeline/var/test/reference_data/test_combined_37.ht/README.txt
index e38d73d71..f5927612a 100644
--- a/v03_pipeline/var/test/reference_data/test_combined_37.ht/README.txt
+++ b/v03_pipeline/var/test/reference_data/test_combined_37.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.130-bea04d9c79b5
-  Created at 2024/05/20 15:38:26
\ No newline at end of file
+  Written with version 0.2.133-4c60fddb171a
+  Created at 2024/11/02 13:18:45
\ No newline at end of file
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/globals/parts/.part-0.crc b/v03_pipeline/var/test/reference_data/test_combined_37.ht/globals/parts/.part-0.crc
index f3ed5e11b..3181e5991 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_37.ht/globals/parts/.part-0.crc and b/v03_pipeline/var/test/reference_data/test_combined_37.ht/globals/parts/.part-0.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/globals/parts/part-0 b/v03_pipeline/var/test/reference_data/test_combined_37.ht/globals/parts/part-0
index 259a7345f..92eda86fb 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_37.ht/globals/parts/part-0 and b/v03_pipeline/var/test/reference_data/test_combined_37.ht/globals/parts/part-0 differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.idx/.index.crc b/v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.idx/.index.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.idx/.index.crc
rename to v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.idx/.index.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.idx/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.idx/.metadata.json.gz.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.idx/.metadata.json.gz.crc
rename to v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.idx/.metadata.json.gz.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.idx/index b/v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.idx/index
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.idx/index
rename to v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.idx/index
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.idx/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.idx/metadata.json.gz
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.idx/metadata.json.gz
rename to v03_pipeline/var/test/reference_data/test_combined_37.ht/index/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.idx/metadata.json.gz
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_37.ht/metadata.json.gz
index 00685b5ad..91f89d511 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_37.ht/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_combined_37.ht/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/.metadata.json.gz.crc
index 8474f90b1..580630336 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/metadata.json.gz
index b83d7239a..c22d07b9f 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/parts/.part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.crc b/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/parts/.part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/parts/.part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff.crc
rename to v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/parts/.part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/parts/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff b/v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/parts/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/parts/part-0-ac85fcb0-1e7c-453f-be81-9cd356dc49ff
rename to v03_pipeline/var/test/reference_data/test_combined_37.ht/rows/parts/part-0-6353b1d7-bc23-4f3a-9fa2-dd9321ab97a2
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/.README.txt.crc b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/.README.txt.crc
index e08d4d12b..b76813439 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/.README.txt.crc and b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/.README.txt.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/.metadata.json.gz.crc
index d328f484c..02e51be97 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/README.txt b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/README.txt
index 704275b10..f7cb50ea3 100644
--- a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/README.txt
+++ b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.130-bea04d9c79b5
-  Created at 2024/07/24 14:11:11
\ No newline at end of file
+  Written with version 0.2.133-4c60fddb171a
+  Created at 2024/11/02 15:10:48
\ No newline at end of file
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/globals/parts/.part-0.crc b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/globals/parts/.part-0.crc
index 7b3d99c48..21abd8af5 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/globals/parts/.part-0.crc and b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/globals/parts/.part-0.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/globals/parts/part-0 b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/globals/parts/part-0
index 2493dddf9..6e7d4be57 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/globals/parts/part-0 and b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/globals/parts/part-0 differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.idx/.index.crc b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-3c042736-0e6c-4911-9b80-b9356af9df25.idx/.index.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.idx/.index.crc
rename to v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-3c042736-0e6c-4911-9b80-b9356af9df25.idx/.index.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.idx/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-3c042736-0e6c-4911-9b80-b9356af9df25.idx/.metadata.json.gz.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.idx/.metadata.json.gz.crc
rename to v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-3c042736-0e6c-4911-9b80-b9356af9df25.idx/.metadata.json.gz.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.idx/index b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-3c042736-0e6c-4911-9b80-b9356af9df25.idx/index
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.idx/index
rename to v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-3c042736-0e6c-4911-9b80-b9356af9df25.idx/index
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.idx/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-3c042736-0e6c-4911-9b80-b9356af9df25.idx/metadata.json.gz
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.idx/metadata.json.gz
rename to v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/index/part-0-3c042736-0e6c-4911-9b80-b9356af9df25.idx/metadata.json.gz
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/metadata.json.gz
index 95672cd45..a43d1f48c 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/.metadata.json.gz.crc b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/.metadata.json.gz.crc
index a927fa9da..9a55fd307 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/.metadata.json.gz.crc and b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/.metadata.json.gz.crc differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/metadata.json.gz b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/metadata.json.gz
index 213bdb7aa..dc89f5aa6 100644
Binary files a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/metadata.json.gz and b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/metadata.json.gz differ
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/parts/.part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.crc b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/parts/.part-0-3c042736-0e6c-4911-9b80-b9356af9df25.crc
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/parts/.part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90.crc
rename to v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/parts/.part-0-3c042736-0e6c-4911-9b80-b9356af9df25.crc
diff --git a/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/parts/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90 b/v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/parts/part-0-3c042736-0e6c-4911-9b80-b9356af9df25
similarity index 100%
rename from v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/parts/part-0-4fe48beb-19ef-445d-82f1-325a3c7c0b90
rename to v03_pipeline/var/test/reference_data/test_combined_mito_1.ht/rows/parts/part-0-3c042736-0e6c-4911-9b80-b9356af9df25