
Half-bad hetvars: one bad allele fails the genotype [VS-1615] [VS-1649] #9149


Merged May 8, 2025 (57 commits; changes shown from 17 commits).

Commits:
f75c299  Half-bad hetvars, VCF edition: one bad allele fails the genotype [VS-… (mcovarr, Apr 14, 2025)
1d30881  dockstore (mcovarr, Apr 14, 2025)
e21f0ac  VQSR version (mcovarr, Apr 15, 2025)
3ee277a  VDS attempt (mcovarr, Apr 15, 2025)
756d363  Merge remote-tracking branch 'origin/ah_var_store' into vs_1615_hetva… (mcovarr, Apr 16, 2025)
4e18503  dockstore (mcovarr, Apr 16, 2025)
0171302  make VDS from half-bad failed hetvar VCFs (mcovarr, Apr 16, 2025)
d5b88b6  update merge and rescore too (mcovarr, Apr 16, 2025)
d428e13  turn on filtering for VQSR manual tieout (mcovarr, Apr 17, 2025)
182019c  Merge remote-tracking branch 'origin/ah_var_store' into vs_1615_hetva… (mcovarr, Apr 17, 2025)
815c1a3  variants docker (mcovarr, Apr 17, 2025)
bda44b7  dockstore (mcovarr, Apr 17, 2025)
45b65b2  holy moly tabs (mcovarr, Apr 18, 2025)
ed09ccc  docker (mcovarr, Apr 18, 2025)
ace768f  dockstore (mcovarr, Apr 18, 2025)
852e96f  dockstore (mcovarr, Apr 18, 2025)
7b47b30  remove stuff prior to review (mcovarr, Apr 22, 2025)
7c9e753  Merge remote-tracking branch 'origin/ah_var_store' into vs_1615_hetva… (mcovarr, Apr 23, 2025)
eaf3b08  docker (mcovarr, Apr 23, 2025)
f919d24  include Hail optimizations, update Docker (mcovarr, Apr 23, 2025)
1a21244  simplify more (mcovarr, Apr 24, 2025)
23d45a6  Docker (mcovarr, Apr 24, 2025)
f444f30  oops (mcovarr, Apr 24, 2025)
84eaa02  docker (mcovarr, Apr 24, 2025)
a040dc3  hash bump (mcovarr, Apr 24, 2025)
124d917  another hash bump (mcovarr, Apr 24, 2025)
c533916  yet another hash bump (mcovarr, Apr 24, 2025)
eb82f9f  update truth path (mcovarr, Apr 25, 2025)
2c00049  hash bump (mcovarr, Apr 25, 2025)
a545738  another hash bump (mcovarr, Apr 25, 2025)
fdb1988  Merge remote-tracking branch 'origin/ah_var_store' into vs_1615_hetva… (mcovarr, Apr 25, 2025)
2311f27  how did that get messed up (mcovarr, Apr 29, 2025)
7dc7c85  spanning deletion . quality scores become NaN doubles (mcovarr, Apr 29, 2025)
ab94c99  fresh baked GATK Docker with . quality fixes (mcovarr, Apr 29, 2025)
b3bb025  hash bump for all chr run (mcovarr, Apr 30, 2025)
218dae4  hash bump (mcovarr, Apr 30, 2025)
41f3844  hash bump (mcovarr, May 1, 2025)
27ac2bb  hash bump (mcovarr, May 1, 2025)
8e52897  hash bump (mcovarr, May 1, 2025)
0f21f6f  vat hash bump (mcovarr, May 1, 2025)
6f831cb  vat hash bump (mcovarr, May 1, 2025)
64a5bcc  vat hash bump (mcovarr, May 1, 2025)
3a9a0ae  vat hash bump (mcovarr, May 1, 2025)
6f606f0  hash bump (mcovarr, May 1, 2025)
347eb2f  hash bump (mcovarr, May 2, 2025)
6d92d22  hash bump (mcovarr, May 3, 2025)
521a606  Merge remote-tracking branch 'origin/ah_var_store' into vs_1615_hetva… (mcovarr, May 5, 2025)
5fbc434  cleanup (mcovarr, May 5, 2025)
a8e5796  hash bump (mcovarr, May 5, 2025)
33a67a8  hash bump (mcovarr, May 6, 2025)
2a8a8b5  hash bump (mcovarr, May 6, 2025)
e2bc338  hash bump (mcovarr, May 6, 2025)
09a0d5f  cleanup, VAT from VDS (mcovarr, May 7, 2025)
6c6fa2b  hash bump (mcovarr, May 7, 2025)
9ade1a4  hash bump (mcovarr, May 8, 2025)
c09d1a3  hash bump (mcovarr, May 8, 2025)
7aa2ce1  hash bump (mcovarr, May 8, 2025)
8 changes: 4 additions & 4 deletions scripts/variantstore/scripts/import_gvs.py
@@ -378,11 +378,11 @@ def convert_array_with_id_keys_to_dense_array(arr, ids, drop=[]):
     any_yes=acc.any_yes | allele_YES[called_idx - 1],
     any_snp=acc.any_snp | allele_is_snp[called_idx - 1],
     any_indel=acc.any_indel | ~allele_is_snp[called_idx - 1],
-    any_snp_ok=acc.any_snp_ok | (allele_is_snp[called_idx - 1] & allele_OK[called_idx - 1]),
-    any_indel_ok=acc.any_indel_ok | (~allele_is_snp[called_idx - 1] & allele_OK[called_idx - 1]),
-), hl.struct(any_no=False, any_yes=False, any_snp=False, any_indel=False, any_snp_ok=False, any_indel_ok=False)))
+    all_snps_ok=acc.all_snps_ok & (~allele_is_snp[called_idx - 1] | allele_OK[called_idx - 1]),
+    all_indels_ok=acc.all_indels_ok & (allele_is_snp[called_idx - 1] | allele_OK[called_idx - 1]),
Collaborator:

I am confused as to why this is not:

Suggested change:
-all_indels_ok=acc.all_indels_ok & (allele_is_snp[called_idx - 1] | allele_OK[called_idx - 1]),
+all_snps_ok=acc.all_snps_ok & (allele_is_snp[called_idx - 1] & allele_OK[called_idx - 1]),
+all_indels_ok=acc.all_indels_ok & (~allele_is_snp[called_idx - 1] & allele_OK[called_idx - 1]),

Maybe I'm just confounded by the logic

mcovarr (Collaborator, Author), Apr 23, 2025:

Taking the SNP line as an example, I interpreted this as "nothing wrong SNP-wise if it's not a SNP at all, OR (implying it is a SNP) if the allele is OK".

If the allele was bad and it was an INDEL, the following line would catch it.

mcovarr (Collaborator, Author):

Actually now that I look at this again I think I can/should clean it up a bit... I don't think we care about "any" SNPs or INDELs any more now that the default for the "ok" accumulators is True.

+), hl.struct(any_no=False, any_yes=False, any_snp=False, any_indel=False, all_snps_ok=True, all_indels_ok=True)))

-vd = vd.annotate_entries(FT=~ft.any_no & (ft.any_yes | ((~ft.any_snp | ft.any_snp_ok) & (~ft.any_indel | ft.any_indel_ok))))
+vd = vd.annotate_entries(FT=~ft.any_no & (ft.any_yes | ((~ft.any_snp | ft.all_snps_ok) & (~ft.any_indel | ft.all_indels_ok))))

vd = vd.drop('allele_NO', 'allele_YES', 'allele_is_snp', 'allele_OK')
hl.vds.VariantDataset(
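The per-genotype fold above can be sketched in plain Python (an illustrative stand-in for the Hail expression, not the real implementation; `called_alleles`, `allele_is_snp`, `allele_ok`, `allele_no`, and `allele_yes` are hypothetical lists mirroring the per-allele arrays in the script):

```python
# Sketch of the "half-bad hetvar" rule: the all_*_ok accumulators start True,
# so a single bad allele of either class fails the whole genotype.

def genotype_passes(called_alleles, allele_is_snp, allele_ok,
                    allele_no=None, allele_yes=None):
    """Return the FT (passes-filter) flag for one genotype."""
    n = len(allele_is_snp)
    allele_no = allele_no or [False] * n
    allele_yes = allele_yes or [False] * n

    acc = dict(any_no=False, any_yes=False, any_snp=False, any_indel=False,
               all_snps_ok=True, all_indels_ok=True)
    for idx in called_alleles:      # 1-based alt allele indices; 0 = ref
        if idx == 0:
            continue                # ref alleles don't affect filtering
        i = idx - 1
        acc['any_no'] |= allele_no[i]
        acc['any_yes'] |= allele_yes[i]
        acc['any_snp'] |= allele_is_snp[i]
        acc['any_indel'] |= not allele_is_snp[i]
        # "nothing wrong SNP-wise if it's not a SNP at all, OR the allele is OK"
        acc['all_snps_ok'] &= (not allele_is_snp[i]) or allele_ok[i]
        acc['all_indels_ok'] &= allele_is_snp[i] or allele_ok[i]

    return (not acc['any_no']) and (
        acc['any_yes']
        or ((not acc['any_snp'] or acc['all_snps_ok'])
            and (not acc['any_indel'] or acc['all_indels_ok'])))

# het 1/2 genotype: SNP allele OK, indel allele bad -> whole genotype fails
print(genotype_passes([1, 2], [True, False], [True, False]))   # False
# both alleles OK -> genotype passes
print(genotype_passes([1, 2], [True, False], [True, True]))    # True
```

This also shows why the old `any_snp_ok`/`any_indel_ok` accumulators were too lenient: one OK allele could mask a bad allele of the same class.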
14 changes: 7 additions & 7 deletions scripts/variantstore/scripts/merge_and_rescore_vdses.py
@@ -153,18 +153,18 @@ def patch_variant_data(vd: hl.MatrixTable, site_filters: hl.Table, vets_filters:
     any_yes=acc.any_yes | allele_YES[called_idx - 1],
     any_snp=acc.any_snp | allele_is_snp[called_idx - 1],
     any_indel=acc.any_indel | ~allele_is_snp[called_idx - 1],
-    any_snp_ok=acc.any_snp_ok
-    | (allele_is_snp[called_idx - 1] & allele_OK[called_idx - 1]),
-    any_indel_ok=acc.any_indel_ok
-    | (~allele_is_snp[called_idx - 1] & allele_OK[called_idx - 1]),
+    all_snps_ok=acc.all_snps_ok
+    & (~allele_is_snp[called_idx - 1] | allele_OK[called_idx - 1]),
+    all_indels_ok=acc.all_indels_ok
+    & (allele_is_snp[called_idx - 1] | allele_OK[called_idx - 1]),
),
hl.struct(
any_no=False,
any_yes=False,
any_snp=False,
any_indel=False,
-    any_snp_ok=False,
-    any_indel_ok=False,
+    all_snps_ok=True,
+    all_indels_ok=True,
),
)
)
@@ -173,7 +173,7 @@ def patch_variant_data(vd: hl.MatrixTable, site_filters: hl.Table, vets_filters:
FT=~ft.any_no
& (
ft.any_yes
-    | ((~ft.any_snp | ft.any_snp_ok) & (~ft.any_indel | ft.any_indel_ok))
+    | ((~ft.any_snp | ft.all_snps_ok) & (~ft.any_indel | ft.all_indels_ok))
)
)

36 changes: 18 additions & 18 deletions scripts/variantstore/scripts/vds_validation.py
@@ -17,34 +17,34 @@

mcovarr (Collaborator, Author):

these are all stowaway changes fixing some bonkers whitespace that was driving IntelliJ nuts


(whitespace-only diff: each line is deleted and re-added with tabs replaced by spaces; the resulting code is shown once)

def check_samples_match(vds):
    print('checking sample equivalence between reference and variant MTs')
    assert vds.reference_data.cols().select().collect() == vds.variant_data.cols().select().collect()

def check_ref_blocks(vds):
    print('checking that:\n * no reference blocks have GQ=0\n * all ref blocks have END after start\n * all ref blocks are max 1000 bases long')
    rd = vds.reference_data
    rd = rd.annotate_rows(locus_start = rd.locus.position)

    LEN = rd.END - rd.locus_start + 1

    print('checking that: no reference blocks have GQ=0')
    assert rd.aggregate_entries(hl.agg.all(hl.all(rd.GQ > 0)))

    print('checking that: all ref blocks have END after start')
    assert rd.aggregate_entries(hl.agg.all(hl.all(LEN >= 0)))

    print('checking that: all ref blocks are max 1000 bases long')
    assert rd.aggregate_entries(hl.agg.all(hl.all(LEN <= rd.ref_block_max_length)))

def check_densify_small_region(vds):
    print('running densify on 200kb region')
    from time import time
    t1 = time()

    filt = hl.vds.filter_intervals(vds, [hl.parse_locus_interval('chr16:29.5M-29.7M', reference_genome='GRCh38')])
    n = hl.vds.to_dense_mt(filt).select_entries('LGT')._force_count_rows()

    print(f'took {time() - t1:.1f}s to densify {n} rows after interval query')



2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsUtils.wdl
@@ -129,7 +129,7 @@ task GetToolVersions {
# GVS generally uses the smallest `alpine` version of the Google Cloud SDK as it suffices for most tasks, but
    # there are a handful of tasks that require the larger GNU libc-based `slim`.
String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:435.0.0-slim"
-    String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2025-04-17-alpine-927ee022e0b8"
+    String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2025-04-18-alpine-364b678b8945"
String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19"
String gatk_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2025-03-20-gatkbase-728e45646e04"
String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest"
@@ -1006,14 +1006,14 @@ private VariantContext createVariantContextFromSampleRecord(final ExtractCohortR
 boolean isFailingGenotype(final Stream<Allele> nonRefAlleles,
                           final Map<Allele, Double> remappedVQScoreMap,
                           final Double vqScoreThreshold) {
-    // get the max (best) vqslod of the alleles in this genotype
-    Optional<Double> maxVal =
+    // get the min (worst) vqslod of the alleles in this genotype
+    Optional<Double> minVal =
         nonRefAlleles
             .map(remappedVQScoreMap::get)
             .filter(Objects::nonNull)
-            .max(Double::compareTo);
-    // It's a failing site if the maximum vqlod (if found) is less than the threshold
-    return maxVal.isPresent() && maxVal.get() < vqScoreThreshold;
+            .min(Double::compareTo);
+    // It's a failing site if the minimum vqlod (if found) is less than the threshold
+    return minVal.isPresent() && minVal.get() < vqScoreThreshold;
 }

private SortingCollection<GenericRecord> createSortedVetCollectionFromBigQuery(final String projectID,
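The VQSLOD change above can be sketched in plain Python (illustrative only; `is_failing_genotype_vqslod` and its arguments are hypothetical stand-ins for the Java method and its allele-to-score map):

```python
# Sketch of the fixed rule: fail the genotype when the WORST (minimum) VQSLOD
# among its scored non-ref alleles is below the threshold, so one bad allele
# now fails the whole genotype. (Higher VQSLOD is better.)

def is_failing_genotype_vqslod(non_ref_allele_scores, vq_score_threshold):
    """non_ref_allele_scores: VQSLOD per non-ref allele; None means unscored."""
    scored = [s for s in non_ref_allele_scores if s is not None]
    # mirrors minVal.isPresent() && minVal.get() < vqScoreThreshold
    return bool(scored) and min(scored) < vq_score_threshold

# het 1/2 genotype: good SNP allele (10.0), bad indel allele (-3.0), threshold 0.0.
# The old max-based check passed this genotype; the min-based check fails it.
print(is_failing_genotype_vqslod([10.0, -3.0], 0.0))  # True
print(is_failing_genotype_vqslod([10.0, 5.0], 0.0))   # False
```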
@@ -129,13 +129,14 @@ boolean isFailingSite(final Stream<Double> vqScores, final Double vqScoreThresho
 boolean isFailingGenotype(final Stream<Allele> nonRefAlleles,
                           final Map<Allele, Double> remappedVQScoreMap,
                           final Double vqScoreThreshold) {
-    // get the minimum (best) calibration sensitivity for all non-Yay sites, and apply the filter
-    Optional<Double> minVal =
+    // Get the maximum (worst) calibration sensitivity for these non-Yay alleles. If there is an allele, and it is
+    // greater than the vqScoreThreshold, fail the genotype.
+    Optional<Double> maxVal =
         nonRefAlleles
             .map(remappedVQScoreMap::get)
             .filter(Objects::nonNull)
-            .min(Double::compareTo);
+            .max(Double::compareTo);

-    return minVal.isPresent() && minVal.get() > vqScoreThreshold;
+    return maxVal.isPresent() && maxVal.get() > vqScoreThreshold;
 }
}
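The calibration-sensitivity change follows the same pattern with the polarity reversed; a plain-Python sketch (hypothetical stand-in for the Java method, not GATK itself):

```python
# Unlike VQSLOD, LOWER calibration sensitivity is better, so the worst allele
# score is the MAXIMUM, and the genotype fails when that worst sensitivity
# exceeds the threshold -- again, one bad allele fails the whole genotype.

def is_failing_genotype_sensitivity(non_ref_allele_scores, threshold):
    """non_ref_allele_scores: calibration sensitivity per non-ref allele; None means unscored."""
    scored = [s for s in non_ref_allele_scores if s is not None]
    # mirrors maxVal.isPresent() && maxVal.get() > vqScoreThreshold
    return bool(scored) and max(scored) > threshold

# one allele over a 0.99 threshold fails the genotype
print(is_failing_genotype_sensitivity([0.90, 0.997], 0.99))  # True
print(is_failing_genotype_sensitivity([0.90, 0.95], 0.99))   # False
```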