Skip to content

Commit eddfa81

Browse files
authored
Do not strip ECN fields from truth records in SVConcordance (#9161)
1 parent 3f6faa1 commit eddfa81

File tree

2 files changed

+32
-0
lines changed

2 files changed

+32
-0
lines changed

src/main/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordance.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@
6262
* of the specific fields. For multi-allelic CNVs, only a copy state concordance metric is
6363
* annotated. Allele frequencies will be recalculated automatically if unavailable in the provided VCFs.
6464
*
65+
* Minimum matching criteria can be specified as in {@link SVCluster} (e.g. reciprocal overlap). These serve to
66+
* improve computational efficiency, but may be set to relaxed values that can be iterated on post hoc. Note that
67+
* this set of parameters includes minimum sample overlap (Jaccard index of carrier samples), which should generally
68+
* be set to 0 for concordance analysis.
69+
*
6570
* This tool also supports stratification of the SVs into groups with specified matching criteria including SV type,
6671
* size range, and interval overlap. Please see the {@link GroupedSVCluster} tool documentation for further details
6772
* on how to specify stratification groups. Stratification only affects the criteria applied to each "eval" SV. In
@@ -307,6 +312,9 @@ private static Genotype stripTruthGenotype(final Genotype genotype) {
307312
if (genotype.hasExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT)) {
308313
builder.attribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT, genotype.getExtendedAttribute(GATKSVVCFConstants.COPY_NUMBER_FORMAT));
309314
}
315+
if (genotype.hasExtendedAttribute(GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT)) {
316+
builder.attribute(GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT, genotype.getExtendedAttribute(GATKSVVCFConstants.EXPECTED_COPY_NUMBER_FORMAT));
317+
}
310318
return builder.make();
311319
}
312320

src/test/java/org/broadinstitute/hellbender/tools/walkers/sv/SVConcordanceIntegrationTest.java

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,30 @@ public void testRefPanel() {
154154
Assert.assertTrue(checkedVariantsSet.contains("ref_panel_1kg.chr22.final_cleanup_INS_chr22_100"));
155155
}
156156

157+
// Regression test with non-zero sample overlap criteria, which require the ECN FORMAT field to be present after
158+
// stripping truth record genotypes of extra fields.
159+
@Test
160+
public void testRefPanelWithSampleOverlap() {
161+
final File output = createTempFile("concord", ".vcf.gz");
162+
final String evalVcfPath = getToolTestDataDir() + "ref_panel_1kg.cleaned.gatk.chr22_chrY.vcf.gz";
163+
final String truthVcfPath = getToolTestDataDir() + "ref_panel_1kg.raw_calls.chr22_chrY.vcf.gz";
164+
final ArgumentsBuilder args = new ArgumentsBuilder()
165+
.addOutput(output)
166+
.add(StandardArgumentDefinitions.SEQUENCE_DICTIONARY_NAME, GATKBaseTest.FULL_HG38_DICT)
167+
.add(SVClusterEngineArgumentsCollection.DEPTH_SAMPLE_OVERLAP_FRACTION_NAME, 0.1)
168+
.add(SVClusterEngineArgumentsCollection.DEPTH_INTERVAL_OVERLAP_FRACTION_NAME, 0.5)
169+
.add(SVClusterEngineArgumentsCollection.DEPTH_BREAKEND_WINDOW_NAME, 10000000)
170+
.add(SVClusterEngineArgumentsCollection.MIXED_SAMPLE_OVERLAP_FRACTION_NAME, 0.1)
171+
.add(SVClusterEngineArgumentsCollection.MIXED_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
172+
.add(SVClusterEngineArgumentsCollection.MIXED_BREAKEND_WINDOW_NAME, 2000)
173+
.add(SVClusterEngineArgumentsCollection.PESR_SAMPLE_OVERLAP_FRACTION_NAME, 0.1)
174+
.add(SVClusterEngineArgumentsCollection.PESR_INTERVAL_OVERLAP_FRACTION_NAME, 0.1)
175+
.add(SVClusterEngineArgumentsCollection.PESR_BREAKEND_WINDOW_NAME, 500)
176+
.add(AbstractConcordanceWalker.TRUTH_VARIANTS_LONG_NAME, truthVcfPath)
177+
.add(AbstractConcordanceWalker.EVAL_VARIANTS_SHORT_NAME, evalVcfPath);
178+
runCommandLine(args, SVConcordance.class.getSimpleName());
179+
}
180+
157181
@Test
158182
public void testRefPanelStratified() {
159183
final File output = createTempFile("concord", ".vcf.gz");

0 commit comments

Comments
 (0)