Skip to content

Commit efab764

Browse files
committed
Add samtools for file format testing.
1 parent ee25c7b commit efab764

File tree

5 files changed

+209
-0
lines changed

5 files changed

+209
-0
lines changed

.github/workflows/gatk-tests.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,9 @@ jobs:
118118
if: needs.check-secrets.outputs.google-credentials == 'true'
119119
uses: google-github-actions/setup-gcloud@v2
120120

121+
- name: 'Install Samtools'
122+
run: scripts/install-samtools.sh
123+
121124
- name: pull lfs files
122125
run: git lfs pull
123126

scripts/install-samtools.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/sh
# Builds and installs a pinned samtools release from the official GitHub
# release tarball. Intended for Ubuntu-based CI runners (uses apt-get and sudo).
#
# NOTE(review): --prefix=/usr installs the binary as /usr/bin/samtools, while
# SamtoolsTestUtils.getSamtoolsBin() defaults to /usr/local/bin/samtools when
# HTSJDK_SAMTOOLS_BIN is unset. Confirm the CI environment sets
# HTSJDK_SAMTOOLS_BIN=/usr/bin/samtools, otherwise the tests will not find it.
set -ex

# Ubuntu specific.
# -y is required on every apt-get command that may prompt: CI has no terminal,
# so an unanswered confirmation aborts the step.
sudo apt-get update
sudo apt-get upgrade -y
sudo apt-get install -y libncurses-dev libbz2-dev liblzma-dev

# Install from the GitHub release tarball.
export SAMTOOLS_VERSION=1.21
wget "https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VERSION}/samtools-${SAMTOOLS_VERSION}.tar.bz2"
tar -xjvf "samtools-${SAMTOOLS_VERSION}.tar.bz2"

# Run each build step as its own command so that `set -e` stops the script at
# the first failure (errexit is ignored for all but the last command of an
# `&&` list).
cd "samtools-${SAMTOOLS_VERSION}"
./configure --prefix=/usr
make
sudo make install

src/test/java/org/broadinstitute/hellbender/PrintFileDiagnosticsIntegrationTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ public class PrintFileDiagnosticsIntegrationTest extends CommandLineProgramTest
1515
@DataProvider(name = "fileDiagnosticsTestCases")
1616
public Object[][] getFileDiagnosticsTestCases() {
1717
return new Object[][]{
18+
{
19+
"src/test/resources/large/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.v3.0.samtools.cram",
20+
List.of(Pair.of("count-limit", "10")),
21+
"src/test/resources/filediagnostics/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.txt"
22+
},
1823
{
1924
//this pathname is embedded in the diagnostics output file, so we use a relative pathname
2025
// instead of the named constant NA12878_20_21_WGS_cram in order to avoid test failures
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package org.broadinstitute.hellbender.testutils;
2+
3+
import htsjdk.io.HtsPath;
4+
import htsjdk.io.IOPath;
5+
import htsjdk.samtools.SAMRecord;
6+
import htsjdk.samtools.SamReader;
7+
import htsjdk.samtools.SamReaderFactory;
8+
import htsjdk.samtools.ValidationStringency;
9+
import htsjdk.samtools.util.CloseableIterator;
10+
import htsjdk.samtools.util.ProcessExecutor;
11+
import org.broadinstitute.hellbender.GATKBaseTest;
12+
import org.testng.Assert;
13+
import org.testng.SkipException;
14+
import org.testng.annotations.Test;
15+
16+
import java.io.File;
17+
import java.io.IOException;
18+
19+
public class SamtoolsTestUtilsTest extends GATKBaseTest {
20+
private static final File TEST_DATA_DIR = new File("src/test/resources/org/broadinstitute/hellbender/tools/");
21+
22+
@Test
23+
public void testSamtoolsIsAvailable() {
24+
Assert.assertTrue(SamtoolsTestUtils.isSamtoolsAvailable());
25+
}
26+
27+
@Test
28+
public void testSamtoolsVersion() {
29+
if (!SamtoolsTestUtils.isSamtoolsAvailable()) {
30+
throw new SkipException("Samtools not available on local device");
31+
}
32+
// If this test runs, but fails because version validation fails, then the local samtools version is
33+
// not the one expected by the htsjdk tests
34+
final ProcessExecutor.ExitStatusAndOutput processStatus = SamtoolsTestUtils.executeSamToolsCommand("--version");
35+
Assert.assertTrue(processStatus.stdout.contains(SamtoolsTestUtils.expectedSamtoolsVersion));
36+
}
37+
38+
@Test(expectedExceptions = RuntimeException.class)
39+
public void testSamtoolsPresentButCommandFails() {
40+
if (!SamtoolsTestUtils.isSamtoolsAvailable()) {
41+
throw new SkipException("Samtools not available on local device");
42+
}
43+
SamtoolsTestUtils.executeSamToolsCommand("--notASamtoolsCommand");
44+
}
45+
46+
@Test
47+
public void testCRAMConversion()throws IOException {
48+
if (!SamtoolsTestUtils.isSamtoolsAvailable()) {
49+
throw new SkipException("Samtools not available on local device");
50+
}
51+
52+
// Validates CRAM 3.1 conversion.
53+
final File sourceFile = new File(TEST_DATA_DIR, "print_reads.cram");
54+
final File cramReference = new File(TEST_DATA_DIR, "print_reads.fasta");
55+
// This also validates that any extra command line arguments are passed through to samtools by requesting
56+
// that NM/MD values are synthesized in the output file (which is required for the output records to match).
57+
final IOPath tempSamtoolsPath = SamtoolsTestUtils.convertToCRAM(
58+
new HtsPath(sourceFile.getAbsolutePath()),
59+
new HtsPath(cramReference.getAbsolutePath()),
60+
"--output-fmt cram,version=3.0,fast");
61+
final SamReaderFactory factory = SamReaderFactory.makeDefault()
62+
.validationStringency(ValidationStringency.LENIENT)
63+
.referenceSequence(cramReference);
64+
try (final SamReader originalReader = factory.open(sourceFile);
65+
final SamReader samtoolsCopyReader = factory.open(tempSamtoolsPath.toPath());
66+
final CloseableIterator<SAMRecord> originalIt = originalReader.iterator();
67+
final CloseableIterator<SAMRecord> samtoolsIt = samtoolsCopyReader.iterator()) {
68+
while (originalIt.hasNext() && samtoolsIt.hasNext()) {
69+
Assert.assertEquals(originalIt.next(), samtoolsIt.next());
70+
}
71+
Assert.assertEquals(samtoolsIt.hasNext(), originalIt.hasNext());
72+
}
73+
}
74+
}
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package org.broadinstitute.hellbender.testutils;
2+
3+
import htsjdk.beta.plugin.IOUtils;
4+
import htsjdk.io.IOPath;
5+
import htsjdk.samtools.util.FileExtensions;
6+
import htsjdk.samtools.util.ProcessExecutor;
7+
import java.nio.file.Files;
8+
import java.nio.file.Path;
9+
import java.nio.file.Paths;
10+
11+
/**
12+
* Test utilities for running samtools from htsjdk tests.
13+
*/
14+
public class SamtoolsTestUtils {
15+
private static final String SAMTOOLS_BINARY_ENV_VARIABLE = "HTSJDK_SAMTOOLS_BIN";
16+
public final static String expectedSamtoolsVersion = "1.21";
17+
18+
/**
19+
* @return true if samtools is available, otherwise false
20+
*/
21+
public static boolean isSamtoolsAvailable() {
22+
final String binPath = getSamtoolsBin();
23+
final Path binFile = Paths.get(binPath);
24+
return Files.exists(binFile);
25+
}
26+
27+
/**
28+
* @return true if a local samtools executable is available, otherwise throws a runtimeException
29+
*/
30+
public static void assertSamtoolsAvailable() {
31+
if (!isSamtoolsAvailable()) {
32+
throw new RuntimeException(
33+
String.format(
34+
"No samtools executable can be found." +
35+
" The %s environment variable must be set to the name of the local samtools executable.",
36+
SAMTOOLS_BINARY_ENV_VARIABLE));
37+
}
38+
}
39+
40+
/**
41+
* @return the name and location of the local samtools executable as specified by the environment
42+
* variable HTSJDK_SAMTOOLS_BIN, or the default value of "/usr/local/bin/samtools" if the environment
43+
* variable is not set
44+
*/
45+
public static String getSamtoolsBin() {
46+
final String samtoolsPath = System.getenv(SAMTOOLS_BINARY_ENV_VARIABLE);
47+
return samtoolsPath == null ? "/usr/local/bin/samtools" : samtoolsPath;
48+
}
49+
50+
/**
51+
* Execute a samtools command line if a local samtools executable is available see {@link #isSamtoolsAvailable()}.
52+
*
53+
* @param commandLine samtools command line string, excluding the "samtools" prefix. For example:
54+
* {@code "view -h -b my.sam -o my.bam"}
55+
* @return the {@link ProcessExecutor.ExitStatusAndOutput} resulting from the command execution, if
56+
* the command succeeds
57+
* @throws RuntimeException if the command fails, or if a local samtools executable is not available.
58+
*/
59+
public static ProcessExecutor.ExitStatusAndOutput executeSamToolsCommand(final String commandLine) {
60+
assertSamtoolsAvailable();
61+
final String commandString = String.format("%s %s", getSamtoolsBin(), commandLine);
62+
final ProcessExecutor.ExitStatusAndOutput processStatus =
63+
ProcessExecutor.executeAndReturnInterleavedOutput(commandString);
64+
if (processStatus.exitStatus != 0) {
65+
// samtools seems to write some errors to stdout
66+
throw new RuntimeException(
67+
String.format("Failure code %d returned from samtools command %s\n (stderr: %.500s)\n (stdout: %.500s)\n",
68+
processStatus.exitStatus,
69+
commandString,
70+
processStatus.stderr == null ? "" : processStatus.stderr,
71+
processStatus.stdout == null ? "" : processStatus.stdout));
72+
}
73+
return processStatus;
74+
}
75+
76+
/**
77+
* Convert an input sam/bam/cram file to a temporary CRAM file using the samtools "view" command. The temp
78+
* file will be deleted when the process exits. Use {@link #isSamtoolsAvailable()} to determine if its safe
79+
* to use this method.
80+
*
81+
* @param inputSAMBAMCRAMFile input file to convert
82+
* @param referenceFile a valid reference file
83+
* @param commandLineOptions additional command line options (--input-fmt-option or --output-fmt-option)
84+
* @return a temporary file containing the samtools-generated results.
85+
*/
86+
public static final IOPath convertToCRAM(
87+
final IOPath inputSAMBAMCRAMFile,
88+
final IOPath referenceFile,
89+
final String commandLineOptions) {
90+
assertSamtoolsAvailable();
91+
final IOPath tempCRAMPath = IOUtils.createTempPath("samtoolsTemporaryCRAM", FileExtensions.CRAM);
92+
tempCRAMPath.toPath().toFile().deleteOnExit();
93+
final String commandString = String.format("view -h -C -T %s %s %s -o %s",
94+
referenceFile.toPath().toAbsolutePath(),
95+
commandLineOptions == null ? "" : commandLineOptions,
96+
inputSAMBAMCRAMFile.toPath().toAbsolutePath(),
97+
tempCRAMPath.toPath().toAbsolutePath());
98+
executeSamToolsCommand(commandString);
99+
return tempCRAMPath;
100+
}
101+
102+
public static final void convertToCRAM(
103+
final IOPath inputSAMBAMCRAMPath,
104+
final IOPath outputPath,
105+
final IOPath referencePath,
106+
final String commandLineOptions) {
107+
assertSamtoolsAvailable();
108+
final String commandString = String.format("view -h -C -T %s %s %s -o %s",
109+
referencePath.toPath().toAbsolutePath(),
110+
commandLineOptions == null ? "" : commandLineOptions,
111+
inputSAMBAMCRAMPath.toPath().toAbsolutePath(),
112+
outputPath.toPath().toAbsolutePath());
113+
executeSamToolsCommand(commandString);
114+
}
115+
}

0 commit comments

Comments
 (0)