Skip to content

Commit d620e86

Browse files
authored
Testing by running with plumbing data (#5)
Add infrastructure for testing by running workflows with plumbing data, and some tests for BenchmarkVCFs
1 parent 0ad9c4a commit d620e86

18 files changed

+327
-46
lines changed

.circleci/config.yml

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,66 @@
1-
# CircleCI 2.0 configuration file
1+
# CircleCI 2.1 configuration file
22
#
33
#
4-
version: 2
5-
jobs:
6-
build:
4+
version: 2.1
5+
executors:
6+
womtool-executor:
77
docker:
88
# specify the version you desire here
99
- image: broadinstitute/womtool:47-b36d920
1010
entrypoint: /bin/bash
1111
working_directory: ~/palantir-workflows
12+
# `environment` is a map of variable names to values in CircleCI config, not a KEY=value string.
environment: {WOMTOOL_JAR: /app/womtool.jar}
1213

13-
environment:
14-
# Customize the JVM maximum heap limit
14+
commands:
15+
get-cromwell-jar:
16+
description: "Download cromwell jar"
17+
steps:
18+
- run: wget https://github.com/broadinstitute/cromwell/releases/download/47/cromwell-47.jar
19+
20+
install-make:
21+
description: "Install make"
22+
steps:
23+
- run: |
24+
apt-get update
25+
apt-get install -y make
26+
jobs:
27+
validate:
28+
executor: womtool-executor
29+
steps:
30+
- checkout
31+
- install-make
32+
- run: make validate
1533

34+
validate-with-json:
35+
executor: womtool-executor
1636
steps:
1737
- checkout
18-
- run: apt-get update
19-
- run: apt-get install -y make
20-
- run: make all
38+
- install-make
39+
- run: make validate-with-json
40+
41+
test:
42+
machine: true
43+
environment:
44+
CROMWELL_JAR: /home/circleci/project/cromwell-47.jar
45+
steps:
46+
- checkout
47+
- get-cromwell-jar
48+
- run: make test
49+
- store_artifacts:
50+
path: /home/circleci/project/logs
51+
- store_artifacts:
52+
path: /home/circleci/project/workflow_logs
53+
- store_artifacts:
54+
path: /home/circleci/project/call_logs
55+
56+
57+
workflows:
58+
version: 2
59+
validate-and-test:
60+
jobs:
61+
- validate
62+
- validate-with-json
63+
- test:
64+
requires:
65+
- validate
66+
- validate-with-json

.gitignore

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,16 @@ old_*
3737
*.recal
3838
*.recal.idx
3939

40+
#cromwell executions folders
41+
*cromwell-executions*
42+
4043
#hidden and backup files
4144
._*
4245
.vimbup
4346
.*.done
4447
.queue
4548
*~
46-
.*.swp
49+
.*.swp
50+
51+
#Intellij stuff
52+
.idea

BenchmarkVCFs/BenchmarkVCFs.wdl

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ workflow Benchmark {
9494
input:
9595
evalVcf=evalVcf,
9696
truthVcf=truthVcf,
97+
evalVcfIndex=evalVcfIndex,
98+
truthVcfIndex=truthVcfIndex,
9799
hapMap=hapMap,
98100
gatkTag=gatkTag,
99101
preemptible=preemptible
@@ -1385,8 +1387,10 @@ task CombineSummaries {
13851387
#Use CrosscheckFingerprints to match evaluation vcfs to appropriate truth vcfs
13861388
task MatchEvalTruth {
13871389
input{
1388-
String evalVcf
1389-
String truthVcf
1390+
File evalVcf
1391+
File truthVcf
1392+
File evalVcfIndex
1393+
File truthVcfIndex
13901394
File hapMap
13911395
Int? preemptible
13921396
Int? memoryMaybe
@@ -1397,6 +1401,21 @@ task MatchEvalTruth {
13971401
Int memoryRam=memoryJava+2
13981402
Int disk_size = 10 + ceil(size(hapMap, "GB"))
13991403
1404+
parameter_meta {
1405+
evalVcf: {
1406+
localization_optional: true
1407+
}
1408+
truthVcf: {
1409+
localization_optional: true
1410+
}
1411+
evalVcfIndex: {
1412+
localization_optional: true
1413+
}
1414+
truthVcfIndex: {
1415+
localization_optional: true
1416+
}
1417+
}
1418+
14001419
command <<<
14011420
gatk --java-options "-Xmx~{memoryJava}G" CrosscheckFingerprints -I ~{evalVcf} -SI ~{truthVcf} -H ~{hapMap} --CROSSCHECK_MODE CHECK_ALL_OTHERS --CROSSCHECK_BY FILE --EXPECT_ALL_GROUPS_TO_MATCH
14021421
>>>

BenchmarkVCFs/BenchmarkVCFs.wdl.json

Lines changed: 0 additions & 20 deletions
This file was deleted.

Makefile

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,38 @@
1+
# Input JSONs for the test workflows. Simply expanded (:=) so `find` runs once at parse time
# rather than on every reference to the variable.
TEST_JSON := $(shell find test -name '*.json')
12

2-
all: validate validate_with_json
3+
# WDLs to validate without inputs; everything under ./test is excluded.
# Simply expanded (:=) so `find` runs once at parse time.
VALIDATE_WDL := $(shell find . -name '*.wdl' ! -path './test/*')
34

4-
WDL= $(shell find . -name '*.wdl')
5+
TEST=java -jar $(CROMWELL_JAR) run
56

6-
VALIDATE=java -jar /app/womtool.jar validate
7+
VALIDATE=java -jar $(WOMTOOL_JAR) validate
78

8-
all-tests := $(addsuffix .test, $(basename $(WDL)))
9-
all-tests-json := $(addsuffix .json, $(all-tests))
9+
all-tests := $(addsuffix .test, $(TEST_JSON))
1010

11-
validate: $(all-tests)
12-
validate_with_json : $(all-tests) $(all-tests-json)
11+
all-validations := $(addsuffix .validate, $(VALIDATE_WDL))
1312

14-
test : $(all-tests)
13+
all-validations-with-json := $(addsuffix .validate, $(TEST_JSON))
1514

16-
test_with_json: test $(all-tests-json)
15+
.PHONY: all
16+
all: test validate
1717

18-
%.test : %.wdl
19-
$(VALIDATE) $?
18+
.PHONY: test
19+
test: $(all-tests)
2020

21-
%.test.json : %.wdl.json
22-
$(VALIDATE) $(basename $? .json) -i $?
21+
.PHONY: validate
22+
validate: $(all-validations)
23+
24+
.PHONY: validate-with-json
25+
validate-with-json: $(all-validations-with-json)
26+
27+
# Validate a single workflow. The target is "<file>.wdl.validate"; stripping the
# ".validate" suffix from $@ yields the WDL path handed to womtool.
# .PHONY does not understand `%` patterns (it would only mark a target literally
# named "%.wdl.validate"), so the always-out-of-date FORCE prerequisite is used
# to make the rule run every time, even if a file with the target's name exists.
%.wdl.validate: FORCE
	$(VALIDATE) $(basename $@)

# Deliberately empty: a target with no prerequisites and no recipe is always
# considered updated, which forces everything depending on it to rerun.
FORCE:
30+
31+
# Run one test workflow through Cromwell. The target is "<inputs>.json.test";
# the WDL path is derived from the directory holding the inputs JSON by
# replacing its trailing "_json/" with ".wdl". Output is streamed to the
# console and also saved under logs/.
# .PHONY does not understand `%` patterns, so the always-out-of-date FORCE
# prerequisite is used to make the rule run on every invocation.
# The pipeline runs under `bash -o pipefail`: with a plain `cmd | tee`, the
# recipe's exit status is tee's, so a failed Cromwell run would not fail make.
%.json.test: FORCE
	mkdir -p logs
	bash -o pipefail -c '$(TEST) $(subst _json/,.wdl, $(dir $(basename $@))) -i $(basename $@) -o test_options.json 2>&1 | tee logs/$(notdir $(subst _json/,.wdl, $(dir $(basename $@))))_with_$(notdir $(basename $@)).log'

# Deliberately empty: a target with no prerequisites and no recipe is always
# considered updated, which forces everything depending on it to rerun.
FORCE:
35+
36+
# Validate a test workflow together with one inputs JSON. The target is
# "<inputs>.json.validate"; the WDL path is derived from the directory holding
# the inputs JSON by replacing its trailing "_json/" with ".wdl".
# .PHONY does not understand `%` patterns, so the always-out-of-date FORCE
# prerequisite is used to make the rule run on every invocation.
%.json.validate: FORCE
	$(VALIDATE) $(subst _json/,.wdl, $(dir $(basename $@))) -i $(basename $@)

# Deliberately empty: a target with no prerequisites and no recipe is always
# considered updated, which forces everything depending on it to rerun.
FORCE:

README.md

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,21 @@ Utility workflows used by the DSP's Palantir team. This repository should be us
77
**Remember, this is a public repository, so anything you put in this repo is publicly viewable.**
88

99

10-
**To enable testable workflows, please make sure that a FILE.wdl.json is present with every FILE.wdl you add.
10+
## Testing Workflows
11+
12+
All workflows should have associated tests.
13+
In order to add tests, you should add a test workflow to the `test` directory.
14+
The test workflow should call the workflow you are testing, and (preferably) compare the outputs to those expected.
15+
Input JSONs for the test workflow must be placed in a directory whose name is the same as the test workflow, with `.wdl` replaced by `_json`.
16+
So, the test directory structure will be built like this:
17+
18+
```bash
19+
+-- palantir-workflows
20+
| +-- test
21+
| | +-- MyWorkflow
22+
| | | +-- my_test_workflow.wdl
23+
| | | +-- my_test_workflow_json
24+
| | | | +-- test_input_1.json
25+
| | | | +-- test_input_2.json
26+
+++++++++++++++++
27+
```
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
@HD VN:1.6 SO:coordinate
2+
@SQ SN:chr1 LN:100
3+
chr1 1 17 + .
4+
chr1 20 65 + .
5+
chr1 70 97 + .
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
##fileformat=VCFv4.2
2+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
3+
##contig=<ID=chr1,length=100>
4+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SM-1
5+
chr1 10 . G T 30 . . GT 0/1
6+
chr1 22 . CCACG C 30 . . GT 1/1
7+
chr1 73 . A AGT 30 . . GT 1/1
8+
chr1 86 . G T 30 SUPER_FILTER . GT 0/1
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
>chr1 LN:100
2+
GGTGGAGCGCGCCGCCACGGACCACGGGCGGGCTGGCGGGCGAGCGGCGAGCGCGCGGCG
3+
ATCCGAGCCCCTAGGGCGGATCCCGGCTCCAGGCCCGCGC
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
@HD VN:1.0 SO:unsorted
2+
@SQ SN:chr1 LN:100 M5:cc081e3e70932dda461569ee09e668ba
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
chr1 100 13 60 61
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
@HD VN:1.6 SO:coordinate
2+
@SQ SN:chr1 LN:100
3+
#CHROMOSOME POSITION NAME MAJOR_ALLELE MINOR_ALLELE MAF ANCHOR_SNP PANELS
4+
chr1 10 rs1 G T 0.5
5+
chr1 22 rs2 CCACG C 0.9
6+
chr1 73 rs3 A AGT 0.5
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
##fileformat=VCFv4.2
2+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
3+
##contig=<ID=chr1,length=100>
4+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SM-1
5+
chr1 10 . G T 30 . . GT 0/1
6+
chr1 22 . CCACG C 30 . . GT 1/1
7+
chr1 73 . A AGT 30 . . GT 0/1
8+
chr1 86 . G T 30 . . GT 0/1
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
version 1.0
2+
3+
import "../../BenchmarkVCFs/BenchmarkVCFs.wdl"
4+
5+
workflow testBenchmarkVCFs {
6+
input {
7+
Float expected_snpPrecision
8+
Float expected_indelPrecision
9+
Float expected_snpRecall
10+
Float expected_indelRecall
11+
12+
File evalVcf
13+
File truthVcf
14+
}
15+
16+
call BgzipAndIndex as BgzipAndIndexEval {
17+
input:
18+
vcf = evalVcf
19+
}
20+
21+
call BgzipAndIndex as BgzipAndIndexTruth {
22+
input:
23+
vcf = truthVcf
24+
}
25+
26+
call BenchmarkVCFs.Benchmark {
27+
input:
28+
evalVcf = BgzipAndIndexEval.bgzipped_vcf,
29+
evalVcfIndex = BgzipAndIndexEval.vcf_index,
30+
truthVcf = BgzipAndIndexTruth.bgzipped_vcf,
31+
truthVcfIndex = BgzipAndIndexTruth.vcf_index
32+
}
33+
34+
call AssertPassed {
35+
input:
36+
expected_snpPrecision = expected_snpPrecision,
37+
expected_indelPrecision = expected_indelPrecision,
38+
expected_snpRecall = expected_snpRecall,
39+
expected_indelRecall = expected_indelRecall,
40+
observed_snpPrecision = Benchmark.snpPrecision,
41+
observed_indelPrecision = Benchmark.indelPrecision,
42+
observed_snpRecall = Benchmark.snpRecall,
43+
observed_indelRecall = Benchmark.indelRecall,
44+
45+
}
46+
}
47+
48+
task BgzipAndIndex {
49+
input {
50+
File vcf
51+
}
52+
53+
command <<<
54+
set -xeuo pipefail
55+
56+
ln -s ~{vcf} .
57+
bgzip ~{basename(vcf)}
58+
tabix ~{basename(vcf) + ".gz"}
59+
>>>
60+
61+
runtime {
62+
docker: "biocontainers/tabix@sha256:7e093436d00c01cf6ad7b285680bf1657f9fcb692cc083c972e5df5a7e951f49"
63+
}
64+
65+
output {
66+
File bgzipped_vcf = "~{basename(vcf) + '.gz'}"
67+
File vcf_index = "~{basename(vcf) + '.gz.tbi'}"
68+
}
69+
}
70+
71+
task AssertPassed {
72+
input {
73+
Float expected_snpPrecision
74+
Float expected_indelPrecision
75+
Float expected_snpRecall
76+
Float expected_indelRecall
77+
78+
Float observed_snpPrecision
79+
Float observed_indelPrecision
80+
Float observed_snpRecall
81+
Float observed_indelRecall
82+
}
83+
84+
command <<<
85+
set -euo pipefail
86+
87+
assert_eq() {
88+
local variable="$1"
89+
local expected="$2"
90+
local observed="$3"
91+
92+
if [[ $expected != $observed ]]; then
93+
>&2 echo $variable expected to be $expected, observed as $observed
94+
exit 1;
95+
fi
96+
}
97+
98+
assert_eq snpPrecision ~{expected_snpPrecision} ~{observed_snpPrecision}
99+
assert_eq indelPrecision ~{expected_indelPrecision} ~{observed_indelPrecision}
100+
assert_eq snpRecall ~{expected_snpRecall} ~{observed_snpRecall}
101+
assert_eq indelRecall ~{expected_indelRecall} ~{observed_indelRecall}
102+
>>>
103+
104+
runtime {
105+
docker: "ubuntu@sha256:134c7fe821b9d359490cd009ce7ca322453f4f2d018623f849e580a89a685e5d"
106+
}
107+
}

0 commit comments

Comments
 (0)