Skip to content

Commit e3afb9c

Browse files
author
Ubuntu
committed
Merge branch 'release/2.0.0'
2 parents 48cd69a + 411c786 commit e3afb9c

25 files changed

+2032
-2568
lines changed

.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
target/*

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,6 @@ hs_err_pid*
2121
.classpath
2222
.project
2323
.settings/*
24+
.pydevproject
25+
26+
.idea/*

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# in-progress
2+
3+
* Snapshot of [SeqWare-CGP-SomaticCore](https://github.com/ICGC-TCGA-PanCancer/SeqWare-CGP-SomaticCore) at v.1.0.8.
4+
* Separated from SeqWare-CGP-SomaticCore to allow removal of upload/download options specific to PanCancer processing.
5+
16
# 1.0.8
27

38
* SeqWare-CGP-SomaticCore artefact updated to 1.3.1 to handle readgroup count >115 when combining tumour and normal in SNV caller.
File renamed without changes.

Dockerfile

Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
# Base image, pinned to an explicit tag.
FROM pancancer/seqware_whitestar_pancancer:1.1.2

# Version string, published both as an environment variable and as an image label.
ENV SANGER_VERSION=1.0.8
LABEL SANGER_VERSION=$SANGER_VERSION

# The installation steps that follow run as root.
USER root

### START of CGP INSTALL ###

# $OPT is the install prefix used by the steps below; its bin/ directory is
# prepended to PATH and its lib/perl5/ directory to PERL5LIB.
ENV OPT=/opt/wtsi-cgp
ENV PATH=$OPT/bin:$PATH
ENV PERL5LIB=$OPT/lib/perl5:$PERL5LIB
13+
14+
# Install the system packages used by the builds in this file.
# `update` and `install` share one layer so the package index is never stale,
# and the index files are deleted in that same layer (`apt-get clean` only
# empties the .deb cache, not /var/lib/apt/lists).
RUN apt-get -yqq update && \
    apt-get -yqq install \
        autoconf \
        build-essential \
        cpanminus \
        curl \
        gfortran \
        libboost-dev \
        libboost-iostreams-dev \
        libcairo2-dev \
        libexpat1-dev \
        libgd2-xpm-dev \
        libglib2.0-dev \
        libncurses5-dev \
        libpstreams-dev \
        libreadline6-dev \
        libwww-perl \
        openjdk-7-jdk \
        python \
        python-software-properties \
        software-properties-common \
        time \
        unzip \
        wget \
        zlib1g-dev && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
21+
22+
# Scratch directory for downloads and builds; WORKDIR creates it when missing.
WORKDIR /tmp/downloads

# Create the install tree under $OPT.
RUN mkdir -p $OPT/bin $OPT/etc $OPT/lib $OPT/share
24+
25+
# Perl modules installed up front into $OPT; cpanm's work directory is removed
# in the same layer.
RUN cpanm --mirror http://cpan.metacpan.org --local-lib $OPT \
        File::ShareDir \
        File::ShareDir::Install \
        Bio::Root::Version \
        Const::Fast \
        Graph \
 && rm -rf ~/.cpanm
27+
28+
# Pre-built kentUtils binaries (wigToBigWig, bigWigMerge), fetched straight into
# $OPT/bin and made executable. The loop aborts the build on the first failure.
# NOTE(review): the URL points at the repository's master branch, so the
# downloaded binaries are not pinned to a release.
RUN SOURCE_JKENT_BIN=https://github.com/ENCODE-DCC/kentUtils/raw/master/bin/linux.x86_64; \
    for tool in wigToBigWig bigWigMerge; do \
        curl -sSL -o $OPT/bin/$tool -C - --retry 10 ${SOURCE_JKENT_BIN}/$tool && \
        chmod +x $OPT/bin/$tool || exit 1; \
    done
31+
32+
# BWA 0.7.12: the source archive is unpacked directly into the working
# directory, built, and only the bwa binary is installed; the working
# directory is then emptied.
RUN curl --retry 10 -sSL -o tmp.tar.gz https://github.com/lh3/bwa/archive/0.7.12.tar.gz \
 && tar --strip-components=1 -xzf tmp.tar.gz \
 && make \
 && cp bwa $OPT/bin/. \
 && rm -rf *
38+
39+
# biobambam2 2.0.25 (pre-built release): unpack into the working directory and
# copy its bin/, etc/, lib/ and share/ trees into the matching $OPT directories.
# bin/curl is deleted before the copy so it is not installed into $OPT/bin.
RUN curl --retry 10 -sSL -o tmp.tar.gz https://github.com/gt1/biobambam2/releases/download/2.0.25-release-20151105154334/biobambam2-2.0.25-release-20151105154334-x86_64-etch-linux-gnu.tar.gz \
 && tar --strip-components=1 -xzf tmp.tar.gz \
 && rm -f bin/curl \
 && for d in bin etc lib share; do cp -r $d/* $OPT/$d/. || exit 1; done \
 && rm -rf *
48+
49+
# htslib 1.2.1: compiled in place and left in /tmp/downloads/htslib (only the
# archive is removed); per the original note it is used multiple times later.
RUN mkdir /tmp/downloads/htslib \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/samtools/htslib/archive/1.2.1.tar.gz \
 && tar -C /tmp/downloads/htslib --strip-components=1 -xzf tmp.tar.gz \
 && make -C /tmp/downloads/htslib \
 && rm -f /tmp/downloads/tmp.tar.gz

# Location of the compiled htslib tree.
ENV HTSLIB=/tmp/downloads/htslib
57+
58+
# Legacy samtools 0.1.20 plus the Bio::DB::Sam Perl module.
# The perl one-liner adds -fPIC to CFLAGS in the samtools Makefile unless it is
# already present. SAMTOOLS is set only for the cpanm invocation (presumably
# read by the Bio::DB::Sam build to locate the compiled tree - confirm).
RUN mkdir /tmp/downloads/samtools \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/samtools/samtools/archive/0.1.20.tar.gz \
 && tar -C /tmp/downloads/samtools --strip-components=1 -xzf tmp.tar.gz \
 && perl -i -pe 's/^CFLAGS=\s*/CFLAGS=-fPIC / unless /\b-fPIC\b/' samtools/Makefile \
 && make -C samtools \
 && cp samtools/samtools $OPT/bin/. \
 && SAMTOOLS=/tmp/downloads/samtools cpanm --mirror http://cpan.metacpan.org -l $OPT Bio::DB::Sam \
 && rm -rf /tmp/downloads/samtools /tmp/downloads/tmp.tar.gz ~/.cpanm
68+
69+
# PCAP-core v1.13.1: build the C code, install bam_stats, clean the C build,
# then install the Perl distribution into $OPT with cpanm. Sources, archive and
# cpanm's work directory are removed in the same layer.
RUN mkdir /tmp/downloads/PCAP \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/ICGC-TCGA-PanCancer/PCAP-core/archive/v1.13.1.tar.gz \
 && tar -C /tmp/downloads/PCAP --strip-components=1 -xzf tmp.tar.gz \
 && make -C /tmp/downloads/PCAP/c \
 && cp /tmp/downloads/PCAP/bin/bam_stats $OPT/bin/. \
 && make -C /tmp/downloads/PCAP/c clean \
 && ( cd /tmp/downloads/PCAP && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/PCAP /tmp/downloads/tmp.tar.gz ~/.cpanm
80+
81+
# tabix / bgzip binaries plus the tabix Perl bindings.
# The Perl bindings are installed with INSTALL_BASE=$OPT: the previous value,
# $INST_PATH, is never defined in this file, so the bindings were not being
# installed under the $OPT prefix that PERL5LIB points at.
# NOTE(review): the archive is the repository's master branch, so this step is
# not pinned to a release.
RUN curl -sSL -o tmp.zip --retry 10 https://github.com/samtools/tabix/archive/master.zip && \
    unzip -q tmp.zip && \
    cd /tmp/downloads/tabix-master && \
    make && \
    cp tabix bgzip $OPT/bin/. && \
    cd perl && \
    perl Makefile.PL INSTALL_BASE=$OPT && \
    make && make test && make install && \
    cd /tmp/downloads && \
    rm -rf /tmp/downloads/tabix-master /tmp/downloads/tmp.zip
92+
93+
# Start of the cgpVcf section.
# Commit id of the cgpVcf release whose vcftools patches are fetched below.
ENV CGPVCF_UUID=5cc538ded838a4ba94feedff1b51ee3ebc4b65f4

# vcftools 0.1.12a: apply the two patches from cgpVcf (one to the Makefile, one
# to perl/Vcf.pm), then run make with PREFIX=$OPT. Sources and archive are
# removed in the same layer.
RUN mkdir /tmp/downloads/vcftools \
 && curl --retry 10 -sSL -o tmp.tar.gz http://sourceforge.net/projects/vcftools/files/vcftools_0.1.12a.tar.gz/download \
 && tar -C /tmp/downloads/vcftools --strip-components=1 -xzf /tmp/downloads/tmp.tar.gz \
 && ( cd /tmp/downloads/vcftools \
      && curl --retry 10 -sSL -o vcfToolsInstLocs.diff https://raw.githubusercontent.com/cancerit/cgpVcf/$CGPVCF_UUID/patches/vcfToolsInstLocs.diff \
      && patch Makefile < vcfToolsInstLocs.diff \
      && curl --retry 10 -sSL -o vcfToolsProcessLog.diff https://raw.githubusercontent.com/cancerit/cgpVcf/$CGPVCF_UUID/patches/vcfToolsProcessLog.diff \
      && patch perl/Vcf.pm < vcfToolsProcessLog.diff \
      && make PREFIX=$OPT ) \
 && rm -rf /tmp/downloads/vcftools /tmp/downloads/tmp.tar.gz
110+
111+
# bedtools 2.21.0 - per the original note, the version must be one without the
# input switch. Everything the build places in bin/ is copied to $OPT/bin.
RUN mkdir /tmp/downloads/bedtools2 \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/arq5x/bedtools2/releases/download/v2.21.0/bedtools-2.21.0.tar.gz \
 && tar -C /tmp/downloads/bedtools2 --strip-components=1 -xzf tmp.tar.gz \
 && make -C /tmp/downloads/bedtools2 \
 && cp /tmp/downloads/bedtools2/bin/* $OPT/bin/. \
 && rm -rf /tmp/downloads/bedtools2 /tmp/downloads/tmp.tar.gz
118+
119+
# cgpVcf v1.3.1: unpack the release archive and install it into $OPT with cpanm;
# sources, archive and cpanm's work directory are removed in the same layer.
RUN mkdir /tmp/downloads/cgpVcf \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/cgpVcf/archive/v1.3.1.tar.gz \
 && tar -C /tmp/downloads/cgpVcf --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/cgpVcf && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/cgpVcf /tmp/downloads/tmp.tar.gz ~/.cpanm
127+
128+
# alleleCount v2.1.2 - only the C implementation is built; the resulting
# alleleCounter binary is installed into $OPT/bin.
RUN mkdir /tmp/downloads/alleleCount \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/alleleCount/archive/v2.1.2.tar.gz \
 && tar -C /tmp/downloads/alleleCount --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/alleleCount/c && mkdir bin && make ) \
 && cp /tmp/downloads/alleleCount/c/bin/alleleCounter $OPT/bin/. \
 && rm -rf /tmp/downloads/alleleCount /tmp/downloads/tmp.tar.gz
138+
139+
# verifyBamID 1.1.2 (pre-built binary), installed as $OPT/bin/verifyBamId.
# -f makes curl fail on an HTTP error instead of saving the error page as the
# "binary". The file is written directly to $OPT/bin, so nothing is left in
# /tmp/downloads to clean up.
RUN curl -fsSL -o $OPT/bin/verifyBamId --retry 10 https://github.com/statgen/verifyBamID/releases/download/v1.1.2/verifyBamID.1.1.2 && \
    chmod +x $OPT/bin/verifyBamId
143+
144+
# cgpNgsQc v1.1.0: unpack the release archive and install it into $OPT with
# cpanm; sources, archive and cpanm's work directory are removed in the same layer.
RUN mkdir /tmp/downloads/cgpNgsQc \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/cgpNgsQc/archive/v1.1.0.tar.gz \
 && tar -C /tmp/downloads/cgpNgsQc --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/cgpNgsQc && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/cgpNgsQc /tmp/downloads/tmp.tar.gz ~/.cpanm
152+
153+
# ascatNgs v1.7.1: the Perl distribution lives in the archive's perl/
# subdirectory and is installed from there into $OPT with cpanm.
RUN mkdir /tmp/downloads/ascatNgs \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/ascatNgs/archive/v1.7.1.tar.gz \
 && tar -C /tmp/downloads/ascatNgs --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/ascatNgs/perl && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/ascatNgs /tmp/downloads/tmp.tar.gz ~/.cpanm
161+
162+
# cgpPindel v1.5.5: compile the two C++ programs straight into $OPT/bin, then
# install the Perl distribution from perl/ into $OPT with cpanm.
RUN mkdir /tmp/downloads/cgpPindel \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/cgpPindel/archive/v1.5.5.tar.gz \
 && tar -C /tmp/downloads/cgpPindel --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/cgpPindel \
      && g++ -O3 -o $OPT/bin/pindel c++/pindel.cpp \
      && g++ -O3 -o $OPT/bin/filter_pindel_reads c++/filter_pindel_reads.cpp \
      && cd /tmp/downloads/cgpPindel/perl \
      && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/cgpPindel /tmp/downloads/tmp.tar.gz ~/.cpanm
173+
174+
# cgpCaVEManPostProcessing 1.5.3: unpack the release archive and install it into
# $OPT with cpanm; sources, archive and cpanm's work directory are removed in
# the same layer.
RUN mkdir /tmp/downloads/cgpCaVEManPostProcessing \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/cgpCaVEManPostProcessing/archive/1.5.3.tar.gz \
 && tar -C /tmp/downloads/cgpCaVEManPostProcessing --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/cgpCaVEManPostProcessing && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/cgpCaVEManPostProcessing /tmp/downloads/tmp.tar.gz ~/.cpanm
182+
183+
# CaVEMan 1.9.1: build from source and install the caveman and
# mergeCavemanResults binaries. The cleanup also removes ~/.cpanm and
# ~/.cache/hts-ref along with the sources and archive.
RUN mkdir /tmp/downloads/CaVEMan \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/CaVEMan/archive/1.9.1.tar.gz \
 && tar -C /tmp/downloads/CaVEMan --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/CaVEMan && make ) \
 && cp /tmp/downloads/CaVEMan/bin/caveman /tmp/downloads/CaVEMan/bin/mergeCavemanResults $OPT/bin/. \
 && rm -rf /tmp/downloads/CaVEMan /tmp/downloads/tmp.tar.gz ~/.cpanm ~/.cache/hts-ref
193+
194+
195+
# cgpCaVEManWrapper 1.9.2: unpack the release archive and install it into $OPT
# with cpanm; sources, archive and cpanm's work directory are removed in the
# same layer.
RUN mkdir /tmp/downloads/cgpCaVEManWrapper \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/cgpCaVEManWrapper/archive/1.9.2.tar.gz \
 && tar -C /tmp/downloads/cgpCaVEManWrapper --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/cgpCaVEManWrapper && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/cgpCaVEManWrapper /tmp/downloads/tmp.tar.gz ~/.cpanm
203+
204+
# VAGrENT v2.1.3: unpack the release archive and install it into $OPT with
# cpanm. The cleanup additionally removes any /tmp/downloads/*.tmp.bioperl
# files along with the sources, archive and cpanm's work directory.
RUN mkdir /tmp/downloads/VAGrENT \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/VAGrENT/archive/v2.1.3.tar.gz \
 && tar -C /tmp/downloads/VAGrENT --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/VAGrENT && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/VAGrENT /tmp/downloads/tmp.tar.gz /tmp/downloads/*.tmp.bioperl ~/.cpanm
212+
213+
# grass v1.1.6: unpack the release archive and install it into $OPT with cpanm;
# sources, archive and cpanm's work directory are removed in the same layer.
RUN mkdir /tmp/downloads/grass \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/grass/archive/v1.1.6.tar.gz \
 && tar -C /tmp/downloads/grass --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/grass && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/grass /tmp/downloads/tmp.tar.gz ~/.cpanm
221+
222+
223+
# BRASS section.
# blat comes first: the pre-built binary is downloaded straight into $OPT/bin
# and made executable for user, group and others.
RUN curl --retry 10 -sSL -o $OPT/bin/blat http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/blat \
 && chmod a+x $OPT/bin/blat
227+
228+
# Pre-compiled exonerate 2.2.0: only bin/exonerate is extracted from the
# archive, into $OPT/bin.
# The archive is downloaded to a file rather than piped into tar: the default
# shell has no pipefail, so a pipe would hide curl's exit status. -f makes curl
# fail on HTTP errors and --retry matches the other downloads in this file.
RUN curl -fsSL -o tmp.tar.gz --retry 10 http://ftp.ebi.ac.uk/pub/software/vertebrategenomics/exonerate/exonerate-2.2.0-x86_64.tar.gz && \
    tar -C $OPT/bin --strip-components=2 -zxf tmp.tar.gz exonerate-2.2.0-x86_64/bin/exonerate && \
    chmod ugo+x $OPT/bin/exonerate && \
    rm -f /tmp/downloads/tmp.tar.gz
232+
233+
# BRASS v4.0.15. The Perl module Graph is installed near the top of this file
# because Bio/Brass.pm requires it.
# Steps, all inside the unpacked source tree:
#   1. replace any cansam directory with the bundled distros/cansam.zip and build it;
#   2. build the c++ tools and install augment-bam, brass-group, filterout-bam;
#   3. build velvet 1.2.10 twice - with MAXKMERLENGTH=95 (installed as
#      velvet95h/velvet95g) and with the default settings (installed as
#      velvet31h/velvet31g) - and point velveth/velvetg at the 95 build;
#   4. install the Perl distribution from perl/ into $OPT with cpanm.
RUN mkdir /tmp/downloads/BRASS \
 && curl --retry 10 -sSL -o tmp.tar.gz https://github.com/cancerit/BRASS/archive/v4.0.15.tar.gz \
 && tar -C /tmp/downloads/BRASS --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/BRASS \
      && rm -rf cansam* \
      && unzip -q distros/cansam.zip \
      && mv cansam-master cansam \
      && make -C cansam \
      && make -C c++ \
      && cp c++/augment-bam c++/brass-group c++/filterout-bam $OPT/bin/. \
      && tar zxf distros/velvet_1.2.10.tgz \
      && cd velvet_1.2.10 \
      && make MAXKMERLENGTH=95 velveth velvetg \
      && mv velveth $OPT/bin/velvet95h \
      && mv velvetg $OPT/bin/velvet95g \
      && make clean \
      && make velveth velvetg \
      && mv velveth $OPT/bin/velvet31h \
      && mv velvetg $OPT/bin/velvet31g \
      && ln -fs $OPT/bin/velvet95h $OPT/bin/velveth \
      && ln -fs $OPT/bin/velvet95g $OPT/bin/velvetg \
      && cd /tmp/downloads/BRASS/perl \
      && cpanm --mirror http://cpan.metacpan.org -l $OPT . ) \
 && rm -rf /tmp/downloads/BRASS /tmp/downloads/tmp.tar.gz ~/.cpanm
262+
263+
# R 3.1.3 built from source with cairo support and installed under $OPT.
# `make check` runs before `make install`, so a failing check stops the build.
RUN mkdir /tmp/downloads/R-build \
 && curl --retry 10 -sSL -o tmp.tar.gz http://ftp.heanet.ie/mirrors/cran.r-project.org/src/base/R-3/R-3.1.3.tar.gz \
 && tar -C /tmp/downloads/R-build --strip-components=1 -xzf tmp.tar.gz \
 && ( cd /tmp/downloads/R-build \
      && ./configure --with-cairo=yes --prefix=$OPT \
      && make \
      && make check \
      && make install ) \
 && rm -rf /tmp/downloads/R-build /tmp/downloads/tmp.tar.gz
274+
275+
# Install the R packages through Bioconductor's biocLite.
# The script is written with printf '%s\n' (one argument per script line)
# instead of relying on `echo` to expand "\n": whether echo interprets
# backslash escapes is implementation-defined, printf's behaviour is not.
# The generated tmp.R is identical to what the previous echo form produced
# under a shell whose echo expands "\n".
RUN printf '%s\n' \
        '(".Rprofile: Setting UK repository")' \
        'r = getOption("repos") # hard code the UK repo for CRAN' \
        'r["CRAN"] = "http://cran.uk.r-project.org"' \
        'options(repos = r)' \
        'rm(r)' \
        'source("http://bioconductor.org/biocLite.R")' \
        'biocLite("gam", ask=FALSE)' \
        'biocLite("VGAM", ask=FALSE)' \
        'biocLite("stringr", ask=FALSE)' \
        'biocLite("BiocGenerics", ask=FALSE)' \
        'biocLite("poweRlaw", ask=FALSE)' \
        'biocLite("S4Vectors", ask=FALSE)' \
        'biocLite("IRanges", ask=FALSE)' \
        'biocLite("GenomeInfoDb", ask=FALSE)' \
        'biocLite("zlibbioc", ask=FALSE)' \
        'biocLite("XVector", ask=FALSE)' \
        'biocLite("RColorBrewer", ask=FALSE)' \
        'biocLite("GenomicRanges", ask=FALSE)' \
        'biocLite("copynumber", ask=FALSE)' \
        > tmp.R && \
    Rscript tmp.R && \
    rm tmp.R
296+
297+
# ssearch36 (from the fasta36 v36.3.8 release), a BRASS dependency: only the
# ssearch36 binary is installed into $OPT/bin.
# The downloaded archive is removed together with the unpacked tree; previously
# tmp.tar.gz was left behind in /tmp/downloads and so stayed in the image.
RUN curl -sSL -o tmp.tar.gz --retry 10 https://github.com/wrpearson/fasta36/releases/download/v36.3.8/fasta-36.3.8-linux64.tar.gz && \
    mkdir /tmp/downloads/fasta && \
    tar -C /tmp/downloads/fasta --strip-components 2 -zxf tmp.tar.gz && \
    cp /tmp/downloads/fasta/bin/ssearch36 $OPT/bin/. && \
    rm -rf /tmp/downloads/fasta /tmp/downloads/tmp.tar.gz
303+
304+
### END of CGP INSTALL ###

# Copy the workflow project from the build context into the image:
# three directories, then the two top-level files.
COPY src      /home/seqware/CGP-Somatic-Docker/src
COPY workflow /home/seqware/CGP-Somatic-Docker/workflow
COPY scripts  /home/seqware/CGP-Somatic-Docker/scripts
COPY pom.xml workflow.properties /home/seqware/CGP-Somatic-Docker/
311+
312+
# Make both entry scripts executable in a single layer (one RUN instead of two).
RUN chmod a+x \
        /home/seqware/CGP-Somatic-Docker/scripts/run_sanger.sh \
        /home/seqware/CGP-Somatic-Docker/scripts/run_seqware_workflow.py
314+
315+
316+
# SEQWARE_ROOT is set to the literal value "root"; later instructions run from
# the copied project directory.
ENV SEQWARE_ROOT=root
WORKDIR /home/seqware/CGP-Somatic-Docker
318+
319+
# Adjust the seqware user's configuration:
#   - settings: force OOZIE_RETRY_MAX to 0 and append WHITESTAR_MEMORY_LIMIT;
#   - .Rprofile: select the cairo bitmap device for R.
RUN cfg=/home/seqware/.seqware/settings \
 && sed -i 's|OOZIE_RETRY_MAX=.*|OOZIE_RETRY_MAX=0|' $cfg \
 && echo 'WHITESTAR_MEMORY_LIMIT=160000' >> $cfg \
 && echo "options(bitmapType='cairo')" > /home/seqware/.Rprofile
322+
323+
# default entry will run test data
324+
#ENTRYPOINT /home/seqware/CGP-Somatic-Docker/scripts/run_sanger.sh
325+
326+
# Build the workflow at image-build time; per the original note this avoids
# future problems should the artifactory at OICR go down.
RUN mvn --batch-mode clean install
328+
329+
# Mount points exposed by the image. Declared after every build step that
# writes below /home/seqware, so that content is kept.
VOLUME ["/output", "/datastore", "/home/seqware"]
332+
333+
# Default command: an interactive bash shell. Exec (JSON-array) form runs bash
# directly instead of wrapping it in `/bin/sh -c`, so it receives signals itself.
CMD ["/bin/bash"]

0 commit comments

Comments
 (0)