Skip to content

Commit c6d6240

Browse files
authored
Merge pull request #16 from IARCbioinfo/dev
Dev
2 parents 393cab7 + 2cd5fd0 commit c6d6240

18 files changed

+98
-732
lines changed

.circleci/config.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,13 @@ jobs:
88
- run: cd ~ ; wget -qO- get.nextflow.io | bash ; chmod 755 nextflow ; sudo ln -s ~/nextflow /usr/local/bin/ ; sudo apt-get install graphviz
99
- run: cd ~ && git clone https://github.com/iarcbioinfo/data_test.git
1010
- run: echo " docker.runOptions = '-u $(id -u):$(id -g)' " > ~/.nextflow/config
11-
- run: cd ~/project/ ; docker build -t iarcbioinfo/rnaseq-nf .
11+
- run:
12+
name: docker_build
13+
no_output_timeout: 120m
14+
command: cd ~/project/ ; docker build -t iarcbioinfo/rnaseq-nf .
1215
- run: cd ; nextflow run ~/project/RNAseq.nf --help
1316
- run: cd ; nextflow run ~/project/RNAseq.nf -with-docker iarcbioinfo/rnaseq-nf --input_folder ~/data_test/BAM/ --output_folder BAM_realigned --ref_folder ~/data_test/REF --gtf ~/data_test/REF/TP53_small.gtf --bed ~/data_test/BED/TP53_small.bed --cpu 2 --mem 4 -with-dag dag_STAR.png
17+
- run: cd ; nextflow run ~/project/RNAseq.nf -with-docker iarcbioinfo/rnaseq-nf --input_folder data_test/FASTQ/ --fastq_ext fastq.gz --suffix2 null --output_folder BAM_aligned --ref_folder data_test/REF --gtf data_test/REF/TP53_small.gtf --bed data_test/BED/TP53_small.bed --cpu 2 --mem 4 --sjtrim --ref data_test/REF/17_7572000-7591000.fasta --recalibration --snp_vcf data_test/REF/dbsnp_138.17_7572000-7591000.vcf.gz --indel_vcf data_test/REF/1000G_phase1.indels.17_7572000-7591000.sites.vcf.gz
1418
- run: cd ; nextflow run ~/project/RNAseq.nf -with-docker iarcbioinfo/rnaseq-nf --input_folder ~/data_test/BAM/ --output_folder BAM_realigned --ref_folder ~/data_test/REF --gtf ~/data_test/REF/TP53_small.gtf --bed ~/data_test/BED/TP53_small.bed --cpu 2 --mem 4 -with-dag dag_STAR.html
1519
- run: cd ; nextflow run ~/project/RNAseq.nf -with-docker iarcbioinfo/rnaseq-nf --input_folder ~/data_test/BAM/ --output_folder BAM_realigned_sjtrim --ref_folder ~/data_test/REF --gtf ~/data_test/REF/TP53_small.gtf --bed ~/data_test/BED/TP53_small.bed --cpu 2 --mem 4 --sjtrim --ref ~/data_test/REF/17_7572000-7591000.fasta -with-dag dag_STAR_sjtrim.png
1620
- run: cd ; nextflow run ~/project/RNAseq.nf -with-docker iarcbioinfo/rnaseq-nf --input_folder ~/data_test/BAM/ --output_folder BAM_realigned_sjtrim --ref_folder ~/data_test/REF --gtf ~/data_test/REF/TP53_small.gtf --bed ~/data_test/BED/TP53_small.bed --cpu 2 --mem 4 --sjtrim --ref ~/data_test/REF/17_7572000-7591000.fasta -with-dag dag_STAR_sjtrim.html

Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ FROM continuumio/miniconda3:4.7.12
66
LABEL base_image="continuumio/miniconda3"
77
LABEL version="4.7.12"
88
LABEL software="rnaseq-nf"
9-
LABEL software.version="2.3"
9+
LABEL software.version="2.4"
1010
LABEL about.summary="Container image containing all requirements for rnaseq-nf"
1111
LABEL about.home="http://github.com/IARCbioinfo/RNAseq-nf"
1212
LABEL about.documentation="http://github.com/IARCbioinfo/RNAseq-nf/README.md"
@@ -19,5 +19,6 @@ MAINTAINER **nalcala** <**alcalan@fellows.iarc.fr**>
1919
################## INSTALLATION ######################
2020
COPY environment.yml /
2121
RUN apt-get update && apt-get install -y procps && apt-get clean -y
22+
RUN conda config --set channel_priority strict
2223
RUN conda env create -n rnaseq-nf -f /environment.yml && conda clean -a
2324
ENV PATH /opt/conda/envs/rnaseq-nf/bin:$PATH

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,24 @@ nextflow run iarcbioinfo/RNAseq-nf -r v2.2 -profile singularity --input_folder f
125125
```
126126
To run the pipeline using conda instead of singularity, replace "-profile singularity" with "-profile conda". To run with your own local software installation, just remove "-profile singularity".
127127

128+
### Single-end fastq mode
129+
By default, the pipeline expects paired-end libraries. To use single-end libraries as input, you must specify the option "--suffix2 null".
130+
```bash
131+
nextflow run iarcbioinfo/RNAseq-nf -r v2.2 -profile singularity --input_folder fastq --ref_folder ref_genome --gtf ref.gtf --bed ref.bed --suffix2 null
132+
```
133+
If using "--input_file", you must additionally set the values in column "pair2" to "NO_fastq2". For example, the following file input.txt:
134+
135+
```
136+
SM RG pair1 pair2
137+
sample1 sample1.fq.gz NO_fastq2
138+
sample2 RG1 sample2_RG1.fq.gz NO_fastq2
139+
sample2 RG2 sample2_RG2.fq.gz NO_fastq2
140+
```
141+
can be processed with
142+
```bash
143+
nextflow run iarcbioinfo/RNAseq-nf -r v2.2 -profile singularity --input_file input.txt --ref_folder ref_genome --gtf ref.gtf --bed ref.bed --suffix2 null
144+
```
145+
128146
### Use hisat2 for mapping
129147
To use hisat2 instead of STAR for read mapping, you must add the ***--hisat2* option**, specify the path to the folder containing the hisat2 index files (genome_tran.1.ht2 to genome_tran.8.ht2), and satisfy the requirements mentioned above. For example:
130148
```bash

RNAseq.nf

Lines changed: 55 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ params.help = null
4848

4949
log.info ""
5050
log.info "--------------------------------------------------------"
51-
log.info " RNAseq-nf 2.3.0: alignment, QC, and reads counting workflow for RNA sequencing "
51+
log.info " RNAseq-nf 2.4.0: alignment, QC, and reads counting workflow for RNA sequencing "
5252
log.info "--------------------------------------------------------"
5353
log.info "Copyright (C) IARC/WHO"
5454
log.info "This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE"
@@ -132,6 +132,9 @@ if (params.help) {
132132
log.info "help = ${params.help}"
133133
}
134134

135+
suffix2 = params.suffix2
136+
if(suffix2=="null" ) suffix2 = null
137+
135138
//multiqc config file
136139
ch_config_for_multiqc = file(params.multiqc_config)
137140

@@ -216,31 +219,19 @@ if(mode=='bam'){
216219
if(mode=='fastq'){
217220
println "fastq mode"
218221

219-
keys1 = file(params.input_folder).listFiles().findAll { it.name ==~ /.*${params.suffix1}.${params.fastq_ext}/ }.collect { it.getName() }
220-
.collect { it.replace("${params.suffix1}.${params.fastq_ext}",'') }
221-
keys2 = file(params.input_folder).listFiles().findAll { it.name ==~ /.*${params.suffix2}.${params.fastq_ext}/ }.collect { it.getName() }
222-
.collect { it.replace("${params.suffix2}.${params.fastq_ext}",'') }
223-
if ( !(keys1.containsAll(keys2)) || !(keys2.containsAll(keys1)) ) {println "\n ERROR : There is not at least one fastq without its mate, please check your fastq files."; System.exit(0)}
224-
225-
readPairs0 = Channel.fromFilePairs(params.input_folder +"/*{${params.suffix1},${params.suffix2}}" +'.'+ params.fastq_ext)
222+
if(suffix2){
223+
println "paired library"
224+
readPairs0 = Channel.fromFilePairs(params.input_folder +"/*{${params.suffix1},${params.suffix2}}" +'.'+ params.fastq_ext)
226225
.map { row -> [ row[0] , "" , row[1][0], row[1][1] ] }
227-
.subscribe{ row -> println "${row}" }
228-
229-
// Gather files ending with _1 suffix
230-
reads1 = Channel
231-
.fromPath( params.input_folder+'/*'+params.suffix1+'.'+params.fastq_ext )
232-
.map { path -> [ path.name.replace("${params.suffix1}.${params.fastq_ext}",""), path ] }
233-
234-
// Gather files ending with _2 suffix
235-
reads2 = Channel
236-
.fromPath( params.input_folder+'/*'+params.suffix2+'.'+params.fastq_ext )
237-
.map { path -> [ path.name.replace("${params.suffix2}.${params.fastq_ext}",""), path ] }
238-
239-
// Match the pairs on two channels having the same 'key' (name) and emit a new pair containing the expected files
240-
reads1
241-
.phase(reads2)
242-
.map { pair1, pair2 -> [ pair1[0] , "" , pair1[1], pair2[1] ] }
243-
.into{ readPairs ; readPairs2}
226+
.view()
227+
.into{ readPairs ; readPairs2}
228+
}else{
229+
println "single library"
230+
readPairs0 = Channel.fromPath(params.input_folder +"/*${params.suffix1}" +'.'+ params.fastq_ext)
231+
.map { row -> [ row.name.replace("${params.suffix1}.${params.fastq_ext}","") , "" , row , file("NO_fastq2") ] }
232+
.view()
233+
.into{ readPairs ; readPairs2}
234+
}
244235
}
245236
}
246237

@@ -262,10 +253,17 @@ process fastqc_pretrim {
262253
shell:
263254
basename1=pair1.name.replace(".${params.fastq_ext}","") //baseName.split("\\.")[0]
264255
basename2=pair2.name.replace(".${params.fastq_ext}","") //baseName.split("\\.")[0]
256+
if(suffix2){
257+
pairs="${pair1} ${pair2}"
258+
}else{
259+
pairs="${pair1}"
260+
}
265261
'''
266-
fastqc -t !{task.cpus} !{pair1} !{pair2}
262+
fastqc -t !{task.cpus} !{pairs}
267263
mv !{basename1}_fastqc.zip !{file_tag}!{params.suffix1}!{rg}_pretrim_fastqc.zip
268-
mv !{basename2}_fastqc.zip !{file_tag}!{params.suffix2}!{rg}_pretrim_fastqc.zip
264+
if [ ! -L NO_fastq2 ]
265+
then mv !{basename2}_fastqc.zip !{file_tag}!{params.suffix2}!{rg}_pretrim_fastqc.zip
266+
fi
269267
'''
270268
}
271269

@@ -281,7 +279,7 @@ if(params.cutadapt!=null){
281279

282280
output:
283281
set val(file_tag), val(rg) , file("${file_tag}${rg}*val_1.fq.gz"), file("${file_tag}${rg}*val_2.fq.gz") into readPairs3
284-
file("*_val_*_fastqc.zip") into fastqc_postpairs
282+
file("*_fastqc.zip") into fastqc_postpairs
285283
file("*trimming_report.txt") into trimming_reports
286284

287285
publishDir "${params.output_folder}/QC/adapter_trimming", mode: 'copy', pattern: '{*report.txt,*fastqc.zip}'
@@ -290,8 +288,19 @@ if(params.cutadapt!=null){
290288
cpu_tg = params.cpu_trim -1
291289
cpu_tg2 = cpu_tg.div(3.5)
292290
cpu_tg3 = Math.round(Math.ceil(cpu_tg2))
291+
if(suffix2){
292+
pairs="${pair1} ${pair2}"
293+
opts="--paired "
294+
}else{
295+
pairs="${pair1}"
296+
opts=" "
297+
}
293298
'''
294-
trim_galore --paired --fastqc --gzip --basename !{file_tag}!{rg} -j !{cpu_tg3} !{pair1} !{pair2}
299+
trim_galore !{opts} --fastqc --gzip --basename !{file_tag}!{rg} -j !{cpu_tg3} !{pairs}
300+
if [ ! -L NO_fastq2 ]
301+
mv !{file_tag}!{rg}_trimmed.fq.gz !{file_tag}!{rg}_val_1.fq.gz
302+
then touch !{file_tag}!{rg}_val_2.fq.gz
303+
fi
295304
'''
296305
}
297306
}else{
@@ -337,23 +346,32 @@ process alignment {
337346
sort_threads = params.cpu.intdiv(2) - 1
338347
sort_mem = params.mem.intdiv(4)
339348
input_f1="${pair1[0]}"
340-
input_f2="${pair2[0]}"
341349
rgtmp="${rg[0]}"
342350
if(rgtmp=="") rgtmp="${file_tag}"
343351
rgline="ID:${rgtmp} SM:${file_tag} ${params.RG}"
344352
for( p1tmp in pair1.drop(1) ){
345-
input_f1=input_f1+",${p1tmp}"
346-
}
347-
for( p2tmp in pair2.drop(1) ){
348-
input_f2=input_f2+",${p2tmp}"
353+
input_f1=input_f1+",${p1tmp}"
349354
}
350355
for( rgtmp in rg.drop(1) ){
351-
if(rgtmp=="") rgtmp="${file_tag}"
356+
if(rgtmp=="") rgtmp="${file_tag}"
352357
rgline=rgline+" , ID:${rgtmp} SM:${file_tag} ${params.RG}"
353358
}
354-
MQ=""
359+
if(suffix2){
360+
input_f2="${pair2[0]}"
361+
for( p2tmp in pair2.drop(1) ){
362+
input_f2=input_f2+",${p2tmp}"
363+
}
364+
pairs="${input_f1} ${input_f2}"
365+
}else{
366+
pairs="${input_f1}"
367+
}
355368
'''
356-
STAR --outSAMattrRGline !{rgline} --outSAMmapqUnique !{params.STAR_mapqUnique} --chimSegmentMin 12 --chimJunctionOverhangMin 12 --chimSegmentReadGapMax 3 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 --alignSJstitchMismatchNmax 5 -1 5 5 --outSAMstrandField intronMotif --chimMultimapScoreRange 10 --chimMultimapNmax 10 --chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --chimOutJunctionFormat 1 --twopassMode Basic --outReadsUnmapped None --runThreadN !{align_threads} --genomeDir . --sjdbGTFfile !{gtf} --readFilesCommand zcat --readFilesIn !{input_f1} !{input_f2} --outStd SAM | samblaster --addMateTags | sambamba view -S -f bam -l 0 /dev/stdin | sambamba sort -t !{sort_threads} -m !{sort_mem}G --tmpdir=!{file_tag}_tmp -o !{file_tag}.bam /dev/stdin
369+
STAR --outSAMattrRGline !{rgline} --outSAMmapqUnique !{params.STAR_mapqUnique} --chimSegmentMin 12 --chimJunctionOverhangMin 12 \
370+
--chimSegmentReadGapMax 3 --alignSJDBoverhangMin 10 --alignMatesGapMax 100000 --alignIntronMax 100000 \
371+
--alignSJstitchMismatchNmax 5 -1 5 5 --outSAMstrandField intronMotif --chimMultimapScoreRange 10 --chimMultimapNmax 10 \
372+
--chimNonchimScoreDropMin 10 --peOverlapNbasesMin 12 --peOverlapMMp 0.1 --chimOutJunctionFormat 1 --twopassMode Basic \
373+
--outReadsUnmapped None --runThreadN !{align_threads} --genomeDir . --sjdbGTFfile !{gtf} --readFilesCommand zcat \
374+
--readFilesIn !{pairs} --outStd SAM | samblaster --addMateTags | sambamba view -S -f bam -l 0 /dev/stdin | sambamba sort -t !{sort_threads} -m !{sort_mem}G --tmpdir=!{file_tag}_tmp -o !{file_tag}.bam /dev/stdin
357375
mv Chimeric.out.junction STAR.!{file_tag}.Chimeric.SJ.out.junction
358376
mv SJ.out.tab STAR.!{file_tag}.SJ.out.tab
359377
mv Log.final.out STAR.!{file_tag}.Log.final.out

Singularity/Singularity.v2.4

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
From:iarcbioinfo/rnaseq-nf:v2.4
2+
Bootstrap:docker
3+
4+
%labels
5+
MAINTAINER **alcalan** <**alcalan@fellows.iarc.fr**>
6+
DESCRIPTION Container image containing all requirements for pipeline RNAseq-nf
7+
VERSION 2.4
8+
9+

dag_STAR.html

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,15 +199,15 @@
199199
{ data: { source: 'p26', target: 'p46', label: 'align_out' } },
200200
{ data: { source: 'p26', target: 'p28', label: 'SJ_out' } },
201201
{ data: { source: 'p26', target: 'p27', label: 'SJ_out_others' } },
202-
{ data: { source: 'p29', target: 'p40', label: 'recal_bam_files4quant' } },
203-
{ data: { source: 'p29', target: 'p32', label: 'recal_bam4QCsplittmp' } },
204202
{ data: { source: 'p29', target: 'p31', label: 'recal_bam_files4QC' } },
203+
{ data: { source: 'p29', target: 'p32', label: 'recal_bam4QCsplittmp' } },
204+
{ data: { source: 'p29', target: 'p40', label: 'recal_bam_files4quant' } },
205205
{ data: { source: 'p30', target: 'p31', label: 'bed' } },
206206
{ data: { source: 'p31', target: 'p49', label: 'rseqc_files' } },
207207
{ data: { source: 'p31', target: 'p48', label: 'rseqc_clip_files' } },
208208
{ data: { source: 'p31', target: 'p50', label: 'rseqc_jsat_files' } },
209-
{ data: { source: 'p32', target: 'p34', label: 'recal_bam_files4QCsplit0' } },
210209
{ data: { source: 'p32', target: 'p33', label: 'simple' } },
210+
{ data: { source: 'p32', target: 'p34', label: 'recal_bam_files4QCsplit0' } },
211211
{ data: { source: 'p34', target: 'p36', label: 'recal_bam_files4QCsplit' } },
212212
{ data: { source: 'p34', target: 'p37', label: 'recal_bam_files4QCsplit4test' } },
213213
{ data: { source: 'p35', target: 'p36', label: 'bed' } },

0 commit comments

Comments
 (0)