diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf index c738a0748..df834fbad 100644 --- a/hello-nextflow/hello-config/main.nf +++ b/hello-nextflow/hello-config/main.nf @@ -8,32 +8,69 @@ params.reads_bam = "${projectDir}/data/sample_bams.txt" // Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" -params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" // Base name for final output file params.cohort_name = "family_trio" +workflow { + + // Create input channel from a text file listing input file paths + reads_ch = Channel.fromPath(params.reads_bam).splitText() + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) + + // Collect variant calling outputs across samples + all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() + all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() + + // Combine GVCFs into a GenomicsDB data store and apply joint genotyping + GATK_JOINTGENOTYPING( + all_gvcfs_ch, + all_idxs_ch, + intervals_file, + params.cohort_name, + ref_file, + ref_index_file, + ref_dict_file + ) +} + /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - 
publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } @@ -41,29 +78,27 @@ process SAMTOOLS_INDEX { * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf", emit: vcf + path "${input_bam}.g.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.g.vcf \ - -L ${interval_list} \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.g.vcf \\ + -L ${interval_list} \\ -ERC GVCF """ } @@ -72,75 +107,34 @@ process GATK_HAPLOTYPECALLER { * Combine GVCFs into GenomicsDB datastore and run joint genotyping to produce cohort-level calls */ process GATK_JOINTGENOTYPING { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - path all_gvcfs - path all_idxs - path interval_list - val cohort_name - path ref_fasta - path ref_index - path ref_dict + path all_gvcfs + path all_idxs + path interval_list + val cohort_name + path ref_fasta + path ref_index + path ref_dict output: - path "${cohort_name}.joint.vcf" - path "${cohort_name}.joint.vcf.idx" + path "${cohort_name}.joint.vcf" + path "${cohort_name}.joint.vcf.idx" script: - def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') 
+ def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') """ - gatk GenomicsDBImport \ - ${gvcfs_line} \ - -L ${interval_list} \ + gatk GenomicsDBImport \\ + ${gvcfs_line} \\ + -L ${interval_list} \\ --genomicsdb-workspace-path ${cohort_name}_gdb - gatk GenotypeGVCFs \ - -R ${ref_fasta} \ - -V gendb://${cohort_name}_gdb \ - -L ${interval_list} \ + gatk GenotypeGVCFs \\ + -R ${ref_fasta} \\ + -V gendb://${cohort_name}_gdb \\ + -L ${interval_list} \\ -O ${cohort_name}.joint.vcf """ } - -workflow { - - // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) - - // Collect variant calling outputs across samples - all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() - all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() - - // Combine GVCFs into a GenomicsDB data store and apply joint genotyping - GATK_JOINTGENOTYPING( - all_gvcfs_ch, - all_idxs_ch, - intervals_file, - params.cohort_name, - ref_file, - ref_index_file, - ref_dict_file - ) -} diff --git a/hello-nextflow/hello-containers.nf b/hello-nextflow/hello-containers.nf index fa799b78b..98595dfe2 100644 --- a/hello-nextflow/hello-containers.nf +++ b/hello-nextflow/hello-containers.nf @@ -8,24 +8,37 @@ params.input_file = "containers/data/greetings.csv" // 'meow', 'miki', 'milk', 'octopus', 'pig', 'stegosaurus', 'stimpy', 'trex', 'turkey', 'turtle', 'tux' params.character = "cow" +workflow { + + // create a channel for inputs from a CSV file + input_ch = Channel 
+ .fromPath(params.input_file) + .splitCsv() + .flatten() + + sayHello(input_ch) + + // cowSay the text + cowSay(sayHello.out) +} + /* * Use echo to print 'Hello World!' to standard out */ process sayHello { - publishDir 'containers/results', mode: 'copy' input: - val greeting + val greeting output: - path "output-*.txt" + path "output-*.txt" script: - // Replace the spaces in the greeting with hyphens for the output filename - def safe_greeting = greeting.tokenize(' ').join('-') - """ - echo '$greeting' > 'output-${safe_greeting}.txt' + // Replace the spaces in the greeting with hyphens for the output filename + def safe_greeting = greeting.tokenize(' ').join('-') + """ + echo '${greeting}' > 'output-${safe_greeting}.txt' """ } @@ -33,30 +46,16 @@ process sayHello { * Use a cow (or other character) to say some text */ process cowSay { - publishDir 'containers/results', mode: 'copy' input: - path input_file + path input_file output: - path "cowsay-*" + path "cowsay-*" script: """ - cowsay -c "$params.character" -t "\$(cat $input_file)" > cowsay-${input_file} + cowsay -c "${params.character}" -t "\$(cat ${input_file})" > cowsay-${input_file} """ } - -workflow { - - // create a channel for inputs from a CSV file - input_ch = Channel.fromPath(params.input_file) - .splitCsv() - .flatten() - - sayHello(input_ch) - - // cowSay the text - cowSay(sayHello.out) -} diff --git a/hello-nextflow/hello-modules/main.nf b/hello-nextflow/hello-modules/main.nf index ddbd345a9..2a9ac3358 100644 --- a/hello-nextflow/hello-modules/main.nf +++ b/hello-nextflow/hello-modules/main.nf @@ -1,24 +1,61 @@ #!/usr/bin/env nextflow +workflow { + + // Create input channel from a text file listing input file paths + reads_ch = Channel.fromPath(params.reads_bam).splitText() + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file =
file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) + + // Collect variant calling outputs across samples + all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() + all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() + + // Combine GVCFs into a GenomicsDB data store and apply joint genotyping + GATK_JOINTGENOTYPING( + all_gvcfs_ch, + all_idxs_ch, + intervals_file, + params.cohort_name, + ref_file, + ref_index_file, + ref_dict_file + ) +} + /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' conda "bioconda::samtools=1.20" - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } @@ -26,30 +63,28 @@ process SAMTOOLS_INDEX { * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" conda "bioconda::gatk4=4.5.0.0" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf", emit: vcf + path "${input_bam}.g.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.g.vcf \ - -L ${interval_list} \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O 
${input_bam}.g.vcf \\ + -L ${interval_list} \\ -ERC GVCF """ } @@ -58,76 +93,35 @@ process GATK_HAPLOTYPECALLER { * Combine GVCFs into GenomicsDB datastore and run joint genotyping to produce cohort-level calls */ process GATK_JOINTGENOTYPING { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" conda "bioconda::gatk4=4.5.0.0" - publishDir 'results_genomics', mode: 'symlink' input: - path all_gvcfs - path all_idxs - path interval_list - val cohort_name - path ref_fasta - path ref_index - path ref_dict + path all_gvcfs + path all_idxs + path interval_list + val cohort_name + path ref_fasta + path ref_index + path ref_dict output: - path "${cohort_name}.joint.vcf" - path "${cohort_name}.joint.vcf.idx" + path "${cohort_name}.joint.vcf" + path "${cohort_name}.joint.vcf.idx" script: - def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') + def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') """ - gatk GenomicsDBImport \ - ${gvcfs_line} \ - -L ${interval_list} \ + gatk GenomicsDBImport \\ + ${gvcfs_line} \\ + -L ${interval_list} \\ --genomicsdb-workspace-path ${cohort_name}_gdb - gatk GenotypeGVCFs \ - -R ${ref_fasta} \ - -V gendb://${cohort_name}_gdb \ - -L ${interval_list} \ + gatk GenotypeGVCFs \\ + -R ${ref_fasta} \\ + -V gendb://${cohort_name}_gdb \\ + -L ${interval_list} \\ -O ${cohort_name}.joint.vcf """ } - -workflow { - - // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - 
intervals_file - ) - - // Collect variant calling outputs across samples - all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() - all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() - - // Combine GVCFs into a GenomicsDB data store and apply joint genotyping - GATK_JOINTGENOTYPING( - all_gvcfs_ch, - all_idxs_ch, - intervals_file, - params.cohort_name, - ref_file, - ref_index_file, - ref_dict_file - ) -} diff --git a/hello-nextflow/hello-nf-test/main.nf b/hello-nextflow/hello-nf-test/main.nf index a4f588684..532ff2289 100644 --- a/hello-nextflow/hello-nf-test/main.nf +++ b/hello-nextflow/hello-nf-test/main.nf @@ -11,10 +11,10 @@ workflow { reads_ch = Channel.fromPath(params.reads_bam).splitText() // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) // Create index file for input BAM file SAMTOOLS_INDEX(reads_ch) diff --git a/hello-nextflow/hello-operators.nf b/hello-nextflow/hello-operators.nf index f1c5338c9..682758326 100644 --- a/hello-nextflow/hello-operators.nf +++ b/hello-nextflow/hello-operators.nf @@ -8,29 +8,51 @@ params.reads_bam = "${projectDir}/data/sample_bams.txt" // Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" -params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" + +workflow { + + // Create input channel from 
a text file listing input file paths + reads_ch = Channel.fromPath(params.reads_bam).splitText() + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) +} /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } @@ -38,52 +60,26 @@ process SAMTOOLS_INDEX { * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.vcf" , emit: vcf - path "${input_bam}.vcf.idx" , emit: idx + path "${input_bam}.vcf", emit: vcf + path "${input_bam}.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.vcf \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.vcf \\ -L ${interval_list} """ } - -workflow { - - // Create input channel from a text file listing input file paths - reads_ch = 
Channel.fromPath(params.reads_bam).splitText() - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) -} diff --git a/hello-nextflow/hello-world.nf b/hello-nextflow/hello-world.nf index 53ee46f85..8645a8a88 100755 --- a/hello-nextflow/hello-world.nf +++ b/hello-nextflow/hello-world.nf @@ -1,21 +1,20 @@ #!/usr/bin/env nextflow +workflow { + + // emit a greeting + sayHello() +} + /* * Use echo to print 'Hello World!' to standard out */ process sayHello { - output: - stdout + stdout script: """ echo 'Hello World!' """ } - -workflow { - - // emit a greeting - sayHello() -} diff --git a/hello-nextflow/solutions/hello-config/4-nextflow.config b/hello-nextflow/solutions/hello-config/4-nextflow.config index 86d38f89f..fe25febe5 100644 --- a/hello-nextflow/solutions/hello-config/4-nextflow.config +++ b/hello-nextflow/solutions/hello-config/4-nextflow.config @@ -27,7 +27,7 @@ process { cpus = 2 memory = 2.GB // allocations for a specific process - withName: 'GATK_JOINTGENOTYPING' { + withName: GATK_JOINTGENOTYPING { cpus = 8 } } diff --git a/hello-nextflow/solutions/hello-config/5-nextflow.config b/hello-nextflow/solutions/hello-config/5-nextflow.config index b901560ef..68ac89e88 100644 --- a/hello-nextflow/solutions/hello-config/5-nextflow.config +++ b/hello-nextflow/solutions/hello-config/5-nextflow.config @@ -22,16 +22,16 @@ profiles { } demo { // Primary input (file of input files, one per line) - params.reads_bam = "data/sample_bams.txt" + params.reads_bam = "data/sample_bams.txt" // Accessory files - params.reference = "data/ref/ref.fasta" - 
params.reference_index = "data/ref/ref.fasta.fai" - params.reference_dict = "data/ref/ref.dict" - params.intervals = "data/ref/intervals.bed" + params.reference = "data/ref/ref.fasta" + params.reference_index = "data/ref/ref.fasta.fai" + params.reference_dict = "data/ref/ref.dict" + params.intervals = "data/ref/intervals.bed" // Base name for final output file - params.cohort_name = "family_trio" + params.cohort_name = "family_trio" } } @@ -40,7 +40,7 @@ process { cpus = 2 memory = 2.GB // allocations for a specific process - withName: 'GATK_JOINTGENOTYPING' { + withName: GATK_JOINTGENOTYPING { cpus = 8 } } @@ -51,14 +51,14 @@ process { params { // Primary input (file of input files, one per line) - reads_bam = null + reads_bam = null // Accessory files - reference = null - reference_index = null - reference_dict = null - intervals = null + reference = null + reference_index = null + reference_dict = null + intervals = null // Base name for final output file - cohort_name = "my_cohort" + cohort_name = "my_cohort" } diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-1.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-1.nf index 7d9915434..e539a319b 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-1.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-1.nf @@ -7,32 +7,30 @@ // Primary input params.reads_bam = "${projectDir}/data/bam/reads_mother.bam" +workflow { + + // Create input channel (single file via CLI parameter) + reads_ch = Channel.fromPath(params.reads_bam) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) +} + /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - path "${input_bam}.bai" + path "${input_bam}.bai" script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } - 
-workflow { - - // Create input channel (single file via CLI parameter) - reads_ch = Channel.fromPath(params.reads_bam) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) -} diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-2.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-2.nf index 6e1ef58d9..4a8da4415 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-2.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-2.nf @@ -8,29 +8,52 @@ params.reads_bam = "${projectDir}/data/bam/reads_mother.bam" // Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" -params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" + +workflow { + + // Create input channel (single file via CLI parameter) + reads_ch = Channel.fromPath(params.reads_bam) + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + reads_ch, + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) +} /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - path "${input_bam}.bai" + path "${input_bam}.bai" script: """ - samtools 
index '$input_bam' + samtools index '${input_bam}' """ } @@ -38,54 +61,27 @@ process SAMTOOLS_INDEX { * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam - path input_bam_index - path ref_fasta - path ref_index - path ref_dict - path interval_list + path input_bam + path input_bam_index + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.vcf" , emit: vcf - path "${input_bam}.vcf.idx" , emit: idx + path "${input_bam}.vcf", emit: vcf + path "${input_bam}.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.vcf \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.vcf \\ -L ${interval_list} """ } - -workflow { - - // Create input channel (single file via CLI parameter) - reads_ch = Channel.fromPath(params.reads_bam) - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - reads_ch, - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) -} diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-3.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-3.nf index 29fe15785..842a465bb 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-3.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-3.nf @@ -12,29 +12,51 @@ params.reads_bam = [ ] // Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" -params.reference_index = 
"${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" + +workflow { + + // Create input channel (single file via CLI parameter) + reads_ch = Channel.fromPath(params.reads_bam) + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) +} /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } @@ -42,52 +64,26 @@ process SAMTOOLS_INDEX { * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.vcf" , emit: vcf - path "${input_bam}.vcf.idx" , emit: idx + 
path "${input_bam}.vcf", emit: vcf + path "${input_bam}.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.vcf \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.vcf \\ -L ${interval_list} """ } - -workflow { - - // Create input channel (single file via CLI parameter) - reads_ch = Channel.fromPath(params.reads_bam) - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) -} diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf index c0cb837e6..682758326 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf @@ -8,29 +8,51 @@ params.reads_bam = "${projectDir}/data/sample_bams.txt" // Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" -params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" + +workflow { + + // Create input channel from a text file listing input file paths + reads_ch = Channel.fromPath(params.reads_bam).splitText() + + // Load the file paths for the accessory files (reference and intervals) + 
ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) +} /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } @@ -38,52 +60,26 @@ process SAMTOOLS_INDEX { * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.vcf" , emit: vcf - path "${input_bam}.vcf.idx" , emit: idx + path "${input_bam}.vcf", emit: vcf + path "${input_bam}.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.vcf \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.vcf \\ -L ${interval_list} """ } - -workflow { - - // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = 
file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) -} diff --git a/hello-nextflow/solutions/hello-modules/final-main.nf b/hello-nextflow/solutions/hello-modules/final-main.nf index a4f588684..532ff2289 100644 --- a/hello-nextflow/solutions/hello-modules/final-main.nf +++ b/hello-nextflow/solutions/hello-modules/final-main.nf @@ -11,10 +11,10 @@ workflow { reads_ch = Channel.fromPath(params.reads_bam).splitText() // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) // Create index file for input BAM file SAMTOOLS_INDEX(reads_ch) diff --git a/hello-nextflow/solutions/hello-modules/modules/local/gatk/haplotypecaller/main.nf b/hello-nextflow/solutions/hello-modules/modules/local/gatk/haplotypecaller/main.nf index 04748f765..68411191a 100644 --- a/hello-nextflow/solutions/hello-modules/modules/local/gatk/haplotypecaller/main.nf +++ b/hello-nextflow/solutions/hello-modules/modules/local/gatk/haplotypecaller/main.nf @@ -4,30 +4,28 @@ * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" conda "bioconda::gatk4=4.5.0.0" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), 
path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf", emit: vcf + path "${input_bam}.g.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.g.vcf \ - -L ${interval_list} \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.g.vcf \\ + -L ${interval_list} \\ -ERC GVCF """ } diff --git a/hello-nextflow/solutions/hello-modules/modules/local/gatk/jointgenotyping/main.nf b/hello-nextflow/solutions/hello-modules/modules/local/gatk/jointgenotyping/main.nf index a8babd379..9a9982a42 100644 --- a/hello-nextflow/solutions/hello-modules/modules/local/gatk/jointgenotyping/main.nf +++ b/hello-nextflow/solutions/hello-modules/modules/local/gatk/jointgenotyping/main.nf @@ -2,37 +2,35 @@ * Combine GVCFs into GenomicsDB datastore and run joint genotyping to produce cohort-level calls */ process GATK_JOINTGENOTYPING { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" conda "bioconda::gatk4=4.5.0.0" - publishDir 'results_genomics', mode: 'symlink' input: - path all_gvcfs - path all_idxs - path interval_list - val cohort_name - path ref_fasta - path ref_index - path ref_dict + path all_gvcfs + path all_idxs + path interval_list + val cohort_name + path ref_fasta + path ref_index + path ref_dict output: - path "${cohort_name}.joint.vcf" , emit: vcf - path "${cohort_name}.joint.vcf.idx", emit: idx + path "${cohort_name}.joint.vcf", emit: vcf + path "${cohort_name}.joint.vcf.idx", emit: idx script: - def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') + def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') """ - gatk GenomicsDBImport \ - ${gvcfs_line} \ - -L ${interval_list} \ + gatk GenomicsDBImport \\ + ${gvcfs_line} \\ + -L ${interval_list} \\ --genomicsdb-workspace-path 
${cohort_name}_gdb - gatk GenotypeGVCFs \ - -R ${ref_fasta} \ - -V gendb://${cohort_name}_gdb \ - -L ${interval_list} \ + gatk GenotypeGVCFs \\ + -R ${ref_fasta} \\ + -V gendb://${cohort_name}_gdb \\ + -L ${interval_list} \\ -O ${cohort_name}.joint.vcf """ } diff --git a/hello-nextflow/solutions/hello-modules/modules/local/samtools/index/main.nf b/hello-nextflow/solutions/hello-modules/modules/local/samtools/index/main.nf index 2f5d92227..2876cfa57 100644 --- a/hello-nextflow/solutions/hello-modules/modules/local/samtools/index/main.nf +++ b/hello-nextflow/solutions/hello-modules/modules/local/samtools/index/main.nf @@ -4,20 +4,18 @@ * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' conda "bioconda::samtools=1.20" - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf index c55772892..cecbe17f9 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf @@ -8,29 +8,51 @@ params.reads_bam = "${projectDir}/data/sample_bams.txt" // Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" -params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" + +workflow { + + // Create input 
channel from a text file listing input file paths + reads_ch = Channel.fromPath(params.reads_bam).splitText() + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) +} /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } @@ -38,53 +60,27 @@ process SAMTOOLS_INDEX { * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf", emit: vcf + path "${input_bam}.g.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.g.vcf \ - -L ${interval_list} \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.g.vcf \\ + -L ${interval_list} \\ -ERC GVCF """ } - -workflow { - - // Create input channel from a text file 
listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) -} diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf index fa6574aba..cd38b958b 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf @@ -8,32 +8,66 @@ params.reads_bam = "${projectDir}/data/sample_bams.txt" // Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" -params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" // Base name for final output file params.cohort_name = "family_trio" +workflow { + + // Create input channel from a text file listing input file paths + reads_ch = Channel.fromPath(params.reads_bam).splitText() + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants 
from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) + + // Collect variant calling outputs across samples + all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() + all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() + + // Combine GVCFs into a GenomicsDB datastore + GATK_GENOMICSDB( + all_gvcfs_ch, + all_idxs_ch, + intervals_file, + params.cohort_name + ) +} + /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } @@ -41,29 +75,27 @@ process SAMTOOLS_INDEX { * Call variants with GATK HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf", emit: vcf + path "${input_bam}.g.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.g.vcf \ - -L ${interval_list} \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.g.vcf \\ + -L ${interval_list} \\ -ERC GVCF """ } @@ -72,62 +104,24 @@ process GATK_HAPLOTYPECALLER { * Combine GVCFs into GenomicsDB datastore */ process GATK_GENOMICSDB { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - 
publishDir 'results_genomics', mode: 'symlink' input: - path all_gvcfs - path all_idxs - path interval_list - val cohort_name + path all_gvcfs + path all_idxs + path interval_list + val cohort_name output: - path "${cohort_name}_gdb" + path "${cohort_name}_gdb" script: - def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') + def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') """ - gatk GenomicsDBImport \ - ${gvcfs_line} \ - -L ${interval_list} \ + gatk GenomicsDBImport \\ + ${gvcfs_line} \\ + -L ${interval_list} \\ --genomicsdb-workspace-path ${cohort_name}_gdb """ } - -workflow { - - // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) - - // Collect variant calling outputs across samples - all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() - all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() - - // Combine GVCFs into a GenomicsDB datastore - GATK_GENOMICSDB( - all_gvcfs_ch, - all_idxs_ch, - intervals_file, - params.cohort_name - ) -} diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf index 7175c13f9..df834fbad 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf @@ -8,32 +8,69 @@ params.reads_bam = "${projectDir}/data/sample_bams.txt" // Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" 
-params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" // Base name for final output file params.cohort_name = "family_trio" +workflow { + + // Create input channel from a text file listing input file paths + reads_ch = Channel.fromPath(params.reads_bam).splitText() + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) + + // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) + + // Collect variant calling outputs across samples + all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() + all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() + + // Combine GVCFs into a GenomicsDB data store and apply joint genotyping + GATK_JOINTGENOTYPING( + all_gvcfs_ch, + all_idxs_ch, + intervals_file, + params.cohort_name, + ref_file, + ref_index_file, + ref_dict_file + ) +} + /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir 'results_genomics', mode: 'symlink' input: - path input_bam + path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") + tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' + samtools index '${input_bam}' """ } @@ -41,29 +78,27 @@ process SAMTOOLS_INDEX { * Call variants with GATK 
HaplotypeCaller */ process GATK_HAPLOTYPECALLER { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list output: - path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf", emit: vcf + path "${input_bam}.g.vcf.idx", emit: idx script: """ - gatk HaplotypeCaller \ - -R ${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.g.vcf \ - -L ${interval_list} \ + gatk HaplotypeCaller \\ + -R ${ref_fasta} \\ + -I ${input_bam} \\ + -O ${input_bam}.g.vcf \\ + -L ${interval_list} \\ -ERC GVCF """ } @@ -72,75 +107,34 @@ process GATK_HAPLOTYPECALLER { * Combine GVCFs into GenomicsDB datastore and run joint genotyping to produce cohort-level calls */ process GATK_JOINTGENOTYPING { - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - publishDir 'results_genomics', mode: 'symlink' input: - path all_gvcfs - path all_idxs - path interval_list - val cohort_name - path ref_fasta - path ref_index - path ref_dict + path all_gvcfs + path all_idxs + path interval_list + val cohort_name + path ref_fasta + path ref_index + path ref_dict output: - path "${cohort_name}.joint.vcf" - path "${cohort_name}.joint.vcf.idx" + path "${cohort_name}.joint.vcf" + path "${cohort_name}.joint.vcf.idx" script: - def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') + def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') """ - gatk GenomicsDBImport \ - ${gvcfs_line} \ - -L ${interval_list} \ + gatk GenomicsDBImport \\ + ${gvcfs_line} \\ + -L ${interval_list} \\ --genomicsdb-workspace-path ${cohort_name}_gdb - gatk GenotypeGVCFs \ - -R ${ref_fasta} \ - -V gendb://${cohort_name}_gdb \ - -L 
${interval_list} \ + gatk GenotypeGVCFs \\ + -R ${ref_fasta} \\ + -V gendb://${cohort_name}_gdb \\ + -L ${interval_list} \\ -O ${cohort_name}.joint.vcf """ } - -workflow { - - // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) - - // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) - - // Collect variant calling outputs across samples - all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() - all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() - - // Combine GVCFs into a GenomicsDB data store and apply joint genotyping - GATK_JOINTGENOTYPING( - all_gvcfs_ch, - all_idxs_ch, - intervals_file, - params.cohort_name, - ref_file, - ref_index_file, - ref_dict_file - ) -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-1.nf b/hello-nextflow/solutions/hello-world/hello-world-1.nf index a67f1016c..7c791526d 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-1.nf +++ b/hello-nextflow/solutions/hello-world/hello-world-1.nf @@ -1,16 +1,15 @@ #!/usr/bin/env nextflow +workflow { + sayHello() +} -process sayHello { +process sayHello { output: - stdout + stdout script: """ echo 'Hello World!' 
""" } - -workflow { - sayHello() -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-2.nf b/hello-nextflow/solutions/hello-world/hello-world-2.nf index 53ee46f85..8645a8a88 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-2.nf +++ b/hello-nextflow/solutions/hello-world/hello-world-2.nf @@ -1,21 +1,20 @@ #!/usr/bin/env nextflow +workflow { + + // emit a greeting + sayHello() +} + /* * Use echo to print 'Hello World!' to standard out */ process sayHello { - output: - stdout + stdout script: """ echo 'Hello World!' """ } - -workflow { - - // emit a greeting - sayHello() -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-3.nf b/hello-nextflow/solutions/hello-world/hello-world-3.nf index 026a593a8..7c8192856 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-3.nf +++ b/hello-nextflow/solutions/hello-world/hello-world-3.nf @@ -1,23 +1,22 @@ #!/usr/bin/env nextflow +workflow { + + // emit a greeting + sayHello() +} + /* * Use echo to print 'Hello World!' to standard out */ process sayHello { - publishDir 'results', mode: 'copy' output: - path 'output.txt' + path 'output.txt' script: """ echo 'Hello World!' > output.txt """ } - -workflow { - - // emit a greeting - sayHello() -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-4.nf b/hello-nextflow/solutions/hello-world/hello-world-4.nf index 026a593a8..7c8192856 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-4.nf +++ b/hello-nextflow/solutions/hello-world/hello-world-4.nf @@ -1,23 +1,22 @@ #!/usr/bin/env nextflow +workflow { + + // emit a greeting + sayHello() +} + /* * Use echo to print 'Hello World!' to standard out */ process sayHello { - publishDir 'results', mode: 'copy' output: - path 'output.txt' + path 'output.txt' script: """ echo 'Hello World!' 
> output.txt """ } - -workflow { - - // emit a greeting - sayHello() -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-5.nf b/hello-nextflow/solutions/hello-world/hello-world-5.nf index 7d7720960..fde98a64c 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-5.nf +++ b/hello-nextflow/solutions/hello-world/hello-world-5.nf @@ -1,29 +1,28 @@ #!/usr/bin/env nextflow +workflow { + + // create a channel for inputs + greeting_ch = Channel.of('Hello world!') + + // emit a greeting + sayHello(greeting_ch) +} + /* * Use echo to print 'Hello World!' to standard out */ process sayHello { - publishDir 'results', mode: 'copy' input: - val greeting + val greeting output: - path "output.txt" + path "output.txt" script: """ - echo '$greeting' > output.txt + echo '${greeting}' > output.txt """ } - -workflow { - - // create a channel for inputs - greeting_ch = Channel.of('Hello world!') - - // emit a greeting - sayHello(greeting_ch) -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-6.nf b/hello-nextflow/solutions/hello-world/hello-world-6.nf index 42350793a..45bb814a7 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-6.nf +++ b/hello-nextflow/solutions/hello-world/hello-world-6.nf @@ -5,30 +5,29 @@ */ params.greeting = "Bonjour le monde!" +workflow { + + // create a channel for inputs + greeting_ch = Channel.of(params.greeting) + + // emit a greeting + sayHello(greeting_ch) +} + /* * Use echo to print 'Hello World!' 
to standard out */ process sayHello { - publishDir 'results', mode: 'copy' input: - val greeting + val greeting output: - path "output.txt" + path "output.txt" script: """ - echo '$greeting' > "output.txt" + echo '${greeting}' > "output.txt" """ } - -workflow { - - // create a channel for inputs - greeting_ch = Channel.of(params.greeting) - - // emit a greeting - sayHello(greeting_ch) -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-7.nf b/hello-nextflow/solutions/hello-world/hello-world-7.nf index 401dc4ff9..78064df09 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-7.nf +++ b/hello-nextflow/solutions/hello-world/hello-world-7.nf @@ -5,22 +5,33 @@ */ params.greeting = "Bonjour le monde!" +workflow { + + // create a channel for inputs + greeting_ch = Channel.of(params.greeting) + + // emit a greeting + sayHello(greeting_ch) + + // convert the greeting to uppercase + convertToUpper(sayHello.out) +} + /* * Use echo to print 'Hello World!' to standard out */ process sayHello { - publishDir 'results', mode: 'copy' input: - val greeting + val greeting output: - path "output.txt" + path "output.txt" script: """ - echo '$greeting' > "output.txt" + echo '${greeting}' > "output.txt" """ } @@ -29,25 +40,13 @@ process sayHello { */ process convertToUpper { input: - path input_file + path input_file output: - path "UPPER-${input_file}" + path "UPPER-${input_file}" script: """ - cat '$input_file' | tr '[a-z]' '[A-Z]' > UPPER-${input_file} + cat '${input_file}' | tr '[a-z]' '[A-Z]' > UPPER-${input_file} """ } - -workflow { - - // create a channel for inputs - greeting_ch = Channel.of(params.greeting) - - // emit a greeting - sayHello(greeting_ch) - - // convert the greeting to uppercase - convertToUpper(sayHello.out) -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-8.nf b/hello-nextflow/solutions/hello-world/hello-world-8.nf index 5b7eba63c..80ba67e02 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-8.nf +++ 
b/hello-nextflow/solutions/hello-world/hello-world-8.nf @@ -5,22 +5,33 @@ */ params.greeting = "Bonjour le monde!" +workflow { + + // create a channel for inputs + greeting_ch = Channel.of('Hello', 'Bonjour', 'Holà') + + // emit a greeting + sayHello(greeting_ch) + + // convert the greeting to uppercase + convertToUpper(sayHello.out) +} + /* * Use echo to print 'Hello World!' to standard out */ process sayHello { - publishDir 'results', mode: 'copy' input: - val greeting + val greeting output: - path "${greeting}-output.txt" + path "${greeting}-output.txt" script: """ - echo '$greeting' > '$greeting-output.txt' + echo '${greeting}' > '${greeting}-output.txt' """ } @@ -29,25 +40,13 @@ process sayHello { */ process convertToUpper { input: - path input_file + path input_file output: - path "UPPER-${input_file}" + path "UPPER-${input_file}" script: """ - cat '$input_file' | tr '[a-z]' '[A-Z]' > UPPER-${input_file} + cat '${input_file}' | tr '[a-z]' '[A-Z]' > UPPER-${input_file} """ } - -workflow { - - // create a channel for inputs - greeting_ch = Channel.of('Hello', 'Bonjour', 'Holà') - - // emit a greeting - sayHello(greeting_ch) - - // convert the greeting to uppercase - convertToUpper(sayHello.out) -} diff --git a/hello-nextflow/solutions/hello-world/hello-world-9.nf b/hello-nextflow/solutions/hello-world/hello-world-9.nf index 22bcb5f07..58820b347 100755 --- a/hello-nextflow/solutions/hello-world/hello-world-9.nf +++ b/hello-nextflow/solutions/hello-world/hello-world-9.nf @@ -5,22 +5,33 @@ */ params.input_file = "data/greetings.csv" +workflow { + + // create a channel for inputs from a CSV file + greeting_ch = Channel.fromPath(params.input_file).splitCsv().flatten() + + // emit a greeting + sayHello(greeting_ch) + + // convert the greeting to uppercase + convertToUpper(sayHello.out) +} + /* * Use echo to print 'Hello World!' 
to standard out */ process sayHello { - publishDir 'results', mode: 'copy' input: - val greeting + val greeting output: - path "${greeting}-output.txt" + path "${greeting}-output.txt" script: """ - echo '$greeting' > '$greeting-output.txt' + echo '${greeting}' > '${greeting}-output.txt' """ } @@ -28,29 +39,16 @@ process sayHello { * Use a text replace utility to convert the greeting to uppercase */ process convertToUpper { - publishDir 'results', mode: 'copy' input: - path input_file + path input_file output: - path "UPPER-${input_file}" + path "UPPER-${input_file}" script: """ - cat '$input_file' | tr '[a-z]' '[A-Z]' > UPPER-${input_file} + cat '${input_file}' | tr '[a-z]' '[A-Z]' > UPPER-${input_file} """ } - -workflow { - - // create a channel for inputs from a CSV file - greeting_ch = Channel.fromPath(params.input_file).splitCsv().flatten() - - // emit a greeting - sayHello(greeting_ch) - - // convert the greeting to uppercase - convertToUpper(sayHello.out) -}