From cc1cec95bb76e361ac9cdb4c33c0c09566387875 Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Wed, 6 Nov 2024 18:08:06 +0000 Subject: [PATCH 1/6] Use relative paths for inputs to make the training portable The training encoded an absolute path in the samplesheet. This broke the training when run in a devcontainer. Importantly, Gitpod will be deprecating their custom format in April 2025 and using devcontainers so even if we don't merge this PR it will help indicate what we need to change. --- docs/hello_nextflow/04_hello_genomics.md | 3 +- hello-nextflow/data/sample_bams.txt | 6 +- hello-nextflow/hello-config/main.nf | 3 +- hello-nextflow/hello-genomics.nf | 65 +++++++++++++++++-- hello-nextflow/hello-modules/main.nf | 3 +- hello-nextflow/hello-nf-test/main.nf | 3 +- hello-nextflow/hello-operators.nf | 3 +- .../solutions/hello-config/final-main.nf | 5 +- .../hello-genomics/hello-genomics-4.nf | 5 +- .../solutions/hello-modules/final-main.nf | 3 +- .../hello-operators/hello-operators-1.nf | 5 +- .../hello-operators/hello-operators-2.nf | 5 +- .../hello-operators/hello-operators-3.nf | 5 +- 13 files changed, 90 insertions(+), 24 deletions(-) diff --git a/docs/hello_nextflow/04_hello_genomics.md b/docs/hello_nextflow/04_hello_genomics.md index 64252116a..06231fe0d 100644 --- a/docs/hello_nextflow/04_hello_genomics.md +++ b/docs/hello_nextflow/04_hello_genomics.md @@ -770,7 +770,8 @@ _After:_ ```groovy title="hello-genomics.nf" linenums="68" // Create input channel from a text file listing input file paths -reads_ch = Channel.fromPath(params.reads_bam).splitText() +reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } ``` !!! 
tip diff --git a/hello-nextflow/data/sample_bams.txt b/hello-nextflow/data/sample_bams.txt index 64e2ce928..c9a6f6bab 100644 --- a/hello-nextflow/data/sample_bams.txt +++ b/hello-nextflow/data/sample_bams.txt @@ -1,3 +1,3 @@ -/workspace/gitpod/hello-nextflow/data/bam/reads_mother.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_father.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_son.bam +data/bam/reads_mother.bam +data/bam/reads_father.bam +data/bam/reads_son.bam diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf index c738a0748..e4f615f51 100644 --- a/hello-nextflow/hello-config/main.nf +++ b/hello-nextflow/hello-config/main.nf @@ -109,7 +109,8 @@ process GATK_JOINTGENOTYPING { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-genomics.nf b/hello-nextflow/hello-genomics.nf index 211c97165..13ecd4c63 100755 --- a/hello-nextflow/hello-genomics.nf +++ b/hello-nextflow/hello-genomics.nf @@ -4,32 +4,87 @@ * Pipeline parameters */ -// Primary input +// Primary input (file of input files, one per line) +params.reads_bam = "${projectDir}/data/sample_bams.txt" + +// Accessory files +params.reference = "${projectDir}/data/ref/ref.fasta" +params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.reference_dict = "${projectDir}/data/ref/ref.dict" +params.intervals = "${projectDir}/data/ref/intervals.bed" /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container + container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' - publishDir + publishDir 'results_genomics', mode: 'symlink' input: + path input_bam output: + tuple path(input_bam), path("${input_bam}.bai") 
script: """ - + samtools index '$input_bam' """ +} + +/* + * Call variants with GATK HaplotypeCaller + */ +process GATK_HAPLOTYPECALLER { + + container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" + + publishDir 'results_genomics', mode: 'symlink' + input: + tuple path(input_bam), path(input_bam_index) + path ref_fasta + path ref_index + path ref_dict + path interval_list + + output: + path "${input_bam}.vcf" , emit: vcf + path "${input_bam}.vcf.idx" , emit: idx + + script: + """ + gatk HaplotypeCaller \ + -R ${ref_fasta} \ + -I ${input_bam} \ + -O ${input_bam}.vcf \ + -L ${interval_list} + """ } workflow { - // Create input channel + // Create input channel from a text file listing input file paths + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } + + // Load the file paths for the accessory files (reference and intervals) + ref_file = file(params.reference) + ref_index_file = file(params.reference_index) + ref_dict_file = file(params.reference_dict) + intervals_file = file(params.intervals) // Create index file for input BAM file + SAMTOOLS_INDEX(reads_ch) + // Call variants from the indexed BAM file + GATK_HAPLOTYPECALLER( + SAMTOOLS_INDEX.out, + ref_file, + ref_index_file, + ref_dict_file, + intervals_file + ) } diff --git a/hello-nextflow/hello-modules/main.nf b/hello-nextflow/hello-modules/main.nf index ddbd345a9..230af9d4a 100644 --- a/hello-nextflow/hello-modules/main.nf +++ b/hello-nextflow/hello-modules/main.nf @@ -96,7 +96,8 @@ process GATK_JOINTGENOTYPING { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-nf-test/main.nf b/hello-nextflow/hello-nf-test/main.nf 
index a4f588684..ad8726201 100644 --- a/hello-nextflow/hello-nf-test/main.nf +++ b/hello-nextflow/hello-nf-test/main.nf @@ -8,7 +8,8 @@ include { GATK_JOINTGENOTYPING } from './modules/local/gatk/jointgenotyping/main workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-operators.nf b/hello-nextflow/hello-operators.nf index f1c5338c9..13ecd4c63 100644 --- a/hello-nextflow/hello-operators.nf +++ b/hello-nextflow/hello-operators.nf @@ -67,7 +67,8 @@ process GATK_HAPLOTYPECALLER { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-config/final-main.nf b/hello-nextflow/solutions/hello-config/final-main.nf index 5179779e1..230af9d4a 100644 --- a/hello-nextflow/solutions/hello-config/final-main.nf +++ b/hello-nextflow/solutions/hello-config/final-main.nf @@ -15,7 +15,7 @@ process SAMTOOLS_INDEX { output: tuple path(input_bam), path("${input_bam}.bai") - + script: """ samtools index '$input_bam' @@ -96,7 +96,8 @@ process GATK_JOINTGENOTYPING { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git 
a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf index c0cb837e6..13ecd4c63 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf @@ -52,7 +52,7 @@ process GATK_HAPLOTYPECALLER { output: path "${input_bam}.vcf" , emit: vcf - path "${input_bam}.vcf.idx" , emit: idx + path "${input_bam}.vcf.idx" , emit: idx script: """ @@ -67,7 +67,8 @@ process GATK_HAPLOTYPECALLER { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-modules/final-main.nf b/hello-nextflow/solutions/hello-modules/final-main.nf index a4f588684..ad8726201 100644 --- a/hello-nextflow/solutions/hello-modules/final-main.nf +++ b/hello-nextflow/solutions/hello-modules/final-main.nf @@ -8,7 +8,8 @@ include { GATK_JOINTGENOTYPING } from './modules/local/gatk/jointgenotyping/main workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf index c55772892..ffe4b5a35 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf @@ -52,7 +52,7 @@ process GATK_HAPLOTYPECALLER { output: path "${input_bam}.g.vcf" , emit: vcf - path 
"${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf.idx" , emit: idx script: """ @@ -68,7 +68,8 @@ process GATK_HAPLOTYPECALLER { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf index fa6574aba..a478e4aac 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf @@ -55,7 +55,7 @@ process GATK_HAPLOTYPECALLER { output: path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf.idx" , emit: idx script: """ @@ -99,7 +99,8 @@ process GATK_GENOMICSDB { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf index 7175c13f9..e4f615f51 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf @@ -55,7 +55,7 @@ process GATK_HAPLOTYPECALLER { output: path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf.idx" , emit: idx script: """ @@ -109,7 +109,8 @@ process GATK_JOINTGENOTYPING { workflow { // Create input channel from a text file listing input 
file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamFile -> file(bamFile.strip()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) From 2e9e2d43724e8f18ae839cfcfe3153f3d9af5e78 Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Wed, 6 Nov 2024 18:13:36 +0000 Subject: [PATCH 2/6] Use slightly simpler map statement --- docs/hello_nextflow/04_hello_genomics.md | 10 +-- hello-nextflow/hello-config/main.nf | 3 +- hello-nextflow/hello-genomics.nf | 65 ++----------------- hello-nextflow/hello-modules/main.nf | 3 +- hello-nextflow/hello-nf-test/main.nf | 3 +- hello-nextflow/hello-operators.nf | 3 +- .../solutions/hello-config/final-main.nf | 3 +- .../hello-genomics/hello-genomics-4.nf | 3 +- .../solutions/hello-modules/final-main.nf | 3 +- .../hello-operators/hello-operators-1.nf | 3 +- .../hello-operators/hello-operators-2.nf | 3 +- .../hello-operators/hello-operators-3.nf | 3 +- 12 files changed, 30 insertions(+), 75 deletions(-) diff --git a/docs/hello_nextflow/04_hello_genomics.md b/docs/hello_nextflow/04_hello_genomics.md index 06231fe0d..4cff7bfc7 100644 --- a/docs/hello_nextflow/04_hello_genomics.md +++ b/docs/hello_nextflow/04_hello_genomics.md @@ -768,11 +768,11 @@ reads_ch = Channel.fromPath(params.reads_bam) _After:_ -```groovy title="hello-genomics.nf" linenums="68" +````groovy title="hello-genomics.nf" linenums="68" // Create input channel from a text file listing input file paths -reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } -``` + reads_ch = Channel.fromPath(params.reads_bam) + .splitText() + .map { it.trim() }``` !!! tip @@ -784,7 +784,7 @@ Let's run the workflow one more time. ```bash nextflow run hello-genomics.nf -resume -``` +```` This should produce the same result as before, right? 
diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf index e4f615f51..96f16ab5d 100644 --- a/hello-nextflow/hello-config/main.nf +++ b/hello-nextflow/hello-config/main.nf @@ -110,7 +110,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-genomics.nf b/hello-nextflow/hello-genomics.nf index 13ecd4c63..211c97165 100755 --- a/hello-nextflow/hello-genomics.nf +++ b/hello-nextflow/hello-genomics.nf @@ -4,87 +4,32 @@ * Pipeline parameters */ -// Primary input (file of input files, one per line) -params.reads_bam = "${projectDir}/data/sample_bams.txt" - -// Accessory files -params.reference = "${projectDir}/data/ref/ref.fasta" -params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.reference_dict = "${projectDir}/data/ref/ref.dict" -params.intervals = "${projectDir}/data/ref/intervals.bed" +// Primary input /* * Generate BAM index file */ process SAMTOOLS_INDEX { - container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' + container - publishDir 'results_genomics', mode: 'symlink' + publishDir input: - path input_bam output: - tuple path(input_bam), path("${input_bam}.bai") script: """ - samtools index '$input_bam' - """ -} - -/* - * Call variants with GATK HaplotypeCaller - */ -process GATK_HAPLOTYPECALLER { - - container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" - - publishDir 'results_genomics', mode: 'symlink' - input: - tuple path(input_bam), path(input_bam_index) - path ref_fasta - path ref_index - path ref_dict - path interval_list - - output: - path "${input_bam}.vcf" , emit: vcf - path "${input_bam}.vcf.idx" , emit: idx - - script: - """ - gatk HaplotypeCaller \ - -R 
${ref_fasta} \ - -I ${input_bam} \ - -O ${input_bam}.vcf \ - -L ${interval_list} """ + } workflow { - // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } - - // Load the file paths for the accessory files (reference and intervals) - ref_file = file(params.reference) - ref_index_file = file(params.reference_index) - ref_dict_file = file(params.reference_dict) - intervals_file = file(params.intervals) + // Create input channel // Create index file for input BAM file - SAMTOOLS_INDEX(reads_ch) - // Call variants from the indexed BAM file - GATK_HAPLOTYPECALLER( - SAMTOOLS_INDEX.out, - ref_file, - ref_index_file, - ref_dict_file, - intervals_file - ) } diff --git a/hello-nextflow/hello-modules/main.nf b/hello-nextflow/hello-modules/main.nf index 230af9d4a..c05a3995e 100644 --- a/hello-nextflow/hello-modules/main.nf +++ b/hello-nextflow/hello-modules/main.nf @@ -97,7 +97,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-nf-test/main.nf b/hello-nextflow/hello-nf-test/main.nf index ad8726201..1ae2f3b91 100644 --- a/hello-nextflow/hello-nf-test/main.nf +++ b/hello-nextflow/hello-nf-test/main.nf @@ -9,7 +9,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-operators.nf b/hello-nextflow/hello-operators.nf index 13ecd4c63..9ba6aeb45 100644 --- 
a/hello-nextflow/hello-operators.nf +++ b/hello-nextflow/hello-operators.nf @@ -68,7 +68,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-config/final-main.nf b/hello-nextflow/solutions/hello-config/final-main.nf index 230af9d4a..c05a3995e 100644 --- a/hello-nextflow/solutions/hello-config/final-main.nf +++ b/hello-nextflow/solutions/hello-config/final-main.nf @@ -97,7 +97,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf index 13ecd4c63..9ba6aeb45 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf @@ -68,7 +68,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-modules/final-main.nf b/hello-nextflow/solutions/hello-modules/final-main.nf index ad8726201..1ae2f3b91 100644 --- a/hello-nextflow/solutions/hello-modules/final-main.nf +++ b/hello-nextflow/solutions/hello-modules/final-main.nf @@ -9,7 +9,8 @@ workflow { // Create input channel from a text file 
listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf index ffe4b5a35..a57cdc328 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf @@ -69,7 +69,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf index a478e4aac..14b4d3702 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf @@ -100,7 +100,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf index e4f615f51..96f16ab5d 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf @@ -110,7 +110,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = 
Channel.fromPath(params.reads_bam) - .splitText() { bamFile -> file(bamFile.strip()) } + .splitText() + .map { it.trim() } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) From e7957dec4300751a6cc1cfd829243e4479d8b3cb Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Wed, 6 Nov 2024 18:20:01 +0000 Subject: [PATCH 3/6] Change reference to changing into the absolute path --- docs/hello_nextflow/03_hello_containers.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/hello_nextflow/03_hello_containers.md b/docs/hello_nextflow/03_hello_containers.md index 2e43fdb6a..4963973c9 100644 --- a/docs/hello_nextflow/03_hello_containers.md +++ b/docs/hello_nextflow/03_hello_containers.md @@ -115,9 +115,11 @@ One way to do this is to **mount** a **volume** from the host system into the co Prior to working on the next task, confirm that you are in the `hello-nextflow` directory. ```bash -cd /workspace/gitpod/hello-nextflow +pwd ``` +This should show `/workspaces/training/hello-nextflow`. The important point is that `hello-nextflow` is the final component of the path. 
+ Then run: ```bash From e83a38f9c7fe1d8529d1f0b2999ce3d4af6b7825 Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Wed, 6 Nov 2024 19:08:58 +0000 Subject: [PATCH 4/6] Use file method to convert string to path --- docs/hello_nextflow/04_hello_genomics.md | 20 +++++++++---------- hello-nextflow/hello-config/main.nf | 3 +-- hello-nextflow/hello-modules/main.nf | 3 +-- hello-nextflow/hello-nf-test/main.nf | 3 +-- hello-nextflow/hello-operators.nf | 3 +-- .../solutions/hello-config/final-main.nf | 3 +-- .../hello-genomics/hello-genomics-4.nf | 3 +-- .../solutions/hello-modules/final-main.nf | 3 +-- .../hello-operators/hello-operators-1.nf | 3 +-- .../hello-operators/hello-operators-2.nf | 3 +-- .../hello-operators/hello-operators-3.nf | 3 +-- 11 files changed, 20 insertions(+), 30 deletions(-) diff --git a/docs/hello_nextflow/04_hello_genomics.md b/docs/hello_nextflow/04_hello_genomics.md index 4cff7bfc7..4c6a1cb85 100644 --- a/docs/hello_nextflow/04_hello_genomics.md +++ b/docs/hello_nextflow/04_hello_genomics.md @@ -55,7 +55,7 @@ The tools we need (Samtools and GATK) are not installed in the Gitpod environmen !!! note Make sure you're in the correct working directory: - `cd /workspace/gitpod/hello-nextflow` + `pwd` should return a path ending in `hello-nextflow` ### 0.1. 
Index a BAM input file with Samtools @@ -561,9 +561,9 @@ This error will not reproduce consistently because it is dependent on some varia This is what the output of the two `.view` calls we added looks like for a failed run: ```console title="Output" -/workspace/gitpod/hello-nextflow/data/bam/reads_mother.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_father.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_son.bam +./data/bam/reads_mother.bam +./data/bam/reads_father.bam +./data/bam/reads_son.bam /workspace/gitpod/hello-nextflow/work/9c/53492e3518447b75363e1cd951be4b/reads_father.bam.bai /workspace/gitpod/hello-nextflow/work/cc/37894fffdf6cc84c3b0b47f9b536b7/reads_son.bam.bai /workspace/gitpod/hello-nextflow/work/4d/dff681a3d137ba7d9866e3d9307bd0/reads_mother.bam.bai @@ -717,9 +717,9 @@ Here we are going to show you how to do the simple case. We already made a text file listing the input file paths, called `sample_bams.txt`, which you can find in the `data/` directory. ```txt title="sample_bams.txt" -/workspace/gitpod/hello-nextflow/data/bam/reads_mother.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_father.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_son.bam +data/bam/reads_mother.bam +data/bam/reads_father.bam +data/bam/reads_son.bam ``` As you can see, we listed one file path per line, and they are absolute paths. @@ -770,9 +770,9 @@ _After:_ ````groovy title="hello-genomics.nf" linenums="68" // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() }``` +reads_ch = Channel.fromPath(params.reads_bam) + .splitText() { bamPath -> file(bamPath.trim()) } +``` !!! 
tip diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf index 96f16ab5d..e012dab6b 100644 --- a/hello-nextflow/hello-config/main.nf +++ b/hello-nextflow/hello-config/main.nf @@ -110,8 +110,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-modules/main.nf b/hello-nextflow/hello-modules/main.nf index c05a3995e..acd40bb32 100644 --- a/hello-nextflow/hello-modules/main.nf +++ b/hello-nextflow/hello-modules/main.nf @@ -97,8 +97,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-nf-test/main.nf b/hello-nextflow/hello-nf-test/main.nf index 1ae2f3b91..a651598b7 100644 --- a/hello-nextflow/hello-nf-test/main.nf +++ b/hello-nextflow/hello-nf-test/main.nf @@ -9,8 +9,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-operators.nf b/hello-nextflow/hello-operators.nf index 9ba6aeb45..e469c68fd 100644 --- a/hello-nextflow/hello-operators.nf +++ b/hello-nextflow/hello-operators.nf @@ -68,8 +68,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { 
it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-config/final-main.nf b/hello-nextflow/solutions/hello-config/final-main.nf index c05a3995e..acd40bb32 100644 --- a/hello-nextflow/solutions/hello-config/final-main.nf +++ b/hello-nextflow/solutions/hello-config/final-main.nf @@ -97,8 +97,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf index 9ba6aeb45..e469c68fd 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf @@ -68,8 +68,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-modules/final-main.nf b/hello-nextflow/solutions/hello-modules/final-main.nf index 1ae2f3b91..a651598b7 100644 --- a/hello-nextflow/solutions/hello-modules/final-main.nf +++ b/hello-nextflow/solutions/hello-modules/final-main.nf @@ -9,8 +9,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = 
file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf index a57cdc328..55fce4452 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf @@ -69,8 +69,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf index 14b4d3702..4a17e0d41 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf @@ -100,8 +100,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf index 96f16ab5d..e012dab6b 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf @@ -110,8 +110,7 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() - .map { it.trim() } + .splitText() { bamPath -> file(bamPath.trim()) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) From 
08428f0447a8ad7584235c7da3f4259db97e3959 Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Thu, 7 Nov 2024 09:48:06 +0000 Subject: [PATCH 5/6] Add results_genomics to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ce58c7f03..04afb0ef6 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ work nf-training/results transcript-index .vscode +results_genomics From 603e7acd807a96a9443402b9f7de4e1674b8c67a Mon Sep 17 00:00:00 2001 From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> Date: Thu, 7 Nov 2024 09:51:40 +0000 Subject: [PATCH 6/6] Use splitCsv to parse the samplesheet Use splitCsv to parse the sample_bams samplesheet, which means we do not need to strip the newline characters from the file paths and can use relative paths. --- docs/hello_nextflow/04_hello_genomics.md | 5 +++-- hello-nextflow/hello-config/main.nf | 3 ++- hello-nextflow/hello-modules/main.nf | 3 ++- hello-nextflow/hello-nf-test/main.nf | 3 ++- hello-nextflow/hello-operators.nf | 3 ++- hello-nextflow/solutions/hello-config/final-main.nf | 3 ++- hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf | 3 ++- hello-nextflow/solutions/hello-modules/final-main.nf | 3 ++- .../solutions/hello-operators/hello-operators-1.nf | 3 ++- .../solutions/hello-operators/hello-operators-2.nf | 3 ++- .../solutions/hello-operators/hello-operators-3.nf | 3 ++- 11 files changed, 23 insertions(+), 12 deletions(-) diff --git a/docs/hello_nextflow/04_hello_genomics.md b/docs/hello_nextflow/04_hello_genomics.md index 4c6a1cb85..567551633 100644 --- a/docs/hello_nextflow/04_hello_genomics.md +++ b/docs/hello_nextflow/04_hello_genomics.md @@ -757,7 +757,7 @@ This way we can continue to be lazy, but the list of files no longer lives in th Currently, our input channel factory treats any files we give it as the data inputs we want to feed to the indexing process. 
Since we're now giving it a file that lists input file paths, we need to change its behavior to parse the file and treat the file paths it contains as the data inputs. -Fortunately we can do that very simply, just by adding the [`.splitText()` operator](https://www.nextflow.io/docs/latest/reference/operator.html#operator-splittext) to the channel construction step. +We are going to use the [`.splitCsv()`](https://www.nextflow.io/docs/latest/operator.html#operator-splitcsv) operator to parse the file into rows (each row is a list of values), and then use `.map()` to take the first value of each row and convert it into a file path object. This introduces some advanced concepts that we'll explain in more detail later in this training series, but for now it's enough to understand that we can manipulate the contents of the samplesheet after we read it in but before we use it. _Before:_ @@ -771,7 +771,8 @@ _After:_ ````groovy title="hello-genomics.nf" linenums="68" // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } ``` !!!
tip diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf index e012dab6b..8fbe9c4d8 100644 --- a/hello-nextflow/hello-config/main.nf +++ b/hello-nextflow/hello-config/main.nf @@ -110,7 +110,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-modules/main.nf b/hello-nextflow/hello-modules/main.nf index acd40bb32..86680ed06 100644 --- a/hello-nextflow/hello-modules/main.nf +++ b/hello-nextflow/hello-modules/main.nf @@ -97,7 +97,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-nf-test/main.nf b/hello-nextflow/hello-nf-test/main.nf index a651598b7..fda4f8c2d 100644 --- a/hello-nextflow/hello-nf-test/main.nf +++ b/hello-nextflow/hello-nf-test/main.nf @@ -9,7 +9,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-operators.nf b/hello-nextflow/hello-operators.nf index e469c68fd..08519f17d 100644 --- a/hello-nextflow/hello-operators.nf +++ b/hello-nextflow/hello-operators.nf @@ -68,7 +68,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = 
Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-config/final-main.nf b/hello-nextflow/solutions/hello-config/final-main.nf index acd40bb32..86680ed06 100644 --- a/hello-nextflow/solutions/hello-config/final-main.nf +++ b/hello-nextflow/solutions/hello-config/final-main.nf @@ -97,7 +97,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf index e469c68fd..08519f17d 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf @@ -68,7 +68,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-modules/final-main.nf b/hello-nextflow/solutions/hello-modules/final-main.nf index a651598b7..fda4f8c2d 100644 --- a/hello-nextflow/solutions/hello-modules/final-main.nf +++ b/hello-nextflow/solutions/hello-modules/final-main.nf @@ -9,7 +9,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + 
.map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf index 55fce4452..c84a50a12 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf @@ -69,7 +69,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf index 4a17e0d41..bf16b79ed 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf @@ -100,7 +100,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf index e012dab6b..8fbe9c4d8 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf @@ -110,7 +110,8 @@ workflow { // Create input channel from a text file listing input file paths reads_ch = Channel.fromPath(params.reads_bam) - .splitText() { bamPath -> file(bamPath.trim()) } + .splitCsv() + .map { 
bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference)