diff --git a/.gitignore b/.gitignore index ce58c7f03..04afb0ef6 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ work nf-training/results transcript-index .vscode +results_genomics diff --git a/docs/hello_nextflow/03_hello_containers.md b/docs/hello_nextflow/03_hello_containers.md index 164f3b6e9..333eaaac9 100644 --- a/docs/hello_nextflow/03_hello_containers.md +++ b/docs/hello_nextflow/03_hello_containers.md @@ -115,9 +115,11 @@ One way to do this is to **mount** a **volume** from the host system into the co Prior to working on the next task, confirm that you are in the `hello-nextflow` directory. ```bash -cd /workspace/gitpod/hello-nextflow +pwd ``` +This should show `/workspaces/training/hello-nextflow`. The important point is that `hello-nextflow` is the final component of the path. + Then run: ```bash diff --git a/docs/hello_nextflow/04_hello_genomics.md b/docs/hello_nextflow/04_hello_genomics.md index cc3d8c80a..9ab4488ef 100644 --- a/docs/hello_nextflow/04_hello_genomics.md +++ b/docs/hello_nextflow/04_hello_genomics.md @@ -55,7 +55,7 @@ The tools we need (Samtools and GATK) are not installed in the Gitpod environmen !!! note Make sure you're in the correct working directory: - `cd /workspace/gitpod/hello-nextflow` + `pwd` should return a path ending in `hello-nextflow` ### 0.1. 
Index a BAM input file with Samtools @@ -562,9 +562,9 @@ This error will not reproduce consistently because it is dependent on some varia This is what the output of the two `.view` calls we added looks like for a failed run: ```console title="Output" -/workspace/gitpod/hello-nextflow/data/bam/reads_mother.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_father.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_son.bam +./data/bam/reads_mother.bam +./data/bam/reads_father.bam +./data/bam/reads_son.bam /workspace/gitpod/hello-nextflow/work/9c/53492e3518447b75363e1cd951be4b/reads_father.bam.bai /workspace/gitpod/hello-nextflow/work/cc/37894fffdf6cc84c3b0b47f9b536b7/reads_son.bam.bai /workspace/gitpod/hello-nextflow/work/4d/dff681a3d137ba7d9866e3d9307bd0/reads_mother.bam.bai @@ -718,9 +718,9 @@ Here we are going to show you how to do the simple case. We already made a text file listing the input file paths, called `sample_bams.txt`, which you can find in the `data/` directory. ```txt title="sample_bams.txt" -/workspace/gitpod/hello-nextflow/data/bam/reads_mother.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_father.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_son.bam +data/bam/reads_mother.bam +data/bam/reads_father.bam +data/bam/reads_son.bam ``` As you can see, we listed one file path per line, and they are absolute paths. @@ -758,7 +758,7 @@ This way we can continue to be lazy, but the list of files no longer lives in th Currently, our input channel factory treats any files we give it as the data inputs we want to feed to the indexing process. Since we're now giving it a file that lists input file paths, we need to change its behavior to parse the file and treat the file paths it contains as the data inputs. -Fortunately we can do that very simply, just by adding the [`.splitText()` operator](https://www.nextflow.io/docs/latest/reference/operator.html#operator-splittext) to the channel construction step. 
+We are going to use the [`.splitCsv()`](https://www.nextflow.io/docs/latest/reference/operator.html#operator-splitcsv) operator to parse the file into rows (one list of column values per line), and then use `.map()` to convert the first element of each row into a file path object. This introduces some advanced concepts that we'll explain in more detail later in this training series, but for now it's enough to understand that we can manipulate the contents of the samplesheet after we read it in but before we use it. _Before:_ @@ -769,9 +769,11 @@ reads_ch = Channel.fromPath(params.reads_bam) _After:_ -```groovy title="hello-genomics.nf" linenums="68" +```groovy title="hello-genomics.nf" linenums="68" // Create input channel from a text file listing input file paths -reads_ch = Channel.fromPath(params.reads_bam).splitText() +reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } ``` !!! tip @@ -784,7 +786,7 @@ Let's run the workflow one more time. ```bash nextflow run hello-genomics.nf -resume -``` +``` This should produce the same result as before, right? 
diff --git a/hello-nextflow/data/sample_bams.txt b/hello-nextflow/data/sample_bams.txt index 64e2ce928..c9a6f6bab 100644 --- a/hello-nextflow/data/sample_bams.txt +++ b/hello-nextflow/data/sample_bams.txt @@ -1,3 +1,3 @@ -/workspace/gitpod/hello-nextflow/data/bam/reads_mother.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_father.bam -/workspace/gitpod/hello-nextflow/data/bam/reads_son.bam +data/bam/reads_mother.bam +data/bam/reads_father.bam +data/bam/reads_son.bam diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf index 49e2c4efc..0bc3513eb 100644 --- a/hello-nextflow/hello-config/main.nf +++ b/hello-nextflow/hello-config/main.nf @@ -112,7 +112,9 @@ process GATK_JOINTGENOTYPING { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-modules/main.nf b/hello-nextflow/hello-modules/main.nf index 9904e2cf5..6f08d0184 100644 --- a/hello-nextflow/hello-modules/main.nf +++ b/hello-nextflow/hello-modules/main.nf @@ -96,7 +96,9 @@ process GATK_JOINTGENOTYPING { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-nf-test/main.nf b/hello-nextflow/hello-nf-test/main.nf index a4f588684..fda4f8c2d 100644 --- a/hello-nextflow/hello-nf-test/main.nf +++ b/hello-nextflow/hello-nf-test/main.nf @@ -8,7 +8,9 @@ include { GATK_JOINTGENOTYPING } from './modules/local/gatk/jointgenotyping/main workflow { // 
Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/hello-operators.nf b/hello-nextflow/hello-operators.nf index f2ebb2dc0..ea9d8a647 100644 --- a/hello-nextflow/hello-operators.nf +++ b/hello-nextflow/hello-operators.nf @@ -70,7 +70,9 @@ process GATK_HAPLOTYPECALLER { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-config/final-main.nf b/hello-nextflow/solutions/hello-config/final-main.nf index 8aa9e3d87..6f08d0184 100644 --- a/hello-nextflow/solutions/hello-config/final-main.nf +++ b/hello-nextflow/solutions/hello-config/final-main.nf @@ -15,7 +15,7 @@ process SAMTOOLS_INDEX { output: tuple path(input_bam), path("${input_bam}.bai") - + script: """ samtools index '$input_bam' @@ -96,7 +96,9 @@ process GATK_JOINTGENOTYPING { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf index e613d4688..830ffa6e8 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf +++ 
b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf @@ -52,7 +52,7 @@ process GATK_HAPLOTYPECALLER { output: path "${input_bam}.vcf" , emit: vcf - path "${input_bam}.vcf.idx" , emit: idx + path "${input_bam}.vcf.idx" , emit: idx script: """ @@ -67,7 +67,9 @@ process GATK_HAPLOTYPECALLER { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-modules/final-main.nf b/hello-nextflow/solutions/hello-modules/final-main.nf index a4f588684..fda4f8c2d 100644 --- a/hello-nextflow/solutions/hello-modules/final-main.nf +++ b/hello-nextflow/solutions/hello-modules/final-main.nf @@ -8,7 +8,9 @@ include { GATK_JOINTGENOTYPING } from './modules/local/gatk/jointgenotyping/main workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf index d6ec48c6d..95029b52c 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf @@ -52,7 +52,7 @@ process GATK_HAPLOTYPECALLER { output: path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf.idx" , emit: idx script: """ @@ -68,7 +68,9 @@ process GATK_HAPLOTYPECALLER { workflow { // Create input channel from a text file listing input file paths - 
reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf index 130b5b20f..508955bdf 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf @@ -55,7 +55,7 @@ process GATK_HAPLOTYPECALLER { output: path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf.idx" , emit: idx script: """ @@ -99,7 +99,9 @@ process GATK_GENOMICSDB { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files (reference and intervals) ref_file = file(params.reference) diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf index 4baf93f7b..6a2bf26d9 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf @@ -55,7 +55,7 @@ process GATK_HAPLOTYPECALLER { output: path "${input_bam}.g.vcf" , emit: vcf - path "${input_bam}.g.vcf.idx" , emit: idx + path "${input_bam}.g.vcf.idx" , emit: idx script: """ @@ -109,7 +109,9 @@ process GATK_JOINTGENOTYPING { workflow { // Create input channel from a text file listing input file paths - reads_ch = Channel.fromPath(params.reads_bam).splitText() + reads_ch = Channel.fromPath(params.reads_bam) + .splitCsv() + .map { bamPath -> file(bamPath[0]) } // Load the file paths for the accessory files 
(reference and intervals) ref_file = file(params.reference)