diff --git a/CHANGELOG.md b/CHANGELOG.md index 43968abe..506ec288 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.11.0dev + +## New features + +1. Added a new option to the samplesheet (`msi`). This option takes a boolean to indicate whether or not the MSI status of the sample should be checked. A baseline should be provided using the `--msi_baseline` parameter to run MSI calling. +2. The Multiqc module now generates branded reports. + ## v1.10.1 - Mighty Mechelen - [May 26 2025] ## Fixes diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index cdec4cbf..bd25dd46 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -17,6 +17,12 @@ extra_fn_clean_exts: - type: regex_keep pattern: "^[^\\.]*" module: bcftools + - type: regex + pattern: "_summary_msi$" + - type: regex + pattern: "_all_msi$" + +template: "cmgg" mosdepth_config: general_stats_coverage: diff --git a/conf/test.config b/conf/test.config index 14920d3f..ae8a5b3e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -38,6 +38,7 @@ params { sdf = "https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000_sdf.tar.gz" strtablefile = "https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.strtable.zip" ped = "https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/test_dots.ped" + msi_baseline = "https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/msi_subset.baseline.list" // Pipeline specific parameters filter = true diff --git a/docs/parameters.md b/docs/parameters.md index 6fcee9bd..d69d3266 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -34,6 +34,7 @@ Reference genome 
related files and options required for the workflow. | `genomes_ignore` | Do not load the local references from the path specified with `--genomes_base` | `boolean` | | | True | | `igenomes_base` | Directory / URL base for iGenomes references. | `string` | | | True | | `igenomes_ignore` | Do not load the iGenomes reference config.
<details><summary>Help</summary><small>Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.</small></details>
| `boolean` | | | True | +| `msi_baseline` | Path to the MSI baseline list file (`.list`). | `string` | | | | ## Pipeline specific parameters diff --git a/docs/usage.md b/docs/usage.md index ae90d913..9834ee94 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -101,6 +101,7 @@ The samplesheet can have following columns: | `truth_bed` | OPTIONAL - Full path to the BED file containing the golden truth regions in the `truth_vcf` file. File has to have the extensions `.bed` | | `roi` | OPTIONAL - Full path to a BED file containing the regions of interest for the current sample to call on. When this file is given, the pipeline will run this sample in WES mode. (The flag `--roi ` can also be given to run WES mode for all samples using the file specified by the flag) File has to have the extension `.bed` or `.bed.gz`. | | `vardict_min_af` | OPTIONAL - The minimum AF value to use for the vardict variant caller (`--callers vardict`). This can be set in the samplesheet when it differs for all samples. A default can be set using the `--vardict_min_af` parameter (whichs defaults to 0.1) | +| `msi` | OPTIONAL - Set this to `true` when MSI calling should be performed on this sample. MSI calling is only done when a CRAM file is supplied and a baseline file has been provided via the `--msi_baseline` parameter. 
(By default this option is `false`) | !!!note diff --git a/main.nf b/main.nf index 28f1ad02..4724bc19 100644 --- a/main.nf +++ b/main.nf @@ -213,6 +213,7 @@ workflow { merged_crams = SMALLVARIANTS.out.merged_crams mosdepth_reports = SMALLVARIANTS.out.mosdepth_reports gvcfs = SMALLVARIANTS.out.gvcfs.filter { _meta, gvcf, _tbi -> gvcf.startsWith(workflow.workDir) } // Filtering out input GVCFs from the output publishing fixes an issue in the current implementation of the workflow output definitions: https://github.com/nextflow-io/nextflow/issues/5480 + msi = SMALLVARIANTS.out.msi single_beds = SMALLVARIANTS.out.single_beds perbase_beds = SMALLVARIANTS.out.perbase_beds validation = SMALLVARIANTS.out.validation @@ -244,6 +245,9 @@ output { gvcf >> "${meta.family}/${meta.id}_${params.unique_out}/${meta.id}.${meta.caller}.g.vcf.gz" tbi >> "${meta.family}/${meta.id}_${params.unique_out}/${meta.id}.${meta.caller}.g.vcf.gz.tbi" } } + msi { path { meta, msi -> + msi >> "${meta.family}/${meta.id}_${params.unique_out}/msi/${msi.name}" + } } single_beds { path { meta, bed -> bed >> "${meta.family}/${meta.id}_${params.unique_out}/${meta.id}.bed" } } diff --git a/nextflow.config b/nextflow.config index c69c22c5..72e874a4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,7 +43,7 @@ params { merge_distance = 100000 updio_common_cnvs = null disable_hc_dict_validation = false - msi_baseline = "https://github.com/nf-cmgg/test-datasets/raw/refs/heads/smallvariants/data/genomics/homo_sapiens/genome/msi_subset.baseline.list" + msi_baseline = null // VEP plugins to use vep_dbnsfp = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 423a37e7..81b537d4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -160,6 +160,13 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
+ }, + "msi_baseline": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.list$", + "description": "Path to the MSI baseline list file (`.list`)." } }, "required": ["fasta"] @@ -696,11 +703,5 @@ { "$ref": "#/$defs/annotation_parameters" } - ], - "properties": { - "msi_baseline": { - "type": "string", - "default": "https://github.com/nf-cmgg/test-datasets/raw/refs/heads/smallvariants/data/genomics/homo_sapiens/genome/msi_subset.baseline.list" - } - } + ] } diff --git a/tests/inputs/samplesheet.csv b/tests/inputs/samplesheet.csv index 27fcf525..22396cbc 100644 --- a/tests/inputs/samplesheet.csv +++ b/tests/inputs/samplesheet.csv @@ -1,4 +1,4 @@ -sample,family,cram,crai,roi,ped,truth_vcf,truth_tbi,truth_bed,vardict_min_af -NA24143,Ashkenazim,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/crams/NA24143.cram,,,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/test.ped,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz.tbi,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/regions/roi.bed,0.01 -NA24149,Ashkenazim,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/crams/NA24149.cram,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/crams/NA24149.cram.crai,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/regions/roi.bed,,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/vcfs/NA24149.vcf.gz,,, +sample,family,cram,crai,roi,ped,truth_vcf,truth_tbi,truth_bed,vardict_min_af,msi 
+NA24143,Ashkenazim,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/crams/NA24143.cram,,,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/test.ped,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/vcfs/NA24143.vcf.gz.tbi,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/regions/roi.bed,0.01,true +NA24149,Ashkenazim,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/crams/NA24149.cram,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/crams/NA24149.cram.crai,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/regions/roi.bed,,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/vcfs/NA24149.vcf.gz,,,,true NA24385,Ashkenazim,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/crams/NA24385.cram,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/crams/NA24385.cram.crai,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/regions/roi.bed,,https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/illumina/vcfs/NA24385.vcf.gz,,, diff --git a/tests/nextflow.config b/tests/nextflow.config index a86703bb..6940feea 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -17,6 +17,7 @@ params { sdf = "https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000_sdf.tar.gz" strtablefile = 
"https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/hg38_chr21_22000000_23000000.strtable.zip" vep_cache = null + msi_baseline = "https://github.com/nf-cmgg/test-datasets/raw/smallvariants/data/genomics/homo_sapiens/genome/msi_subset.baseline.list" vcfanno_config = "${baseDir}/assets/vcfanno.toml" vcfanno_resources = "https://github.com/brentp/vcfanno/raw/master/example/exac.vcf.gz;https://github.com/brentp/vcfanno/raw/master/example/exac.vcf.gz.tbi" diff --git a/tests/pipeline/callers/main.nf.test.snap b/tests/pipeline/callers/main.nf.test.snap index 90141d6a..b1044904 100644 --- a/tests/pipeline/callers/main.nf.test.snap +++ b/tests/pipeline/callers/main.nf.test.snap @@ -11,6 +11,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -20,6 +24,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -43,7 +51,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:40:04.509699536" + "timestamp": "2025-06-24T16:19:39.591022899" }, "pipeline_callers - vardict + 
haplotypecaller": { "content": [ @@ -57,6 +65,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -66,6 +78,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -104,7 +120,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:43:29.234774728" + "timestamp": "2025-06-24T16:20:57.091816318" }, "pipeline_callers - vardict": { "content": [ @@ -115,12 +131,20 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.per-base.bed.gz", "Ashkenazim/NA24149__/NA24149.per-base.bed.gz.csi", "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", 
"Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.per-base.bed.gz", "Ashkenazim/NA24385__/NA24385.per-base.bed.gz.csi", @@ -150,6 +174,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:37:36.502218888" + "timestamp": "2025-06-24T16:18:39.878841589" } } \ No newline at end of file diff --git a/tests/pipeline/default/main.nf.test.snap b/tests/pipeline/default/main.nf.test.snap index 0ca4c570..36e57c98 100644 --- a/tests/pipeline/default/main.nf.test.snap +++ b/tests/pipeline/default/main.nf.test.snap @@ -11,6 +11,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -20,6 +24,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -43,6 +51,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:46:02.23259705" + "timestamp": 
"2025-06-24T16:16:20.341360571" } } \ No newline at end of file diff --git a/tests/pipeline/variations/main.nf.test.snap b/tests/pipeline/variations/main.nf.test.snap index a99834ea..838937a3 100644 --- a/tests/pipeline/variations/main.nf.test.snap +++ b/tests/pipeline/variations/main.nf.test.snap @@ -11,6 +11,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -20,6 +24,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -38,7 +46,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:59:28.595713839" + "timestamp": "2025-06-24T16:26:50.482994509" }, "pipeline_variations - filter": { "content": [ @@ -52,6 +60,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", 
"Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -61,6 +73,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -84,7 +100,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:56:03.019409546" + "timestamp": "2025-06-24T16:25:31.401443204" }, "pipeline_variations - annotate + vcfanno": { "content": [ @@ -98,6 +114,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -107,6 +127,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -130,7 +154,7 @@ "nf-test": 
"0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:53:23.090090675" + "timestamp": "2025-06-24T16:24:33.384043574" }, "pipeline_variations - annotate": { "content": [ @@ -144,6 +168,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -153,6 +181,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -176,7 +208,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:50:24.357133999" + "timestamp": "2025-06-24T16:23:32.985371667" }, "pipeline_variations - only_call": { "content": [ @@ -190,6 +222,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", 
"Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -199,6 +235,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -216,7 +256,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T11:57:46.187295532" + "timestamp": "2025-06-24T16:26:09.254282024" }, "pipeline_variations - automap": { "content": [ @@ -230,6 +270,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -239,6 +283,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -274,6 +322,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": 
"2025-05-21T12:02:03.571724156" + "timestamp": "2025-06-24T16:27:44.592789442" } } \ No newline at end of file diff --git a/tests/pipeline/variations2/main.nf.test.snap b/tests/pipeline/variations2/main.nf.test.snap index b67ce7cf..1b84e176 100644 --- a/tests/pipeline/variations2/main.nf.test.snap +++ b/tests/pipeline/variations2/main.nf.test.snap @@ -11,6 +11,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -20,6 +24,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -43,7 +51,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T12:16:10.448532566" + "timestamp": "2025-06-24T16:32:45.150268815" }, "pipeline_variations - normalize": { "content": [ @@ -57,6 +65,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + 
"Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -66,6 +78,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -89,7 +105,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T12:04:42.687739168" + "timestamp": "2025-06-24T16:28:44.293640399" }, "pipeline_variations - updio": { "content": [ @@ -103,6 +119,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -112,6 +132,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", 
"Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -135,7 +159,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T12:07:26.757445048" + "timestamp": "2025-06-24T16:29:44.137295334" }, "pipeline_variations - gemini": { "content": [ @@ -149,6 +173,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", "Ashkenazim/NA24149__/NA24149.bed", "Ashkenazim/NA24149__/NA24149.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24149__/NA24149.haplotypecaller.g.vcf.gz", @@ -158,6 +186,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24385__/NA24385.bed", "Ashkenazim/NA24385__/NA24385.haplotypecaller.bcftools_stats.txt", "Ashkenazim/NA24385__/NA24385.haplotypecaller.g.vcf.gz", @@ -182,7 +214,7 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T12:10:02.843587865" + "timestamp": "2025-06-24T16:30:40.926910183" }, "pipeline_variations - validate": { "content": [ @@ -196,6 +228,10 @@ "Ashkenazim/NA24143__/mosdepth/NA24143.global.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.region.dist.txt", "Ashkenazim/NA24143__/mosdepth/NA24143.summary.txt", + "Ashkenazim/NA24143__/msi/NA24143_all_msi", + "Ashkenazim/NA24143__/msi/NA24143_dis_msi", + "Ashkenazim/NA24143__/msi/NA24143_summary_msi", + "Ashkenazim/NA24143__/msi/NA24143_unstable_msi", 
"Ashkenazim/NA24143__/validation/haplotypecaller/NA24143.fn.vcf.gz", "Ashkenazim/NA24143__/validation/haplotypecaller/NA24143.fn.vcf.gz.tbi", "Ashkenazim/NA24143__/validation/haplotypecaller/NA24143.fp.vcf.gz", @@ -224,6 +260,10 @@ "Ashkenazim/NA24149__/mosdepth/NA24149.global.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.region.dist.txt", "Ashkenazim/NA24149__/mosdepth/NA24149.summary.txt", + "Ashkenazim/NA24149__/msi/NA24149_all_msi", + "Ashkenazim/NA24149__/msi/NA24149_dis_msi", + "Ashkenazim/NA24149__/msi/NA24149_summary_msi", + "Ashkenazim/NA24149__/msi/NA24149_unstable_msi", "Ashkenazim/NA24149__/validation/haplotypecaller/NA24149.fn.vcf.gz", "Ashkenazim/NA24149__/validation/haplotypecaller/NA24149.fn.vcf.gz.tbi", "Ashkenazim/NA24149__/validation/haplotypecaller/NA24149.fp.vcf.gz", @@ -285,6 +325,6 @@ "nf-test": "0.9.2", "nextflow": "25.04.2" }, - "timestamp": "2025-05-21T12:13:25.633719989" + "timestamp": "2025-06-24T16:31:48.831887623" } } \ No newline at end of file diff --git a/workflows/smallvariants.nf b/workflows/smallvariants.nf index e726041c..c98a2fa8 100644 --- a/workflows/smallvariants.nf +++ b/workflows/smallvariants.nf @@ -167,7 +167,8 @@ workflow SMALLVARIANTS { def ch_elsites = elsites ? Channel.fromPath(elsites).map{ elsites_file -> [[id:'elsites'], elsites_file] }.collect() : [[],[]] - def ch_msi_baseline = Channel.fromPath(msi_baseline).map { msi_file -> [[id:"msi_baseline"], msi_file] } + def ch_msi_baseline = msi_baseline ? 
Channel.fromPath(msi_baseline).map { msi_file -> [[id:"msi_baseline"], msi_file] }.collect() : [[],[]] + // // Check for the presence of EnsemblVEP plugins that use extra files // @@ -452,13 +453,30 @@ workflow SMALLVARIANTS { // Check for MSI // - def ch_msi_samples = CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams.filter { meta, _cram, _crai -> meta.msi == true} + def msi_warned = false + def ch_msi_samples = CRAM_PREPARE_SAMTOOLS_BEDTOOLS.out.ready_crams + .filter { meta, _cram, _crai -> + if(!msi_baseline) { + if(!msi_warned) { + log.warn("MSI samples were found, but no MSI baseline file was provided. Please provide a baseline file using the '--msi_baseline' parameter. Skipping MSI analysis...") + } + msi_warned = true + return false + } + return meta.msi + } + MSISENSORPRO_PRO( ch_msi_samples, ch_msi_baseline, ch_fasta_ready, ch_fai_ready ) + ch_msisensor_output = MSISENSORPRO_PRO.out.all_msi.mix( + MSISENSORPRO_PRO.out.summary_msi, + MSISENSORPRO_PRO.out.dis_msi, + MSISENSORPRO_PRO.out.unstable_msi + ) ch_reports = ch_reports.mix(MSISENSORPRO_PRO.out.all_msi.map { _meta, file -> file}) ch_reports = ch_reports.mix(MSISENSORPRO_PRO.out.summary_msi.map { _meta, file -> file}) ch_versions = ch_versions.mix(MSISENSORPRO_PRO.out.versions.first()) @@ -889,6 +907,7 @@ workflow SMALLVARIANTS { merged_crams = ch_merged_crams // channel: [ val(meta), path(cram), path(crai) ] mosdepth_reports = ch_mosdepth_reports // channel: [ val(meta), path(mosdepth_report) ] gvcfs = ch_gvcfs_ready // channel: [ val(meta), path(gvcf), path(tbi) ] + msi = ch_msisensor_output // channel: [ val(meta), path(file) ] genomicsdb = ch_final_genomicsdb // channel: [ val(meta), path(genomicsdb) ] vcfs = ch_final_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] gemini = ch_final_dbs // channel: [ val(meta), path(db) ]