From daa3b42893a860d22b865ec458a2602ad8d1e581 Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 13:53:18 +0100 Subject: [PATCH 01/11] feat: add savana for SV calling --- workflow/envs/savana.yaml | 6 ++++++ workflow/rules/candidate_calling.smk | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 workflow/envs/savana.yaml diff --git a/workflow/envs/savana.yaml b/workflow/envs/savana.yaml new file mode 100644 index 000000000..3018e5394 --- /dev/null +++ b/workflow/envs/savana.yaml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - nodefaults +dependencies: + - savana =1.3.2 \ No newline at end of file diff --git a/workflow/rules/candidate_calling.smk b/workflow/rules/candidate_calling.smk index 17cd72546..da2f64cb5 100644 --- a/workflow/rules/candidate_calling.smk +++ b/workflow/rules/candidate_calling.smk @@ -23,6 +23,25 @@ rule freebayes: "v2.7.0/bio/freebayes" +rule savana: + input: + ref=access.random(genome), + ref_idx=genome_fai, + aln="results/recal/{sample}.{ext}", + index="results/recal/{sample}.{ext}.bai", + output: + "results/candidate-calls/{sample}.savana.bcf", + conda: + "../envs/savana.yaml", + log: + "logs/savana/{sample}.log", + shadow: "minimal" + threads: 8 + shell: + "(savana to --tumour {input.aln} --ref {input.ref} --outdir . &&" + " mv *_sv_breakpoints.vcf {output}) 2> {log}" + + rule delly: input: ref=access.random(genome), From a32739cf570148ea51e924411c8ae4e65fce6348 Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 14:02:44 +0100 Subject: [PATCH 02/11] add target and test --- .github/workflows/main.yml | 11 ++++++++++- workflow/Snakefile | 8 ++++++++ workflow/rules/candidate_calling.smk | 7 ++++--- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c7e2aadb3..3c38c8f1f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -92,7 +92,16 @@ jobs: directory: .test snakefile: workflow/Snakefile args: > - --conda-cleanup-pkgs cache --sdm conda --cores 10 ${{ matrix.case.args }} + --conda-cleanup-pkgs cache --sdm conda --cores 10 ${{ matrix.case.args }} + show-disk-usage-on-error: true + + - name: test savana + uses: snakemake/snakemake-github-action@v2 + with: + directory: .test + snakefile: workflow/Snakefile + args: > + only_savana --conda-cleanup-pkgs cache --sdm conda --cores 10 ${{ matrix.case.args }} show-disk-usage-on-error: true - name: generate report diff --git a/workflow/Snakefile b/workflow/Snakefile index e4a426096..e73fb94fa 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -61,6 +61,14 @@ rule only_alignment: expand("results/mapped/{sample}.cram", sample=samples["sample_name"]), +rule only_savana: + input: + expand( + "results/candidate-calls/{sample}.savana.vcf", + sample=samples["sample_name"], + ), + + rule benchmark: input: expand( diff --git a/workflow/rules/candidate_calling.smk b/workflow/rules/candidate_calling.smk index da2f64cb5..3302ece36 100644 --- a/workflow/rules/candidate_calling.smk +++ b/workflow/rules/candidate_calling.smk @@ -30,12 +30,13 @@ rule savana: aln="results/recal/{sample}.{ext}", index="results/recal/{sample}.{ext}.bai", output: - "results/candidate-calls/{sample}.savana.bcf", + "results/candidate-calls/{sample}.savana.vcf", conda: - "../envs/savana.yaml", + "../envs/savana.yaml" log: "logs/savana/{sample}.log", - shadow: "minimal" + shadow: + "minimal" threads: 8 shell: "(savana to --tumour {input.aln} --ref {input.ref} --outdir . &&" From b7e80e903933cdd3b1bab35fe7c1e41f96a292cb Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 14:14:11 +0100 Subject: [PATCH 03/11] fix --- workflow/rules/candidate_calling.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/candidate_calling.smk b/workflow/rules/candidate_calling.smk index 3302ece36..b12d8a4c9 100644 --- a/workflow/rules/candidate_calling.smk +++ b/workflow/rules/candidate_calling.smk @@ -27,8 +27,8 @@ rule savana: input: ref=access.random(genome), ref_idx=genome_fai, - aln="results/recal/{sample}.{ext}", - index="results/recal/{sample}.{ext}.bai", + aln="results/recal/{sample}.bam", + index="results/recal/{sample}.bai", output: "results/candidate-calls/{sample}.savana.vcf", conda: From 63f7ef9d7f551b6e52b247b4b264201fde51ec9b Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 14:15:50 +0100 Subject: [PATCH 04/11] annotate access pattern --- workflow/rules/candidate_calling.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/candidate_calling.smk b/workflow/rules/candidate_calling.smk index b12d8a4c9..6e421eace 100644 --- a/workflow/rules/candidate_calling.smk +++ b/workflow/rules/candidate_calling.smk @@ -27,7 +27,7 @@ rule savana: input: ref=access.random(genome), ref_idx=genome_fai, - aln="results/recal/{sample}.bam", + aln=access.random("results/recal/{sample}.bam"), index="results/recal/{sample}.bai", output: "results/candidate-calls/{sample}.savana.vcf", From 9a7cdd6eb188eb820a4b66bdfd4928a9f24f0c2c Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 14:49:08 +0100 Subject: [PATCH 05/11] CNV calling and germline event handling --- .github/workflows/main.yml | 9 +++++++ .test/config-chm-eval/config.yaml | 4 +++ .test/config-giab/config.yaml | 4 +++ .../config-no-candidate-filtering/config.yaml | 2 ++ .test/config-simple/config.yaml | 2 ++ .test/config-sra/config.yaml | 2 ++ .test/config-target-regions/config.yaml | 2 ++ .test/config_primers/config.yaml | 2 ++ config/config.yaml | 6 +++++ workflow/Snakefile | 1 + workflow/rules/candidate_calling.smk | 15 +++++++---- workflow/rules/common.smk | 2 ++ workflow/rules/germline_snvs.smk | 27 +++++++++++++++++++ 13 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 workflow/rules/germline_snvs.smk diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3c38c8f1f..412cdf245 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,42 +25,50 @@ jobs: --show-failed-logs -j 10 --conda-cleanup-pkgs cache --dryrun report: false cleanup: false + testcase_gen: false - name: local input args: >- --configfile .test/config-simple/config.yaml --show-failed-logs -j 10 --conda-cleanup-pkgs cache report: true cleanup: false + testcase_gen: false - name: testcase generation args: >- results/testcases/one/freebayes/IX:314200 --configfile .test/config-simple/config.yaml report: false cleanup: false + testcase_gen: true - name: target regions args: >- --configfile .test/config-target-regions/config.yaml report: true cleanup: false + testcase_gen: false - name: target regions, multiple BEDs args: >- --configfile .test/config-target-regions/config_multiple_beds.yaml report: true cleanup: false + testcase_gen: false - name: no candidate filtering args: >- --configfile .test/config-no-candidate-filtering/config.yaml report: true cleanup: false + testcase_gen: false - name: primers args: >- --configfile .test/config_primers/config.yaml report: true cleanup: false + testcase_gen: false - name: sra download args: >- --configfile .test/config-sra/config.yaml report: true cleanup: false + testcase_gen: false name: test ${{ matrix.case.name }} steps: @@ -97,6 +105,7 @@ jobs: - name: test savana uses: snakemake/snakemake-github-action@v2 + if: ${{ !matrix.case.testcase_gen }} with: directory: .test snakefile: workflow/Snakefile diff --git a/.test/config-chm-eval/config.yaml b/.test/config-chm-eval/config.yaml index a8bb79ca5..e2817c4c3 100644 --- a/.test/config-chm-eval/config.yaml +++ b/.test/config-chm-eval/config.yaml @@ -41,6 +41,10 @@ calling: activate: true freebayes: activate: true + savana: + activate: true + germline_events: + - present # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config-chm-eval/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter diff --git a/.test/config-giab/config.yaml b/.test/config-giab/config.yaml index 6742e6e7b..5b27b4bef 100644 --- a/.test/config-giab/config.yaml +++ b/.test/config-giab/config.yaml @@ -85,6 +85,10 @@ calling: activate: false freebayes: activate: true + savana: + activate: true + germline_events: + - present # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config-giab/scenario.yaml filter: diff --git a/.test/config-no-candidate-filtering/config.yaml b/.test/config-no-candidate-filtering/config.yaml index c77b75259..1511ec0a5 100644 --- a/.test/config-no-candidate-filtering/config.yaml +++ b/.test/config-no-candidate-filtering/config.yaml @@ -42,6 +42,8 @@ calling: activate: true freebayes: activate: true + savana: + activate: true # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config-no-candidate-filtering/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter diff --git a/.test/config-simple/config.yaml b/.test/config-simple/config.yaml index 0e0abc552..5035b79e8 100644 --- a/.test/config-simple/config.yaml +++ b/.test/config-simple/config.yaml @@ -41,6 +41,8 @@ calling: activate: true freebayes: activate: true + savana: + activate: true # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config-simple/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter diff --git a/.test/config-sra/config.yaml b/.test/config-sra/config.yaml index 21df0691d..dad594657 100644 --- a/.test/config-sra/config.yaml +++ b/.test/config-sra/config.yaml @@ -39,6 +39,8 @@ calling: activate: true freebayes: activate: true + savana: + activate: true # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config-sra/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter diff --git a/.test/config-target-regions/config.yaml b/.test/config-target-regions/config.yaml index eb0290d41..46048cd99 100644 --- a/.test/config-target-regions/config.yaml +++ b/.test/config-target-regions/config.yaml @@ -42,6 +42,8 @@ calling: activate: true freebayes: activate: true + savana: + activate: true # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config-target-regions/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter diff --git a/.test/config_primers/config.yaml b/.test/config_primers/config.yaml index 15ad0df3d..c991a78a1 100644 --- a/.test/config_primers/config.yaml +++ b/.test/config_primers/config.yaml @@ -43,6 +43,8 @@ calling: activate: true freebayes: activate: true + savana: + activate: true # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config_primers/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter diff --git a/config/config.yaml b/config/config.yaml index 78600b689..d746d4ffc 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -115,6 +115,12 @@ calling: # exclude_regions: freebayes: activate: true + savana: + activate: true + # Events from the varlociraptor scenario to use as germline event for CNV calling + # with savana. If not set, no CNV calling will be performed. + germline_events: + - germline # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config/scenario.yaml filter: diff --git a/workflow/Snakefile b/workflow/Snakefile index e73fb94fa..0eaf4f749 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -27,6 +27,7 @@ include: "rules/benchmarking.smk" include: "rules/ref.smk" include: "rules/trimming.smk" include: "rules/mapping.smk" +include: "rules/germline_snvs.smk" include: "rules/candidate_calling.smk" include: "rules/calling.smk" include: "rules/annotation.smk" diff --git a/workflow/rules/candidate_calling.smk b/workflow/rules/candidate_calling.smk index 6e421eace..46090f991 100644 --- a/workflow/rules/candidate_calling.smk +++ b/workflow/rules/candidate_calling.smk @@ -29,18 +29,23 @@ rule savana: ref_idx=genome_fai, aln=access.random("results/recal/{sample}.bam"), index="results/recal/{sample}.bai", + germline_snvs="results/germline-snvs/{group}.bcf" if germline_events else [], output: - "results/candidate-calls/{sample}.savana.vcf", - conda: - "../envs/savana.yaml" + "results/candidate-calls/{sample}.savana.bcf", log: "logs/savana/{sample}.log", + params: + snvs=lambda w, input: ( + f"--snp_vcf {input.germline_snvs}" if germline_events else "" + ), + conda: + "../envs/savana.yaml" shadow: "minimal" threads: 8 shell: - "(savana to --tumour {input.aln} --ref {input.ref} --outdir . &&" - " mv *_sv_breakpoints.vcf {output}) 2> {log}" + "(savana to --tumour {input.aln} --ref {input.ref} --outdir . {params.snvs} &&" + " bcftools view -Ob -o {output} *_sv_breakpoints.vcf) 2> {log}" rule delly: diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index f042288dd..d6d804a5c 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -44,6 +44,8 @@ delly_excluded_regions = { ("homo_sapiens", "GRCh37"): "human.hg19", } +germline_events = lookup("calling/savana/germline_events", within=config, default=[]) + def _group_or_sample(row): group = row.get("group", None) diff --git a/workflow/rules/germline_snvs.smk b/workflow/rules/germline_snvs.smk new file mode 100644 index 000000000..52a00fee6 --- /dev/null +++ b/workflow/rules/germline_snvs.smk @@ -0,0 +1,27 @@ +rule gather_germline_calls: + input: + calls="results/calls/{group}.freebayes.{scatteritem}.bcf", + idx="results/calls/{group}.freebayes.{scatteritem}.bcf.csi", + output: + pipe("results/germline-snvs/{group}.germline_snv_candidates.bcf"), + log: + "logs/germline-snvs/gather-calls/{group}.log", + params: + extra="-a", + wrapper: + "v2.3.2/bio/bcftools/concat" + + +rule control_fdr_germline_snvs: + input: + "results/germline-snvs/{group}.germline_snv_candidates.bcf", + output: + "results/germline-snvs/{group}.bcf", + params: + events=germline_events, + log: + "logs/germline-snvs/fdr-control/{group}.log", + conda: + "../envs/varlociraptor.yaml" + shell: + "varlociraptor filter-calls control-fdr {input} --events {params.events} --mode local-smart --var snv > {output} 2> {log}" From 1ad170fb2b284699221e87fcc218ca8dfa239b4f Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 14:49:46 +0100 Subject: [PATCH 06/11] add fdr parameter --- workflow/rules/germline_snvs.smk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/rules/germline_snvs.smk b/workflow/rules/germline_snvs.smk index 52a00fee6..b8f54044e 100644 --- a/workflow/rules/germline_snvs.smk +++ b/workflow/rules/germline_snvs.smk @@ -24,4 +24,4 @@ rule control_fdr_germline_snvs: conda: "../envs/varlociraptor.yaml" shell: - "varlociraptor filter-calls control-fdr {input} --events {params.events} --mode local-smart --var snv > {output} 2> {log}" + "varlociraptor filter-calls control-fdr {input} --events {params.events} --mode local-smart --var snv --fdr 0.05 > {output} 2> {log}" From 88956f42965319f1e6e472b8a75ab51cf7767484 Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 14:57:02 +0100 Subject: [PATCH 07/11] extension --- workflow/Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 0eaf4f749..00b1c11a3 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -65,7 +65,7 @@ rule only_alignment: rule only_savana: input: expand( - "results/candidate-calls/{sample}.savana.vcf", + "results/candidate-calls/{sample}.savana.bcf", sample=samples["sample_name"], ), From 0b0d111d7543d671c768e4a647b590b6cf645c45 Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 16:39:38 +0100 Subject: [PATCH 08/11] use my savana fork for now --- workflow/envs/savana.yaml | 9 ++++++++- workflow/rules/candidate_calling.smk | 11 ++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/workflow/envs/savana.yaml b/workflow/envs/savana.yaml index 3018e5394..d67a2c6f7 100644 --- a/workflow/envs/savana.yaml +++ b/workflow/envs/savana.yaml @@ -3,4 +3,11 @@ channels: - bioconda - nodefaults dependencies: - - savana =1.3.2 \ No newline at end of file + - pybedtools==0.9.0 + - pysam==0.20.0 + - cyvcf2==0.30.16 + - scikit-learn==1.2.2 + - pandas==2.0.0 + - matplotlib==3.7.1 + - pip: + - git+https://github.com/johanneskoester/savana.git@fix/empty-calls \ No newline at end of file diff --git a/workflow/rules/candidate_calling.smk b/workflow/rules/candidate_calling.smk index 46090f991..3a02c5c2d 100644 --- a/workflow/rules/candidate_calling.smk +++ b/workflow/rules/candidate_calling.smk @@ -31,7 +31,8 @@ rule savana: index="results/recal/{sample}.bai", germline_snvs="results/germline-snvs/{group}.bcf" if germline_events else [], output: - "results/candidate-calls/{sample}.savana.bcf", + bcf="results/candidate-calls/{sample}.savana.bcf", + outdir=directory("results/candidate-calls/savana/{sample}"), log: "logs/savana/{sample}.log", params: @@ -40,12 +41,12 @@ rule savana: ), conda: "../envs/savana.yaml" - shadow: - "minimal" threads: 8 shell: - "(savana to --tumour {input.aln} --ref {input.ref} --outdir . {params.snvs} &&" - " bcftools view -Ob -o {output} *_sv_breakpoints.vcf) 2> {log}" + "(savana to --tumour {input.aln} --ref {input.ref} --outdir {output.outdir}" + " {params.snvs} &&" + " bcftools view -Ob -o {output.bcf} {output.outdir}/{wildcards.sample}.sv_breakpoints.vcf) " + "2>&1 > {log}" rule delly: From 53294d069886fd7740685e45c678a2944ace347e Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 17:00:27 +0100 Subject: [PATCH 09/11] add bcftools --- workflow/envs/savana.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflow/envs/savana.yaml b/workflow/envs/savana.yaml index d67a2c6f7..7e38b7c7e 100644 --- a/workflow/envs/savana.yaml +++ b/workflow/envs/savana.yaml @@ -3,6 +3,7 @@ channels: - bioconda - nodefaults dependencies: + - bcftools =1.21 - pybedtools==0.9.0 - pysam==0.20.0 - cyvcf2==0.30.16 @@ -10,4 +11,4 @@ dependencies: - pandas==2.0.0 - matplotlib==3.7.1 - pip: - - git+https://github.com/johanneskoester/savana.git@fix/empty-calls \ No newline at end of file + - git+https://github.com/johanneskoester/savana.git@fix/empty-calls \ No newline at end of file From 1a9d4efcfb2237cd78816da0cf9ab738174fde3a Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 18:10:13 +0100 Subject: [PATCH 10/11] set threads --- workflow/rules/candidate_calling.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/candidate_calling.smk b/workflow/rules/candidate_calling.smk index 3a02c5c2d..e9f97a4ed 100644 --- a/workflow/rules/candidate_calling.smk +++ b/workflow/rules/candidate_calling.smk @@ -41,9 +41,9 @@ rule savana: ), conda: "../envs/savana.yaml" - threads: 8 + threads: 16 shell: - "(savana to --tumour {input.aln} --ref {input.ref} --outdir {output.outdir}" + "(savana to --threads {threads} --tumour {input.aln} --ref {input.ref} --outdir {output.outdir}" " {params.snvs} &&" " bcftools view -Ob -o {output.bcf} {output.outdir}/{wildcards.sample}.sv_breakpoints.vcf) " "2>&1 > {log}" From b535811600b354fb24a0df6a3fc581cdfdc78787 Mon Sep 17 00:00:00 2001 From: Johannes Koester Date: Fri, 28 Mar 2025 18:13:18 +0100 Subject: [PATCH 11/11] fix env --- workflow/envs/savana.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/envs/savana.yaml b/workflow/envs/savana.yaml index 7e38b7c7e..bb607fdfa 100644 --- a/workflow/envs/savana.yaml +++ b/workflow/envs/savana.yaml @@ -3,7 +3,7 @@ channels: - bioconda - nodefaults dependencies: - - bcftools =1.21 + - bcftools - pybedtools==0.9.0 - pysam==0.20.0 - cyvcf2==0.30.16