From 888fc22e87c8b73b2f7c44456850337633b5257c Mon Sep 17 00:00:00 2001 From: famosab Date: Fri, 25 Apr 2025 12:18:29 +0200 Subject: [PATCH 1/3] feat: swap to bwa-mem2 for read-alignment --- workflow/rules/download.smk | 57 +++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/workflow/rules/download.smk b/workflow/rules/download.smk index f474a89..f65a223 100644 --- a/workflow/rules/download.smk +++ b/workflow/rules/download.smk @@ -178,38 +178,71 @@ rule samtools_faidx: "v1.7.2/bio/samtools/faidx" -rule bwa_index: +# rule bwa_index: +# input: +# "resources/reference/genome.fasta", +# output: +# idx=multiext( +# "resources/reference/genome", ".amb", ".ann", ".bwt", ".pac", ".sa" +# ), +# log: +# "logs/bwa-index.log", +# wrapper: +# "v1.8.0/bio/bwa/index" + +# rule bwa_mem: +# input: +# reads=get_bwa_input, +# idx=rules.bwa_index.output, +# output: +# "results/read-alignments/{benchmark}.bam", +# log: +# "logs/bwa-mem/{benchmark}.log", +# params: +# sorting="samtools", # Can be 'none', 'samtools' or 'picard'. +# sort_order="coordinate", # Can be 'queryname' or 'coordinate'. +# threads: 8 +# wrapper: +# "v1.8.0/bio/bwa/mem" + + +rule bwa_mem2_index: input: "resources/reference/genome.fasta", output: idx=multiext( - "resources/reference/genome", ".amb", ".ann", ".bwt", ".pac", ".sa" + "resources/reference/genome", + ".0123", + ".amb", + ".ann", + ".bwt.2bit.64", + ".pac", ), log: - "logs/bwa-index.log", + "logs/bwa-mem2-index.log", wrapper: - "v1.8.0/bio/bwa/index" + "v6.0.1/bio/bwa-mem2/index" -rule bwa_mem: +rule bwa_mem2_mem: input: reads=get_bwa_input, - idx=rules.bwa_index.output, + idx=rules.bwa_mem2_index.output, output: - "results/read-alignments/{benchmark}.bam", + "results/read-alignments/{benchmark}.mem2.bam", log: - "logs/bwa-mem/{benchmark}.log", + "logs/bwa-mem2/{benchmark}.log", params: - sorting="samtools", # Can be 'none', 'samtools' or 'picard'. - sort_order="coordinate", # Can be 'queryname' or 'coordinate'. + sort="samtools", # Can be 'none', 'samtools', or 'picard'. + sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. threads: 8 wrapper: - "v1.8.0/bio/bwa/mem" + "v6.0.1/bio/bwa-mem2/mem" rule mark_duplicates: input: - bams="results/read-alignments/{benchmark}.bam", + bams="results/read-alignments/{benchmark}.mem2.bam", output: bam="results/read-alignments/{benchmark}.dedup.bam", metrics="results/read-alignments/{benchmark}.dedup.metrics.txt", From e5c31fc8b422f598839e3c5007f597f59f194059 Mon Sep 17 00:00:00 2001 From: famosab Date: Tue, 29 Apr 2025 09:10:46 +0200 Subject: [PATCH 2/3] fix: swap to bwa-mem2 --- workflow/rules/download.smk | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/workflow/rules/download.smk b/workflow/rules/download.smk index f65a223..026ba3a 100644 --- a/workflow/rules/download.smk +++ b/workflow/rules/download.smk @@ -178,34 +178,6 @@ rule samtools_faidx: "v1.7.2/bio/samtools/faidx" -# rule bwa_index: -# input: -# "resources/reference/genome.fasta", -# output: -# idx=multiext( -# "resources/reference/genome", ".amb", ".ann", ".bwt", ".pac", ".sa" -# ), -# log: -# "logs/bwa-index.log", -# wrapper: -# "v1.8.0/bio/bwa/index" - -# rule bwa_mem: -# input: -# reads=get_bwa_input, -# idx=rules.bwa_index.output, -# output: -# "results/read-alignments/{benchmark}.bam", -# log: -# "logs/bwa-mem/{benchmark}.log", -# params: -# sorting="samtools", # Can be 'none', 'samtools' or 'picard'. -# sort_order="coordinate", # Can be 'queryname' or 'coordinate'. -# threads: 8 -# wrapper: -# "v1.8.0/bio/bwa/mem" - - rule bwa_mem2_index: input: "resources/reference/genome.fasta", @@ -235,6 +207,7 @@ rule bwa_mem2_mem: params: sort="samtools", # Can be 'none', 'samtools', or 'picard'. sort_order="coordinate", # Can be 'coordinate' (default) or 'queryname'. + #sort_extra="-m 500M" threads: 8 wrapper: "v6.0.1/bio/bwa-mem2/mem" From 89dcb755b91ac9ba398b20100e977f60370f8a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Famke=20Ba=CC=88uerle?= Date: Thu, 22 May 2025 10:47:25 +0200 Subject: [PATCH 3/3] chore: cleanup rules --- workflow/rules/download.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/download.smk b/workflow/rules/download.smk index f51d2ed..bab49e3 100644 --- a/workflow/rules/download.smk +++ b/workflow/rules/download.smk @@ -198,7 +198,7 @@ rule bwa_mem2_mem: reads=get_bwa_input, idx=rules.bwa_mem2_index.output, output: - "results/read-alignments/{benchmark}.mem2.bam", + "results/read-alignments/{benchmark}.bam", log: "logs/bwa-mem2/{benchmark}.log", params: @@ -212,7 +212,7 @@ rule bwa_mem2_mem: rule mark_duplicates: input: - bams="results/read-alignments/{benchmark}.mem2.bam", + bams="results/read-alignments/{benchmark}.bam", output: bam="results/read-alignments/{benchmark}.dedup.bam", metrics="results/read-alignments/{benchmark}.dedup.metrics.txt",