From 5394877d433578c369a4db68f0a403e8c3531af3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20K=C3=B6ster?=
Date: Sat, 9 Oct 2021 15:31:58 +0200
Subject: [PATCH 1/3] perf: skip adapter trimming if nothing specified in unit sheet

---
 workflow/rules/common.smk   | 24 ++++++++++++++++--------
 workflow/rules/trimming.smk | 10 +++++-----
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 5410667aa..e40d1a6ed 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -121,7 +121,7 @@ def get_recalibrate_quality_input(wildcards, bai=False):
     return "results/mapped/{}.sorted.{}".format(wildcards.sample, ext)
 
 
-def get_cutadapt_input(wildcards):
+def get_cutadapt_input(wildcards, reads=["fq1", "fq2"]):
     unit = units.loc[wildcards.sample].loc[wildcards.unit]
 
     if pd.isna(unit["fq1"]):
@@ -145,7 +145,7 @@ def get_cutadapt_input(wildcards):
             "pipe/cutadapt/{S}/{U}.{{read}}.fastq{E}".format(
                 S=unit.sample_name, U=unit.unit_name, E=ending
             ),
-            read=["fq1", "fq2"],
+            read=reads,
         )
 
 
@@ -545,12 +545,20 @@ def get_tabix_params(wildcards):
 
 
 def get_fastqs(wc):
-    return expand(
-        "results/trimmed/{sample}/{unit}_{read}.fastq.gz",
-        unit=units.loc[wc.sample, "unit_name"],
-        sample=wc.sample,
-        read=wc.read,
-    )
+    unit = units.loc[wc.sample]
+    if not unit["adapters"]:
+        # no adapter trimming configured, skipping cutadapt for unit
+        if wc.read == "single":
+            return get_cutadapt_input(wc)
+        else:
+            return get_cutadapt_input(wc, reads=["fq1"] if wc.read == "R1" else ["fq2"])
+    else:
+        return expand(
+            "results/trimmed/{sample}/{unit}_{read}.fastq.gz",
+            unit=unit["unit_name"],
+            sample=wc.sample,
+            read=wc.read,
+        )
 
 
 def get_vembrane_expression(wc):
diff --git a/workflow/rules/trimming.smk b/workflow/rules/trimming.smk
index e33cff4ec..c238310fd 100644
--- a/workflow/rules/trimming.smk
+++ b/workflow/rules/trimming.smk
@@ -14,7 +14,7 @@ rule cutadapt_pipe:
     output:
         pipe("pipe/cutadapt/{sample}/{unit}.{fq}.{ext}"),
     log:
-        "logs/pipe-fastqs/catadapt/{sample}-{unit}.{fq}.{ext}.log",
+        "logs/pipe-fastqs/cutadapt/{sample}-{unit}.{fq}.{ext}.log",
     wildcard_constraints:
         ext=r"fastq|fastq\.gz",
     threads: 0  # this does not need CPU
@@ -26,8 +26,8 @@ rule cutadapt_pe:
     input:
         get_cutadapt_input,
     output:
-        fastq1="results/trimmed/{sample}/{unit}_R1.fastq.gz",
-        fastq2="results/trimmed/{sample}/{unit}_R2.fastq.gz",
+        fastq1=temp("results/trimmed/{sample}/{unit}_R1.fastq.gz"),
+        fastq2=temp("results/trimmed/{sample}/{unit}_R2.fastq.gz"),
         qc="results/trimmed/{sample}/{unit}.paired.qc.txt",
     log:
         "logs/cutadapt/{sample}-{unit}.log",
@@ -43,8 +43,8 @@ rule cutadapt_se:
     input:
         get_cutadapt_input,
     output:
-        fastq="results/trimmed/{sample}/{unit}.single.fastq.gz",
-        qc="results/trimmed/{sample}/{unit}.single.qc.txt",
+        fastq=temp("results/trimmed/{sample}/{unit}_single.fastq.gz"),
+        qc="results/trimmed/{sample}/{unit}_single.qc.txt",
     log:
         "logs/cutadapt/{sample}-{unit}.log",
     params:

From b89ae47529cbbc63bac733275a9efa044c8a418b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20K=C3=B6ster?=
Date: Sat, 9 Oct 2021 15:56:52 +0200
Subject: [PATCH 2/3] check for NA

---
 workflow/rules/common.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index e40d1a6ed..004eefd60 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -546,7 +546,7 @@ def get_tabix_params(wildcards):
 
 def get_fastqs(wc):
     unit = units.loc[wc.sample]
-    if not unit["adapters"]:
+    if pd.isna(unit["adapters"]):
         # no adapter trimming configured, skipping cutadapt for unit
         if wc.read == "single":
             return get_cutadapt_input(wc)

From 08a4b2d94e340b55854c6e451bcc4a90da7170c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20K=C3=B6ster?=
Date: Sat, 9 Oct 2021 15:57:53 +0200
Subject: [PATCH 3/3] check if all adapters are undefined

---
 workflow/rules/common.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index 004eefd60..b1bcfb3a8 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -546,7 +546,7 @@ def get_tabix_params(wildcards):
 
 def get_fastqs(wc):
     unit = units.loc[wc.sample]
-    if pd.isna(unit["adapters"]):
+    if pd.isna(unit["adapters"]).all():
         # no adapter trimming configured, skipping cutadapt for unit
         if wc.read == "single":
             return get_cutadapt_input(wc)