Commit 043630e

make all resource files input to rules

1 parent 86abc78 commit 043630e

7 files changed (+46, -27 lines)

workflow/rules/processing.smk

Lines changed: 9 additions & 7 deletions
@@ -7,6 +7,7 @@ rule align:
         output_bai = os.path.join(result_path,"results","{sample}","mapped", "{sample}.bam.bai"),
         filtered_bam = os.path.join(result_path,"results","{sample}","mapped", "{sample}.filtered.bam"),
         filtered_bai = os.path.join(result_path,"results","{sample}","mapped", "{sample}.filtered.bam.bai"),
+        bowtie2_index = config["bowtie2_index"],
         bowtie_log = os.path.join(result_path, 'results', "{sample}", 'mapped', '{sample}.txt'),
         bowtie_met = os.path.join(result_path, 'results', "{sample}", 'mapped', '{sample}.bowtie2.met'),
         fastp_html = os.path.join(result_path, 'results', "{sample}", 'mapped', '{sample}.fastp.html'),
@@ -16,6 +17,8 @@ rule align:
         samtools_log = os.path.join(result_path, 'results', "{sample}", 'mapped', '{sample}.samtools.log'),
         samtools_flagstat_log = os.path.join(result_path, 'results', "{sample}", 'mapped', '{sample}.samtools_flagstat.log'),
         stats = os.path.join(result_path, 'results', "{sample}", '{sample}.align.stats.tsv'),
+        adapter_fasta = config["adapter_fasta"] if config["adapter_fasta"]!="" else [],
+        whitelisted_regions = config["whitelisted_regions"],
     params:
         interleaved_in = lambda w: "--interleaved_in" if samples["{}".format(w.sample)]["read_type"] == "paired" else " ",
         interleaved = lambda w: "--interleaved" if samples["{}".format(w.sample)]["read_type"] == "paired" else " ",
@@ -26,7 +29,6 @@ rule align:
         sequencing_platform = config["sequencing_platform"],
         sequencing_center = config["sequencing_center"],
         mitochondria_name = config["mitochondria_name"],
-        bowtie2_index = config["bowtie2_index"],
     resources:
         mem_mb=config.get("mem", "16000"),
     threads: 4*config.get("threads", 2)
@@ -43,7 +45,7 @@ rule align:
 
         for i in {input}; do samtools fastq $i 2>> "{output.samtools_log}" ; done | \
         fastp {params.adapter_sequence} {params.adapter_fasta} --stdin {params.interleaved_in} --stdout --html "{output.fastp_html}" --json "{output.fastp_json}" 2> "{output.fastp_log}" | \
-        bowtie2 $RG --very-sensitive --no-discordant -p {threads} --maxins 2000 -x {params.bowtie2_index} --met-file "{output.bowtie_met}" {params.interleaved} - 2> "{output.bowtie_log}" | \
+        bowtie2 $RG --very-sensitive --no-discordant -p {threads} --maxins 2000 -x {input.bowtie2_index} --met-file "{output.bowtie_met}" {params.interleaved} - 2> "{output.bowtie_log}" | \
         samblaster {params.add_mate_tags} 2> "{output.samblaster_log}" | \
         samtools sort -o "{output.bam}" - 2>> "{output.samtools_log}";
 
@@ -59,15 +61,15 @@ rule tss_coverage:
     input:
         bam = os.path.join(result_path,"results","{sample}","mapped","{sample}.filtered.bam"),
         bai = os.path.join(result_path,"results","{sample}","mapped","{sample}.filtered.bam.bai"),
+        chromosome_sizes = config["chromosome_sizes"],
+        unique_tss = config["unique_tss"],
     output:
         tss_hist = os.path.join(result_path,"results","{sample}","{sample}.tss_histogram.csv"),
     params:
         noise_upper = ( config["tss_slop"] * 2 ) - config["noise_lower"],
         double_slop = ( config["tss_slop"] * 2 ),
         genome_size = config["genome_size"],
         tss_slop = config["tss_slop"],
-        unique_tss = config["unique_tss"],
-        chromosome_sizes = config["chromosome_sizes"],
         noise_lower = config["noise_lower"],
     resources:
         mem_mb=config.get("mem", "16000"),
@@ -79,7 +81,7 @@ rule tss_coverage:
     shell:
         """
         echo "base,count" > {output.tss_hist};
-        bedtools slop -b {params.tss_slop} -i {params.unique_tss} -g {params.chromosome_sizes} | \
+        bedtools slop -b {params.tss_slop} -i {input.unique_tss} -g {input.chromosome_sizes} | \
         bedtools coverage -a - -b {input.bam} -d -sorted | \
         awk '{{if($6 == "+"){{ counts[$7] += $8;}} else counts[{params.double_slop} - $7 + 1] += $8;}} END {{ for(pos in counts) {{ if(pos < {params.noise_lower} || pos > {params.noise_upper}) {{ noise += counts[pos] }} }}; average_noise = noise /(2 * {params.noise_lower}); for(pos in counts) {{print pos-2000-1","(counts[pos]/average_noise) }} }}' | \
         sort -t "," -k1,1n >> {output.tss_hist} ;
@@ -91,6 +93,7 @@ rule peak_calling:
         bam = os.path.join(result_path,"results","{sample}","mapped", "{sample}.filtered.bam"),
         bai = os.path.join(result_path,"results","{sample}","mapped", "{sample}.filtered.bam.bai"),
         homer_script = os.path.join(HOMER_path,"configureHomer.pl"),
+        regulatory_regions = config["regulatory_regions"],
     output:
         peak_calls = os.path.join(result_path,"results","{sample}","peaks","{sample}_peaks.narrowPeak"),
         peak_annot = os.path.join(result_path,"results","{sample}","peaks","{sample}_peaks.narrowPeak.annotated.tsv"),
@@ -108,7 +111,6 @@ rule peak_calling:
         formating = lambda w: '--format BAMPE' if samples["{}".format(w.sample)]["read_type"] == "paired" else '--format BAM',
         genome_size = config["genome_size"],
         genome = config["genome"],
-        regulatory_regions = config["regulatory_regions"],
         keep_dup = config['macs2_keep_dup'],
     resources:
         mem_mb=config.get("mem", "16000"),
@@ -136,7 +138,7 @@ rule peak_calling:
 
         samtools view -c -L {output.peak_calls} {input.bam} | awk -v total=$TOTAL_READS '{{print "frip\t" $1/total}}' >> "{output.stats}";
 
-        samtools view -c -L {params.regulatory_regions} {input.bam} | awk -v total=$TOTAL_READS '{{print "regulatory_fraction\t" $1/total}}' >> "{output.stats}";
+        samtools view -c -L {input.regulatory_regions} {input.bam} | awk -v total=$TOTAL_READS '{{print "regulatory_fraction\t" $1/total}}' >> "{output.stats}";
 
         if [ ! -f {output.homer_knownResults} ]; then
             touch {output.homer_knownResults}
workflow/rules/quantification.smk

Lines changed: 12 additions & 1 deletion
@@ -19,10 +19,15 @@ rule sample_annotation:
 # generate promoter regions using (py)bedtools
 rule get_promoter_regions:
     input:
-        config["gencode_gtf"],
+        gencode_gtf = config["gencode_gtf"],
+        chromosome_sizes = config["chromosome_sizes"],
+        genome_fasta = config["genome_fasta"],
     output:
         promoter_regions = os.path.join(result_path,"counts","promoter_regions.bed"),
         promoter_annot = os.path.join(result_path,"counts","promoter_annotation.csv"),
+    params:
+        proximal_size_up = config["proximal_size_up"],
+        proximal_size_dn = config["proximal_size_dn"],
     resources:
         mem_mb = config.get("mem", "16000"),
     threads: config.get("threads", 2)
@@ -37,6 +42,8 @@ rule get_promoter_regions:
 rule get_consensus_regions:
     input:
         summits_bed = expand(os.path.join(result_path,"results","{sample}","peaks","{sample}_summits.bed"), sample=samples_quantify),
+        blacklisted_regions = config["blacklisted_regions"],
+        chromosome_sizes = config["chromosome_sizes"],
     output:
         consensus_regions = os.path.join(result_path,"counts","consensus_regions.bed"),
     resources:
@@ -54,8 +61,11 @@ rule quantify_support_sample:
     input:
         consensus_regions = os.path.join(result_path,"counts","consensus_regions.bed"),
         peakfile = os.path.join(result_path,"results","{sample}","peaks", "{sample}_summits.bed"),
+        chromosome_sizes = config["chromosome_sizes"],
     output:
         quant_support = os.path.join(result_path,"results","{sample}","peaks", "{sample}_quantification_support_counts.csv"),
+    params:
+        slop_extension = config["slop_extension"],
     resources:
         mem_mb=config.get("mem", "16000"),
     threads: config.get("threads", 2)
@@ -71,6 +81,7 @@ rule quantify_counts_sample:
     input:
         regions = os.path.join(result_path,"counts","{kind}_regions.bed"),
         bamfile = os.path.join(result_path,"results","{sample}","mapped", "{sample}.filtered.bam"),
+        chromosome_sizes = config["chromosome_sizes"],
     output:
         quant_counts = os.path.join(result_path,"results","{sample}","mapped", "{sample}_quantification_{kind}_counts.csv"),
     resources:
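
The quantification rules apply the same split: files read by the downstream scripts (chromosome sizes, blacklisted regions, genome FASTA) become named input: entries, while plain tunable values (slop_extension, proximal_size_up/dn) stay under params:. A reduced sketch of how such a rule hands both kinds of values to a script; sample names and paths here are illustrative:

rule get_consensus_regions_example:
    input:
        summits_bed = expand("results/{sample}/peaks/{sample}_summits.bed", sample=["A", "B"]),
        blacklisted_regions = config["blacklisted_regions"],  # file dependency, tracked by Snakemake
        chromosome_sizes = config["chromosome_sizes"],        # file dependency, tracked by Snakemake
    output:
        consensus_regions = "counts/consensus_regions.bed",
    params:
        slop_extension = config["slop_extension"],            # plain value, not a file
    script:
        "../scripts/get_consensus_regions.py"

Inside the script, the input entries arrive as paths via snakemake.input and the value via snakemake.params, as the script changes further down show.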

workflow/rules/region_annotation.smk

Lines changed: 15 additions & 9 deletions
@@ -6,9 +6,16 @@ rule uropa_prepare:
         consensus_regions = os.path.join(result_path,"counts","consensus_regions.bed"),
         gencode_template = workflow.source_path(config["gencode_template"]),
         regulatory_template = workflow.source_path(config["regulatory_template"]),
+        gencode_gtf = config["gencode_gtf"],
+        regulatory_build_gtf = config["regulatory_build_gtf"],
     output:
         gencode_config = os.path.join(result_path,"tmp","consensus_regions_gencode.json"),
         reg_config = os.path.join(result_path,"tmp","consensus_regions_reg.json"),
+    params:
+        tss_size = config['tss_size'],
+        proximal_size_up = config["proximal_size_up"],
+        proximal_size_dn = config["proximal_size_dn"],
+        distal_size = config['distal_size'],
     resources:
         mem_mb=config.get("mem", "16000"),
     threads: config.get("threads", 2)
@@ -20,11 +27,11 @@ rule uropa_prepare:
             gencode_template=Template(f.read())
 
         gencode_config=gencode_template.substitute({
-            'TSS_flanking':config['tss_size'],
-            'TSS_proximal_upstream':config['proximal_size_up'],
-            'TSS_proximal_downstream':config['proximal_size_dn'],
-            'distal_distance':config['distal_size'],
-            'gtf_file':'"{}"'.format(config["gencode_gtf"]),
+            'TSS_flanking':'"{}"'.format(params.tss_size),
+            'TSS_proximal_upstream':'"{}"'.format(params.proximal_size_up),
+            'TSS_proximal_downstream':'"{}"'.format(params.proximal_size_dn),
+            'distal_distance':'"{}"'.format(params.distal_size),
+            'gtf_file':'"{}"'.format(input.gencode_gtf),
             'bed_file':'"{}"'.format(input.consensus_regions)
             })
 
@@ -36,7 +43,7 @@ rule uropa_prepare:
             reg_template=Template(f.read())
 
         reg_config=reg_template.substitute({
-            'gtf_file':'"{}"'.format(config["regulatory_build_gtf"]),
+            'gtf_file':'"{}"'.format(input.regulatory_build_gtf),
             'bed_file':'"{}"'.format(input.consensus_regions)
             })
 
@@ -116,10 +123,9 @@ rule homer_region_annotation:
 rule bedtools_annotation:
     input:
         consensus_regions = os.path.join(result_path,"counts","consensus_regions.bed"),
+        genome_fasta = config["genome_fasta"],
     output:
         bedtools_annotation = os.path.join(result_path, "tmp", "bedtools_annotation.bed"),
-    params:
-        genome_fasta = config["genome_fasta"],
     resources:
         mem_mb=config.get("mem", "16000"),
     threads: config.get("threads", 2)
@@ -129,7 +135,7 @@ rule bedtools_annotation:
         "logs/rules/bedtools_annotation.log"
     shell:
         """
-        bedtools nuc -fi {params.genome_fasta} -bed {input.consensus_regions} > {output.bedtools_annotation}
+        bedtools nuc -fi {input.genome_fasta} -bed {input.consensus_regions} > {output.bedtools_annotation}
        """
 
 # aggregate uropa and homer annotation results
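
uropa_prepare fills its UROPA JSON configs in a run: block with string.Template.substitute; after this change the GTF paths come from input and the window sizes from params, both available as attributes inside run:. A reduced, standalone sketch of that substitution step, where the template path, placeholder values, and output path are illustrative and only the placeholder names follow the diff above:

from string import Template

# illustrative values; inside the rule these would be params.* and input.* attributes
values = {
    'TSS_flanking': '"{}"'.format(1000),
    'TSS_proximal_upstream': '"{}"'.format(2000),
    'TSS_proximal_downstream': '"{}"'.format(500),
    'distal_distance': '"{}"'.format(10000),
    'gtf_file': '"{}"'.format("resources/gencode.gtf"),
    'bed_file': '"{}"'.format("counts/consensus_regions.bed"),
}

with open("gencode_template.json") as f:
    gencode_template = Template(f.read())

# substitute() raises KeyError if the template contains a placeholder missing from the mapping
gencode_config = gencode_template.substitute(values)

with open("consensus_regions_gencode.json", "w") as f:
    f.write(gencode_config)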

workflow/scripts/get_consensus_regions.py

Lines changed: 3 additions & 3 deletions
@@ -9,14 +9,14 @@
 
 # input
 peakfiles = snakemake.input["summits_bed"]
-blacklist_file = snakemake.config["blacklisted_regions"]
-chrom_file = snakemake.config["chromosome_sizes"]
+blacklist_file = snakemake.input["blacklisted_regions"]
+chrom_file = snakemake.input["chromosome_sizes"]
 
 # output
 consensus_regions_path = snakemake.output["consensus_regions"]
 
 # parameters
-slop_extension=snakemake.config["slop_extension"]
+slop_extension = snakemake.params["slop_extension"]
 
 # load summits and generate consensus regions using (py)bedtools
 output_bed = None
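
On the script side the counterpart of the rule change is a switch from snakemake.config[...] to snakemake.input[...] for files and snakemake.params[...] for values: config lookups bypass the declared dependencies, whereas input/params keep the rule definition authoritative. A sketch of the access pattern in a script invoked via the script: directive; the snakemake object is injected by Snakemake and the keys mirror the example rule earlier:

# before: resource paths read from the global config (not visible to the DAG)
# blacklist_file = snakemake.config["blacklisted_regions"]

# after: resource paths read from the rule's declared input (rule re-runs when the file changes)
blacklist_file = snakemake.input["blacklisted_regions"]
chrom_file = snakemake.input["chromosome_sizes"]

# tunable values live in params rather than config, keeping the rule self-describing
slop_extension = snakemake.params["slop_extension"]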

workflow/scripts/get_promoter_regions.py

Lines changed: 5 additions & 5 deletions
@@ -37,17 +37,17 @@ def get_promoter(feature, upstream, downstream, chrom_sizes):
 #### configurations
 
 # input
-gtf_file = snakemake.config["gencode_gtf"]
-chrom_file = snakemake.config["chromosome_sizes"]
+gtf_file = snakemake.input["gencode_gtf"]
+chrom_file = snakemake.input["chromosome_sizes"]
+genome_fasta_path = snakemake.input["genome_fasta"]
 
 # output
 promoter_regions_path = snakemake.output["promoter_regions"]
 promoter_annot_path = snakemake.output["promoter_annot"]
 
 # parameters
-TSS_up = snakemake.config["proximal_size_up"]
-TSS_dn = snakemake.config["proximal_size_dn"]
-genome_fasta_path = snakemake.config["genome_fasta"]
+TSS_up = snakemake.params["proximal_size_up"]
+TSS_dn = snakemake.params["proximal_size_dn"]
 
 # load the genome annotation file using pybedtools
 gtf = bedtools.BedTool(gtf_file)

workflow/scripts/quantify_counts_sample.py

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 # input
 regions_path = snakemake.input["regions"]
 bamfile_path = snakemake.input["bamfile"]
-chrom_file = snakemake.config["chromosome_sizes"]
+chrom_file = snakemake.input["chromosome_sizes"]
 
 # output
 quant_count_path = snakemake.output["quant_counts"]

workflow/scripts/quantify_support_sample.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 # input
 consensus_regions_path = snakemake.input["consensus_regions"]
 peakfile_path = snakemake.input["peakfile"]
-chrom_file = snakemake.config["chromosome_sizes"]
+chrom_file = snakemake.input["chromosome_sizes"]
 
 # output
 quant_support_path = snakemake.output["quant_support"]
