@@ -7,6 +7,7 @@ rule align:
7
7
output_bai = os .path .join (result_path ,"results" ,"{sample}" ,"mapped" , "{sample}.bam.bai" ),
8
8
filtered_bam = os .path .join (result_path ,"results" ,"{sample}" ,"mapped" , "{sample}.filtered.bam" ),
9
9
filtered_bai = os .path .join (result_path ,"results" ,"{sample}" ,"mapped" , "{sample}.filtered.bam.bai" ),
10
+ bowtie2_index = config ["bowtie2_index" ],
10
11
bowtie_log = os .path .join (result_path , 'results' , "{sample}" , 'mapped' , '{sample}.txt' ),
11
12
bowtie_met = os .path .join (result_path , 'results' , "{sample}" , 'mapped' , '{sample}.bowtie2.met' ),
12
13
fastp_html = os .path .join (result_path , 'results' , "{sample}" , 'mapped' , '{sample}.fastp.html' ),
@@ -16,6 +17,8 @@ rule align:
16
17
samtools_log = os .path .join (result_path , 'results' , "{sample}" , 'mapped' , '{sample}.samtools.log' ),
17
18
samtools_flagstat_log = os .path .join (result_path , 'results' , "{sample}" , 'mapped' , '{sample}.samtools_flagstat.log' ),
18
19
stats = os .path .join (result_path , 'results' , "{sample}" , '{sample}.align.stats.tsv' ),
20
+ adapter_fasta = config ["adapter_fasta" ] if config ["adapter_fasta" ]!= "" else [],
21
+ whitelisted_regions = config ["whitelisted_regions" ],
19
22
params :
20
23
interleaved_in = lambda w : "--interleaved_in" if samples ["{}" .format (w .sample )]["read_type" ] == "paired" else " " ,
21
24
interleaved = lambda w : "--interleaved" if samples ["{}" .format (w .sample )]["read_type" ] == "paired" else " " ,
@@ -26,7 +29,6 @@ rule align:
26
29
sequencing_platform = config ["sequencing_platform" ],
27
30
sequencing_center = config ["sequencing_center" ],
28
31
mitochondria_name = config ["mitochondria_name" ],
29
- bowtie2_index = config ["bowtie2_index" ],
30
32
resources :
31
33
mem_mb = config .get ("mem" , "16000" ),
32
34
threads : 4 * config .get ("threads" , 2 )
@@ -43,7 +45,7 @@ rule align:
43
45
44
46
for i in {input}; do samtools fastq $i 2>> "{output.samtools_log}" ; done | \
45
47
fastp {params.adapter_sequence} {params.adapter_fasta} --stdin {params.interleaved_in} --stdout --html "{output.fastp_html}" --json "{output.fastp_json}" 2> "{output.fastp_log}" | \
46
- bowtie2 $RG --very-sensitive --no-discordant -p {threads} --maxins 2000 -x {params .bowtie2_index} --met-file "{output.bowtie_met}" {params.interleaved} - 2> "{output.bowtie_log}" | \
48
+ bowtie2 $RG --very-sensitive --no-discordant -p {threads} --maxins 2000 -x {input .bowtie2_index} --met-file "{output.bowtie_met}" {params.interleaved} - 2> "{output.bowtie_log}" | \
47
49
samblaster {params.add_mate_tags} 2> "{output.samblaster_log}" | \
48
50
samtools sort -o "{output.bam}" - 2>> "{output.samtools_log}";
49
51
@@ -59,15 +61,15 @@ rule tss_coverage:
59
61
input :
60
62
bam = os .path .join (result_path ,"results" ,"{sample}" ,"mapped" ,"{sample}.filtered.bam" ),
61
63
bai = os .path .join (result_path ,"results" ,"{sample}" ,"mapped" ,"{sample}.filtered.bam.bai" ),
64
+ chromosome_sizes = config ["chromosome_sizes" ],
65
+ unique_tss = config ["unique_tss" ],
62
66
output :
63
67
tss_hist = os .path .join (result_path ,"results" ,"{sample}" ,"{sample}.tss_histogram.csv" ),
64
68
params :
65
69
noise_upper = ( config ["tss_slop" ] * 2 ) - config ["noise_lower" ],
66
70
double_slop = ( config ["tss_slop" ] * 2 ),
67
71
genome_size = config ["genome_size" ],
68
72
tss_slop = config ["tss_slop" ],
69
- unique_tss = config ["unique_tss" ],
70
- chromosome_sizes = config ["chromosome_sizes" ],
71
73
noise_lower = config ["noise_lower" ],
72
74
resources :
73
75
mem_mb = config .get ("mem" , "16000" ),
@@ -79,7 +81,7 @@ rule tss_coverage:
79
81
shell :
80
82
"""
81
83
echo "base,count" > {output.tss_hist};
82
- bedtools slop -b {params.tss_slop} -i {params .unique_tss} -g {params .chromosome_sizes} | \
84
+ bedtools slop -b {params.tss_slop} -i {input .unique_tss} -g {input .chromosome_sizes} | \
83
85
bedtools coverage -a - -b {input.bam} -d -sorted | \
84
86
awk '{{if($6 == "+"){{ counts[$7] += $8;}} else counts[{params.double_slop} - $7 + 1] += $8;}} END {{ for(pos in counts) {{ if(pos < {params.noise_lower} || pos > {params.noise_upper}) {{ noise += counts[pos] }} }}; average_noise = noise /(2 * {params.noise_lower}); for(pos in counts) {{print pos-2000-1","(counts[pos]/average_noise) }} }}' | \
85
87
sort -t "," -k1,1n >> {output.tss_hist} ;
@@ -91,6 +93,7 @@ rule peak_calling:
91
93
bam = os .path .join (result_path ,"results" ,"{sample}" ,"mapped" , "{sample}.filtered.bam" ),
92
94
bai = os .path .join (result_path ,"results" ,"{sample}" ,"mapped" , "{sample}.filtered.bam.bai" ),
93
95
homer_script = os .path .join (HOMER_path ,"configureHomer.pl" ),
96
+ regulatory_regions = config ["regulatory_regions" ],
94
97
output :
95
98
peak_calls = os .path .join (result_path ,"results" ,"{sample}" ,"peaks" ,"{sample}_peaks.narrowPeak" ),
96
99
peak_annot = os .path .join (result_path ,"results" ,"{sample}" ,"peaks" ,"{sample}_peaks.narrowPeak.annotated.tsv" ),
@@ -108,7 +111,6 @@ rule peak_calling:
108
111
formating = lambda w : '--format BAMPE' if samples ["{}" .format (w .sample )]["read_type" ] == "paired" else '--format BAM' ,
109
112
genome_size = config ["genome_size" ],
110
113
genome = config ["genome" ],
111
- regulatory_regions = config ["regulatory_regions" ],
112
114
keep_dup = config ['macs2_keep_dup' ],
113
115
resources :
114
116
mem_mb = config .get ("mem" , "16000" ),
@@ -136,7 +138,7 @@ rule peak_calling:
136
138
137
139
samtools view -c -L {output.peak_calls} {input.bam} | awk -v total=$TOTAL_READS '{{print "frip\t " $1/total}}' >> "{output.stats}";
138
140
139
- samtools view -c -L {params .regulatory_regions} {input.bam} | awk -v total=$TOTAL_READS '{{print "regulatory_fraction\t " $1/total}}' >> "{output.stats}";
141
+ samtools view -c -L {input .regulatory_regions} {input.bam} | awk -v total=$TOTAL_READS '{{print "regulatory_fraction\t " $1/total}}' >> "{output.stats}";
140
142
141
143
if [ ! -f {output.homer_knownResults} ]; then
142
144
touch {output.homer_knownResults}
0 commit comments