|
8 | 8 | import math
|
9 | 9 | import numpy as np
|
10 | 10 |
|
def create_ratio_header(replicate_number):
    """Build the tab-separated ratio column names for both conditions.

    The result lists ``Ratio-A-1`` .. ``Ratio-A-n`` followed by
    ``Ratio-B-1`` .. ``Ratio-B-n``, where ``n`` is ``replicate_number``.

    Args:
        replicate_number: Number of replicates per condition.

    Returns:
        One string holding all condition-A labels, then all condition-B
        labels, separated by tabs. With zero replicates the result is the
        lone group separator ``"\\t"``.
    """
    replicates = range(1, replicate_number + 1)
    cond1string = '\t'.join("Ratio-A-" + str(replicate) for replicate in replicates)
    cond2string = '\t'.join("Ratio-B-" + str(replicate) for replicate in replicates)
    # The two groups are always joined by exactly one tab, even when empty.
    return cond1string + "\t" + cond2string
11 | 23 |
|
def uORF_change(uORFrowIn, ORFreadsIn):
    """Compute per-replicate main-ORF/uORF ratios and their log2 fold change.

    Both sequences are read with the same layout: the first half holds the
    replicates of condition 1 and the second half the replicates of
    condition 2, so replicate ``i`` of condition 2 sits at index
    ``i + replicate_number``.

    Args:
        uORFrowIn: Read counts of the uORF, one value per sample.
        ORFreadsIn: Read counts of the main ORF, indexed like ``uORFrowIn``
            (it must provide at least as many entries).

    Returns:
        A tuple ``(cond1_ratios, cond2_ratios, logaveragechange)`` where the
        two lists hold the ``ORF / uORF`` ratio of every replicate for
        condition 1 and condition 2, and ``logaveragechange`` is the log2 of
        the mean of the per-replicate ``ratio1 / ratio2`` values.

    Raises:
        ValueError: If ``uORFrowIn`` is empty or has an odd number of
            entries, i.e. it cannot be split into two equally sized
            conditions.
    """
    sample_count = len(uORFrowIn)
    # An odd or empty row can never be split into two conditions; fail with a
    # clear message instead of an IndexError / ZeroDivisionError further down.
    if sample_count == 0 or sample_count % 2 != 0:
        raise ValueError(
            "expected a non-empty, even number of read counts "
            "(condition 1 replicates followed by condition 2 replicates), "
            "got " + str(sample_count)
        )
    replicate_number = sample_count // 2

    cond1_ratios = []
    cond2_ratios = []
    changesum = 0
    for replicate in range(replicate_number):
        cond2_index = replicate + replicate_number
        # Every count is offset by 1 before dividing (presumably a
        # pseudo-count so that zero reads do not cause a division by zero).
        ratio1 = (ORFreadsIn[replicate] + 1) / (uORFrowIn[replicate] + 1)
        ratio2 = (ORFreadsIn[cond2_index] + 1) / (uORFrowIn[cond2_index] + 1)
        cond1_ratios.append(ratio1)
        cond2_ratios.append(ratio2)
        changesum += ratio1 / ratio2

    averagechange = changesum / replicate_number
    logaveragechange = math.log2(averagechange)
    return (cond1_ratios, cond2_ratios, logaveragechange)
41 | 57 |
|
def uORF_changes(uorf_table, uorf_reads_dict, orf_reads_dict):
    """Render one tab-separated output line per row of ``uorf_table``.

    For every row the uORF and main-ORF read counts are looked up by the
    row's ``'uORFids'`` and ``'transcript_id'`` values, passed to
    ``uORF_change``, and the results are appended to the row's own values.

    Args:
        uorf_table: Table iterated with ``iterrows()``; each row must expose
            the ``'uORFids'`` and ``'transcript_id'`` fields.
        uorf_reads_dict: Mapping from uORF id to that uORF's read counts.
        orf_reads_dict: Mapping from transcript id to the main ORF's read
            counts.

    Returns:
        A list of strings, one per table row, each consisting of the row
        values, the condition-1 ratios, the condition-2 ratios and the log2
        average change, all separated by tabs (no trailing newline).

    Raises:
        KeyError: If a row's id is missing from the corresponding mapping
            (assuming the mappings are plain dicts).
    """
    output = []
    for _, uORFrow in uorf_table.iterrows():
        uORFreads = uorf_reads_dict[uORFrow['uORFids']]
        ORFreads = orf_reads_dict[uORFrow['transcript_id']]
        cond1ratios, cond2ratios, logaveragechange = uORF_change(uORFreads, ORFreads)
        # Each group is joined on its own first so the column layout stays
        # annotation | cond1 ratios | cond2 ratios | log2 change.
        column_groups = (
            '\t'.join(map(str, uORFrow)),
            '\t'.join(map(str, cond1ratios)),
            '\t'.join(map(str, cond2ratios)),
            str(logaveragechange),
        )
        output.append('\t'.join(column_groups))
    return output
|
72 | 72 |
|
@@ -98,8 +98,10 @@ def main():
|
98 | 98 | orf_reads.columns = orf_cols
|
99 | 99 | orf_reads_dict = orf_reads.set_index('ID').T.to_dict('list')
|
100 | 100 | df_final = create_output(args)
|
| 101 | + replicate_number = math.ceil(len(uorf_reads.columns)/2) |
101 | 102 | changes_list = uORF_changes(df_final, uorf_reads_dict, orf_reads_dict)
|
102 |
| - changes_header = "coordinates\tgene_symbol\ttranscript_id\tuORF_id\tmean_reads_uORF_c1\tmean_reads_ORF_c1\tmean_reads_uORF_c2\tmean_reads_ORF_c2\tlog2FC_main_ORF_to_uORF_ratios\n" |
| 103 | + ratios_header = create_ratio_header(replicate_number) |
| 104 | + changes_header = "coordinates\tgene_symbol\ttranscript_id\tuORF_id\t" + ratios_header + "\tlog2FC_main_ORF_to_uORF_ratios\n" |
103 | 105 | changes_string = changes_header + '\n'.join(map(str, changes_list))
|
104 | 106 | f = open(args.output_csv_filepath, 'wt', encoding='utf-8')
|
105 | 107 | f.write(changes_string)
|
|
0 commit comments