@@ -107,54 +107,84 @@ def load(self, BlobDb_f):
107
107
self .lineages = blobDict ['lineages' ]
108
108
self .set_of_taxIds = blobDict ['lineages' ].keys ()
109
109
self .order_of_blobs = blobDict ['order_of_blobs' ]
110
- self .dict_of_blobs = blobDict ['dict_of_blobs' ] # this will probably not work
110
+ self .dict_of_blobs = blobDict ['dict_of_blobs' ]
111
111
self .length = int (blobDict ['length' ])
112
112
self .seqs = int (blobDict ['seqs' ])
113
113
self .n_count = int (blobDict ['n_count' ])
114
114
self .covLibs = blobDict ['covLibs' ]
115
115
self .hitLibs = blobDict ['hitLibs' ]
116
116
self .taxrules = blobDict ['taxrules' ]
117
117
118
- def getArrays (self , rank , min_length , hide_nohits , taxrule , c_index , label_d ):
119
- from numpy import array
120
- summary_dict = {}
121
- data_list = []
122
- cov_dict = {covLib : [] for covLib in self .covLibs }
118
def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index):
    """Collect per-group data for plotting from all blobs.

    Every blob in ``self.dict_of_blobs`` is assigned to a group, which is
    either the ``c_index`` or the ``tax`` annotation found under
    ``blob['taxonomy'][taxrule][rank]`` (always converted to ``str``).

    Args:
        rank: taxonomic rank key used to look up the annotation.
        min_length: blobs with ``length < min_length`` are counted as hidden.
        hide_nohits: if true, blobs in the group ``'no-hit'`` are counted
            as hidden.
        taxrule: taxrule key used to look up the annotation.
        c_index: if true, group by ``c_index`` instead of ``tax``.

    Returns:
        A tuple ``(data_dict, max_cov, cov_libs, cov_libs_reads_total)``:

        * ``data_dict`` maps group -> dict with the lists ``name``,
          ``length``, ``gc``, the per-library dicts ``covs`` and
          ``reads_mapped``, and the counters ``count``, ``count_hidden``,
          ``count_visible``, ``span``, ``span_hidden``, ``span_visible``.
          When more than one coverage library exists, ``covs`` and
          ``reads_mapped`` carry an additional ``'sum'`` entry.
        * ``max_cov`` is the largest stored coverage value, including the
          per-blob sum across libraries when ``'sum'`` is present.
        * ``cov_libs`` is the list of coverage library names.
        * ``cov_libs_reads_total`` maps library name -> ``reads_total``.

    NOTE(review): the indentation of the original was not recoverable; the
    per-blob lists are filled for visible blobs only, mirroring the former
    getArrays() behaviour - confirm against the intended nesting.
    """
    # Lower bound applied to every stored coverage value.
    # NOTE(review): presumably keeps values positive for log-scaled axes - confirm.
    cov_floor = 0.02

    data_dict = {}
    max_cov = 0.0
    cov_libs = list(self.covLibs)
    cov_libs_sorted = sorted(cov_libs)  # hoisted: invariant across blobs
    has_cov_sum = len(cov_libs) > 1
    cov_libs_reads_total = {cov_lib: data['reads_total'] for cov_lib, data in self.covLibs.items()}
    annotation_key = 'c_index' if c_index else 'tax'

    for blob in self.dict_of_blobs.values():
        name, gc, length = blob['name'], blob['gc'], blob['length']
        group = str(blob['taxonomy'][taxrule][rank][annotation_key])

        if group not in data_dict:
            data_dict[group] = {
                'name': [],
                'length': [],
                'gc': [],
                'covs': {cov_lib: [] for cov_lib in cov_libs},
                'reads_mapped': {cov_lib: 0 for cov_lib in cov_libs},
                'count': 0,
                'count_hidden': 0,
                'count_visible': 0,
                'span': 0,
                'span_hidden': 0,
                'span_visible': 0,
            }
            if has_cov_sum:
                data_dict[group]['covs']['sum'] = []
                data_dict[group]['reads_mapped']['sum'] = 0
        group_data = data_dict[group]

        # Totals cover hidden and visible blobs alike.
        group_data['count'] += 1
        group_data['span'] += int(length)

        if (hide_nohits and group == 'no-hit') or length < min_length:
            # Hidden blobs only contribute to the counters.
            group_data['count_hidden'] += 1
            group_data['span_hidden'] += int(length)
            continue

        group_data['count_visible'] += 1
        group_data['span_visible'] += int(length)
        group_data['name'].append(name)
        group_data['length'].append(length)
        group_data['gc'].append(gc)

        # Tolerate blobs that carry no 'read_cov' entry at all.
        read_cov = blob.get('read_cov', {})
        cov_sum = 0.0
        reads_mapped_sum = 0
        for cov_lib in cov_libs_sorted:
            cov = float(blob['covs'][cov_lib])
            cov_sum += cov  # the sum uses the raw value, not the floored one
            cov = max(cov, cov_floor)
            max_cov = max(max_cov, cov)
            group_data['covs'][cov_lib].append(cov)
            if cov_lib in read_cov:
                reads_mapped = read_cov[cov_lib]
                reads_mapped_sum += reads_mapped
                group_data['reads_mapped'][cov_lib] += reads_mapped

        if has_cov_sum:
            cov_sum = max(cov_sum, cov_floor)
            group_data['covs']['sum'].append(cov_sum)
            # The summed coverage must be able to raise max_cov as well.
            max_cov = max(max_cov, cov_sum)
            group_data['reads_mapped']['sum'] += reads_mapped_sum

    return data_dict, max_cov, cov_libs, cov_libs_reads_total
158
188
159
189
def addCovLib (self , covLib ):
160
190
self .covLibs [covLib .name ] = covLib
@@ -166,8 +196,7 @@ def parseFasta(self, fasta_f, fasta_type):
166
196
self .assembly_f = abspath (fasta_f )
167
197
if (fasta_type ):
168
198
# Set up CovLibObj for coverage in assembly header
169
- cov_lib = CovLibObj (fasta_type , fasta_type , fasta_f )
170
- self .covLibs [covLib .name ] = covLib
199
+ self .covLibs [fasta_type ] = CovLibObj (fasta_type , fasta_type , fasta_f )
171
200
172
201
for name , seq in BtIO .readFasta (fasta_f ):
173
202
blObj = BlObj (name , seq )
@@ -178,7 +207,7 @@ def parseFasta(self, fasta_f, fasta_type):
178
207
179
208
if (fasta_type ):
180
209
cov = BtIO .parseCovFromHeader (fasta_type , blObj .name )
181
- covLib .cov_sum += cov
210
+ self . covLibs [ fasta_type ] .cov_sum += cov
182
211
blObj .addCov (fasta_type , cov )
183
212
184
213
self .order_of_blobs .append (blObj .name )
@@ -196,32 +225,45 @@ def parseCovs(self, covLibObjs):
196
225
if covLib .fmt == 'bam' or covLib .fmt == 'sam' :
197
226
base_cov_dict = {}
198
227
if covLib .fmt == 'bam' :
199
- base_cov_dict , covLib .total_reads , covLib .mapped_reads , covLib . read_cov_dict = BtIO .readBam (covLib .f , set (self .dict_of_blobs ))
228
+ base_cov_dict , covLib .reads_total , covLib .reads_mapped , read_cov_dict = BtIO .readBam (covLib .f , set (self .dict_of_blobs ))
200
229
else :
201
- base_cov_dict , covLib .total_reads , covLib .mapped_reads , covLib .read_cov_dict = BtIO .readSam (covLib .f , set (self .dict_of_blobs ))
230
+ base_cov_dict , covLib .reads_total , covLib .reads_mapped , read_cov_dict = BtIO .readSam (covLib .f , set (self .dict_of_blobs ))
231
+ if covLib .reads_total == 0 :
232
+ print BtLog .warn_d ['4' ] % covLib .f
202
233
for name , base_cov in base_cov_dict .items ():
203
234
cov = base_cov / self .dict_of_blobs [name ].agct_count
204
235
covLib .cov_sum += cov
205
236
self .dict_of_blobs [name ].addCov (covLib .name , cov )
237
+ self .dict_of_blobs [name ].read_cov = {covLib .name : read_cov_dict [name ]}
206
238
elif covLib .fmt == 'cas' :
207
- for name , cov in BtIO .readCas (covLib .f , self .order_of_blobs ):
239
+ cov_dict , covLib .reads_total , covLib .reads_mapped , read_cov_dict = BtIO .readCas (covLib .f , self .order_of_blobs )
240
+ if covLib .reads_total == 0 :
241
+ print BtLog .warn_d ['4' ] % covLib .f
242
+ for name , cov in cov_dict .items ():
208
243
covLib .cov_sum += cov
209
244
self .dict_of_blobs [name ].addCov (covLib .name , cov )
245
+ self .dict_of_blobs [name ].read_cov = {covLib .name : read_cov_dict [name ]}
210
246
elif covLib .fmt == 'cov' :
211
- for name , cov in BtIO .readCov (covLib .f , set (self .dict_of_blobs )):
212
- covLib .cov_sum += cov
213
- self .dict_of_blobs [name ].addCov (covLib .name , cov )
247
+ cov_dict = BtIO .readCov (covLib .f , set (self .dict_of_blobs ))
248
+ if not len (cov_dict ) == self .seqs :
249
+ print BtLog .warn_d ['4' ] % covLib .f
250
+ covLib .cov_sum += cov
251
+ self .dict_of_blobs [name ].addCov (covLib .name , cov )
214
252
else :
215
253
pass
216
254
covLib .mean_cov = covLib .cov_sum / self .seqs
217
255
self .covLibs [covLib .name ] = covLib
218
256
257
+
219
258
def parseHits(self, hitLibs):
    """Register hit libraries and attach their hits to the blobs.

    For every library in ``hitLibs`` the library is stored in
    ``self.hitLibs`` under its name, a status line is printed, and each hit
    yielded by ``BtIO.readTax`` is added to the matching blob via
    ``addHits``; its taxId is also added to ``self.set_of_taxIds``.

    A taxId containing ``';'`` is truncated to the part before the first
    ``';'`` and a warning is printed.
    """
    for hitLib in hitLibs:
        self.hitLibs[hitLib.name] = hitLib
        print(BtLog.status_d['1'] % (hitLib.name, hitLib.f))
        blob_names = set(self.dict_of_blobs)
        # only accepts format 'seqID\ttaxID\tscore'
        for hitDict in BtIO.readTax(hitLib.f, blob_names):
            if ";" in hitDict['taxId']:
                hitDict['taxId'] = hitDict['taxId'].split(";")[0]
                # Warnings live in BtLog.warn_d, as used by the other
                # warning calls in this module.
                # NOTE(review): the second argument is the hitLib object
                # itself, not hitLib.name or hitLib.f - confirm against
                # the warn_d['5'] format string.
                print(BtLog.warn_d['5'] % (hitDict['name'], hitLib))
            self.set_of_taxIds.add(hitDict['taxId'])
            self.dict_of_blobs[hitDict['name']].addHits(hitLib.name, hitDict)
227
269
@@ -246,8 +288,8 @@ def counts(self):
246
288
'Ns' : self .n_count ,
247
289
'AvgCov' : {lib : round (covlibObj .cov_sum / self .seqs , 2 ) for lib , covlibObj in self .covLibs .items ()},
248
290
'GC' : round (sum ([blObj .gc for blObj in self .dict_of_blobs .values ()])/ self .seqs , 2 ),
249
- 'MappedReads' : {lib : (covlibObj .mapped_reads ) for lib , covlibObj in self .covLibs .items ()},
250
- 'TotalReads' : {lib : (covlibObj .total_reads ) for lib , covlibObj in self .covLibs .items ()}
291
+ 'MappedReads' : {lib : (covlibObj .reads_mapped ) for lib , covlibObj in self .covLibs .items ()},
292
+ 'TotalReads' : {lib : (covlibObj .reads_total ) for lib , covlibObj in self .covLibs .items ()}
251
293
}
252
294
print count_dict
253
295
@@ -263,6 +305,7 @@ def __init__(self, name, seq):
263
305
self .agct_count = self .length - self .n_count
264
306
self .gc = round (self .calculateGC (seq ), 4 )
265
307
self .covs = {}
308
+ self .read_cov = {}
266
309
self .hits = {}
267
310
self .taxonomy = {}
268
311
@@ -284,9 +327,8 @@ def __init__(self, name, fmt, f):
284
327
self .fmt = fmt
285
328
self .f = abspath (f )
286
329
self .cov_sum = 0
287
- self .total_reads = 0
288
- self .mapped_reads = 0
289
- self .read_cov_dict = {}
330
+ self .reads_total = 0
331
+ self .reads_mapped = 0
290
332
self .mean_cov = 0.0
291
333
292
334
class hitLibObj ():
0 commit comments