@@ -107,26 +107,60 @@ def dump(self):
107
107
108
108
def load (self , BlobDb_f ):
109
109
blobDict = BtIO .readJson (BlobDb_f )
110
- self .title = blobDict ['title' ]
111
- self .assembly_f = blobDict ['assembly_f' ]
112
- self .nodesDB_f = blobDict ['nodesDB_f' ]
113
- self .lineages = blobDict ['lineages' ]
110
+ for k , v in blobDict .items ():
111
+ setattr (self , k , v )
114
112
self .set_of_taxIds = blobDict ['lineages' ].keys ()
115
- self .order_of_blobs = blobDict ['order_of_blobs' ]
116
- self .dict_of_blobs = blobDict ['dict_of_blobs' ]
117
- self .length = int (blobDict ['length' ])
118
- self .seqs = int (blobDict ['seqs' ])
119
- self .n_count = int (blobDict ['n_count' ])
120
- self .covLibs = blobDict ['covLibs' ]
121
- self .hitLibs = blobDict ['hitLibs' ]
122
- self .taxrules = blobDict ['taxrules' ]
113
+ #for k, v in self.__dict__.items():
114
+ # print k, type(v), v # this seems to work
115
+
116
+ #self.title = blobDict['title']
117
+ #self.assembly_f = blobDict['assembly_f']
118
+ #self.nodesDB_f = blobDict['nodesDB_f']
119
+ #self.lineages = blobDict['lineages']
120
+ #self.set_of_taxIds = blobDict['lineages'].keys()
121
+ #self.order_of_blobs = blobDict['order_of_blobs']
122
+ #self.dict_of_blobs = blobDict['dict_of_blobs']
123
+ #self.length = int(blobDict['length'])
124
+ #self.seqs = int(blobDict['seqs'])
125
+ #self.n_count = int(blobDict['n_count'])
126
+ #self.covLibs = blobDict['covLibs']
127
+ #self.hitLibs = blobDict['hitLibs']
128
+ #self.taxrules = blobDict['taxrules']
129
+
130
+ # self.title = title
131
+ # self.assembly_f = ''
132
+ # self.dict_of_blobs = {}
133
+ # self.order_of_blobs = []
134
+ # self.set_of_taxIds = set()
135
+ # self.lineages = {}
136
+ # self.length = 0
137
+ # self.seqs = 0
138
+ # self.n_count = 0
139
+ # self.covLibs = {}
140
+ # self.hitLibs = {}
141
+ # self.nodesDB_f = ''
142
+ # self.taxrules = []
123
143
124
144
def getPlotData (self , rank , min_length , hide_nohits , taxrule , c_index , catcolour_dict ):
125
145
data_dict = {}
126
146
read_cov_dict = {}
127
147
max_cov = 0.0
128
- cov_libs = self .covLibs .keys ()
129
- cov_libs_reads_total = {cov_lib : data ['reads_total' ] for cov_lib , data in self .covLibs .items ()}
148
+ print self .covLibs
149
+ cov_lib_dict = self .covLibs
150
+ #print cov_lib_dict
151
+ cov_lib_names_l = self .covLibs .keys () # does not include cov_sum
152
+
153
+ if len (cov_lib_names_l ) > 1 :
154
+ # more than one cov_lib, cov_sum_lib has to be created
155
+ cov_lib_dict ['sum' ] = CovLibObj ('sum' , 'sum' , None ).__dict__ # ugly
156
+ cov_lib_dict ['sum' ]['reads_total' ] = sum ([cov_lib_dict [x ]['reads_total' ] for x in cov_lib_dict ])
157
+ cov_lib_dict ['sum' ]['reads_mapped' ] = sum ([cov_lib_dict [x ]['reads_mapped' ] for x in cov_lib_dict ])
158
+
159
+ #print self.covLibs
160
+ #cov_libs_reads_total = {cov_lib : data['reads_total'] for cov_lib, data in self.covLibs.items()}
161
+ #print cov_libs_reads_total # correct
162
+ #cov_libs_reads_mapped = {cov_lib : data['reads_mapped'] for cov_lib, data in self.covLibs.items()}
163
+ #print cov_libs_reads_mapped # correct
130
164
131
165
for blob in self .dict_of_blobs .values ():
132
166
name , gc , length , group = blob ['name' ], blob ['gc' ], blob ['length' ], ''
@@ -143,21 +177,19 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
143
177
'name' : [],
144
178
'length' : [],
145
179
'gc' : [],
146
- 'covs' : {covLib : [] for covLib in cov_libs },
147
- 'reads_mapped' : {covLib : 0 for covLib in cov_libs },
180
+ 'covs' : {covLib : [] for covLib in cov_lib_dict . keys ()}, # includes cov_sum if it exists
181
+ 'reads_mapped' : {covLib : 0 for covLib in cov_lib_dict . keys ()}, # includes cov_sum if it exists
148
182
'count' : 0 ,
149
183
'count_hidden' : 0 ,
150
184
'count_visible' : 0 ,
151
185
'span' : 0 ,
152
186
'span_hidden' : 0 ,
153
187
'span_visible' : 0 ,
154
188
}
155
- if len (cov_libs ) > 1 :
156
- data_dict [group ]['covs' ]['cov_sum' ] = []
157
- data_dict [group ]['reads_mapped' ]['cov_sum' ] = 0
158
189
159
190
data_dict [group ]['count' ] = data_dict [group ].get ('count' , 0 ) + 1
160
191
data_dict [group ]['span' ] = data_dict [group ].get ('span' , 0 ) + int (length )
192
+
161
193
if ((hide_nohits ) and group == 'no-hit' ) or length < min_length : # hidden
162
194
data_dict [group ]['count_hidden' ] = data_dict [group ].get ('count_hidden' , 0 ) + 1
163
195
data_dict [group ]['span_hidden' ] = data_dict [group ].get ('span_hidden' , 0 ) + int (length )
@@ -170,35 +202,45 @@ def getPlotData(self, rank, min_length, hide_nohits, taxrule, c_index, catcolour
170
202
171
203
cov_sum = 0.0
172
204
reads_mapped_sum = 0
173
- for cov_lib in sorted (cov_libs ):
174
- cov = float (blob ['covs' ][cov_lib ])
175
- cov_sum += cov
176
- cov = cov if cov > 0.02 else 0.02
205
+ for cov_lib in sorted (cov_lib_names_l ):
206
+ cov = float (blob ['covs' ][cov_lib ])
207
+ if cov < 0.02 :
208
+ cov = 0.02
209
+ #cov = cov if cov > 0.02 else 0.02
210
+ # increase max_cov
177
211
if cov > max_cov :
178
212
max_cov = cov
179
- data_dict [group ]['covs' ][cov_lib ].append (cov )
180
- if cov_lib in blob ['read_cov' ]:
213
+
214
+ # add cov of blob to group
215
+ data_dict [group ]['covs' ][cov_lib ].append (cov )
216
+
217
+ cov_sum += cov
218
+
219
+ # add readcov
220
+ if cov_lib in blob ['read_cov' ]:
181
221
reads_mapped = blob ['read_cov' ][cov_lib ]
182
- reads_mapped_sum += reads_mapped
183
222
data_dict [group ]['reads_mapped' ][cov_lib ] += reads_mapped
223
+ reads_mapped_sum += reads_mapped
184
224
185
- if len (cov_libs ) > 1 :
186
- cov_sum = cov_sum if cov_sum > 0.02 else 0.02
187
- data_dict [group ]['covs' ]['cov_sum' ].append (cov_sum )
188
- if cov > max_cov :
189
- max_cov = cov
225
+ if len (cov_lib_names_l ) > 1 :
226
+ if cov_sum < 0.02 :
227
+ cov_sum = 0.02
228
+ data_dict [group ]['covs' ]['sum' ].append (cov_sum )
229
+ if cov_sum > max_cov :
230
+ max_cov = cov_sum
190
231
if (reads_mapped_sum ):
191
- data_dict [group ]['reads_mapped' ]['cov_sum' ] += reads_mapped_sum
192
-
193
- #data_dict[group]['count'] = data_dict[group].get('count', 0) + 1
194
- #data_dict[group]['span'] = data_dict[group].get('span', 0) + int(length)
232
+ data_dict [group ]['reads_mapped' ]['sum' ] += reads_mapped_sum
233
+ #if len(cov_lib_names_l) > 1:
234
+ # for cov_lib, data in self.covLibs.items():
235
+ # cov_libs_reads_total['cov_sum'] = cov_libs_reads_total.get('cov_sum', 0) + data['reads_total']
236
+
195
237
196
- if len ( cov_libs ) > 1 :
197
- cov_libs . append ( 'cov_sum' )
198
- for cov_lib , data in self . covLibs . items () :
199
- cov_libs_reads_total [ 'cov_sum' ] = cov_libs_reads_total . get ( 'cov_sum' , 0 ) + data [ 'reads_total' ]
238
+ #for group in data_dict :
239
+ # print "#", group
240
+ # for cat in data_dict[group] :
241
+ # print cat, data_dict[group][cat]
200
242
201
- return data_dict , max_cov , cov_libs , cov_libs_reads_total
243
+ return data_dict , max_cov , cov_lib_dict
202
244
203
245
def addCovLib (self , covLib ):
204
246
self .covLibs [covLib .name ] = covLib
@@ -237,26 +279,31 @@ def parseCovs(self, covLibObjs):
237
279
self .addCovLib (covLib )
238
280
print BtLog .status_d ['1' ] % (covLib .name , covLib .f )
239
281
if covLib .fmt == 'bam' or covLib .fmt == 'sam' :
282
+
240
283
base_cov_dict = {}
241
284
if covLib .fmt == 'bam' :
242
285
base_cov_dict , covLib .reads_total , covLib .reads_mapped , read_cov_dict = BtIO .readBam (covLib .f , set (self .dict_of_blobs ))
243
286
else :
244
- base_cov_dict , covLib .reads_total , covLib .reads_mapped , read_cov_dict = BtIO .readSam (covLib .f , set (self .dict_of_blobs ))
287
+ base_cov_dict , covLib .reads_total , covLib .reads_mapped , read_cov_dict = BtIO .readSam (covLib .f , set (self .dict_of_blobs ))
288
+
245
289
if covLib .reads_total == 0 :
246
290
print BtLog .warn_d ['4' ] % covLib .f
291
+
247
292
for name , base_cov in base_cov_dict .items ():
248
293
cov = base_cov / self .dict_of_blobs [name ].agct_count
249
294
covLib .cov_sum += cov
250
295
self .dict_of_blobs [name ].addCov (covLib .name , cov )
251
- self .dict_of_blobs [name ].read_cov = {covLib .name : read_cov_dict [name ]}
296
+ self .dict_of_blobs [name ].addReadCov (covLib .name , read_cov_dict [name ])
297
+
252
298
elif covLib .fmt == 'cas' :
253
299
cov_dict , covLib .reads_total , covLib .reads_mapped , read_cov_dict = BtIO .readCas (covLib .f , self .order_of_blobs )
254
300
if covLib .reads_total == 0 :
255
301
print BtLog .warn_d ['4' ] % covLib .f
256
302
for name , cov in cov_dict .items ():
257
303
covLib .cov_sum += cov
258
304
self .dict_of_blobs [name ].addCov (covLib .name , cov )
259
- self .dict_of_blobs [name ].read_cov = {covLib .name : read_cov_dict [name ]}
305
+ self .dict_of_blobs [name ].addReadCov (covLib .name , read_cov_dict [name ])
306
+
260
307
elif covLib .fmt == 'cov' :
261
308
cov_dict = BtIO .readCov (covLib .f , set (self .dict_of_blobs ))
262
309
if not len (cov_dict ) == self .seqs :
@@ -267,6 +314,8 @@ def parseCovs(self, covLibObjs):
267
314
else :
268
315
pass
269
316
covLib .mean_cov = covLib .cov_sum / self .seqs
317
+ if covLib .cov_sum == 0.0 :
318
+ BtLog .error ('26' , covLib .name )
270
319
self .covLibs [covLib .name ] = covLib
271
320
272
321
@@ -295,18 +344,6 @@ def computeTaxonomy(self, taxrules, nodesDB):
295
344
blObj .taxonomy [taxrule ] = BtTax .taxRule (taxrule , blObj .hits , self .lineages )
296
345
else :
297
346
blObj .taxonomy [taxrule ] = BtTax .noHit ()
298
-
299
- def counts (self ):
300
- count_dict = {
301
- 'seqs' : self .seqs ,
302
- 'length' : self .length ,
303
- 'Ns' : self .n_count ,
304
- 'AvgCov' : {lib : round (covlibObj .cov_sum / self .seqs , 2 ) for lib , covlibObj in self .covLibs .items ()},
305
- 'GC' : round (sum ([blObj .gc for blObj in self .dict_of_blobs .values ()])/ self .seqs , 2 ),
306
- 'MappedReads' : {lib : (covlibObj .reads_mapped ) for lib , covlibObj in self .covLibs .items ()},
307
- 'TotalReads' : {lib : (covlibObj .reads_total ) for lib , covlibObj in self .covLibs .items ()}
308
- }
309
- print count_dict
310
347
311
348
def getBlobs (self ):
312
349
for blObj in [self .dict_of_blobs [key ] for key in self .order_of_blobs ]:
@@ -328,8 +365,11 @@ def calculateGC(self, seq):
328
365
return float ((seq .count ('G' ) + seq .count ('C' ) ) / self .agct_count \
329
366
if self .agct_count > 0 else 0.0 )
330
367
331
- def addCov (self , name , cov ):
332
- self .covs [name ] = cov
368
+ def addCov (self , lib_name , cov ):
369
+ self .covs [lib_name ] = cov
370
+
371
+ def addReadCov (self , lib_name , read_cov ):
372
+ self .read_cov [lib_name ] = read_cov
333
373
334
374
def addHits (self , hitLibName , hitDict ):
335
375
if not hitLibName in self .hits :
@@ -340,8 +380,8 @@ class CovLibObj():
340
380
def __init__ (self , name , fmt , f ):
341
381
self .name = name
342
382
self .fmt = fmt
343
- self .f = abspath (f )
344
- self .cov_sum = 0
383
+ self .f = abspath (f ) if ( f ) else ''
384
+ self .cov_sum = 0.0
345
385
self .reads_total = 0
346
386
self .reads_mapped = 0
347
387
self .mean_cov = 0.0
@@ -350,7 +390,7 @@ class hitLibObj():
350
390
def __init__ (self , name , fmt , f ):
351
391
self .name = name
352
392
self .fmt = fmt
353
- self .f = abspath (f )
393
+ self .f = abspath (f ) if ( f ) else ''
354
394
355
395
if __name__ == '__main__' :
356
396
pass
0 commit comments