"""
File : BtIO.py
Version : 0.1
-Author : Dominik R. Laetsch, dominik.laetsch at gmail dot com
+Author : Dominik R. Laetsch, dominik.laetsch at gmail dot com
Bugs : ?
To do : ?
"""
@@ -25,9 +25,9 @@ def parseList(infile):
    return seqs

def readFasta(infile):
-    with open(infile) as fh:
+    with open(infile) as fh:
        header, seqs = '', []
-        for l in fh:
+        for l in fh:
            if l[0] == '>':
                if (header):
                    yield header, ''.join(seqs)
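readFasta is a generator that yields (header, sequence) tuples one record at a time, so a FASTA file never has to be held in memory as a whole. A minimal usage sketch (the file name is hypothetical):

    # stream an assembly FASTA and report each sequence length (Python 2 print)
    for header, seq in readFasta("assembly.fna"):
        print header, len(seq)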
@@ -89,15 +89,15 @@ def readSam(infile, set_of_blobs):
            if match >= 11:
                reads_total += 1
                seq_name = match[2]
-                if not seq_name == '*':
+                if not seq_name == '*':
                    if seq_name not in set_of_blobs:
                        print BtLog.warn_d['2'] % (seq_name, infile)
                    base_cov = sum([int(matching) for matching in cigar_match_re.findall(match[5])])
                    if (base_cov):
                        reads_mapped += 1
-                        base_cov_dict[seq_name] = base_cov_dict.get(seq_name, 0) + base_cov
-                        read_cov_dict[seq_name] = read_cov_dict.get(seq_name, 0) + 1
-    return base_cov_dict, reads_total, reads_mapped, read_cov_dict
+                        base_cov_dict[seq_name] = base_cov_dict.get(seq_name, 0) + base_cov
+                        read_cov_dict[seq_name] = read_cov_dict.get(seq_name, 0) + 1
+    return base_cov_dict, reads_total, reads_mapped, read_cov_dict

def readBam(infile, set_of_blobs):
    reads_total, reads_mapped = checkBam(infile)
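The base coverage credited to a contig comes from column 6 of each SAM record, the CIGAR string. The pattern cigar_match_re is defined outside this hunk; assuming it captures the lengths of the M operations (e.g. re.compile(r"(\d+)M")), the summing step works like this:

    import re
    cigar_match_re = re.compile(r"(\d+)M")   # assumption: count only aligned (M) runs
    cigar = "20M5I30M2D10M"                  # hypothetical CIGAR string from match[5]
    base_cov = sum([int(matching) for matching in cigar_match_re.findall(cigar)])
    # base_cov == 60 (20 + 30 + 10 aligned bases)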
@@ -111,7 +111,7 @@ def readBam(infile, set_of_blobs):
    #command = "samtools view -F 1028 " + infile
    # only one counter since only yields mapped reads
    seen_reads = 0
-    parsed_reads = 0
+    parsed_reads = 0
    for line in runCmd(command):
        match = line.split("\t")
        seen_reads += 1
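For context on the commented-out variant: in samtools view, -F 1028 skips alignments whose FLAG has the unmapped (0x4) or PCR/optical duplicate (0x400) bits set (4 + 1024 = 1028); the command that is actually run is built further up in the function, outside this hunk.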
@@ -122,16 +122,16 @@ def readBam(infile, set_of_blobs):
            if seq_name not in set_of_blobs:
                print BtLog.warn_d['2'] % (seq_name, infile)
            else:
-                base_cov_dict[seq_name] = base_cov_dict.get(seq_name, 0) + base_cov
-                read_cov_dict[seq_name] = read_cov_dict.get(seq_name, 0) + 1
+                base_cov_dict[seq_name] = base_cov_dict.get(seq_name, 0) + base_cov
+                read_cov_dict[seq_name] = read_cov_dict.get(seq_name, 0) + 1
        BtLog.progress(seen_reads, progress_unit, reads_mapped)
    if not int(reads_mapped) == int(parsed_reads):
        print warn_d['3'] % (reads_mapped, parsed_reads)
    return base_cov_dict, reads_total, parsed_reads, read_cov_dict

def parseCovFromHeader(fasta_type, header):
-    '''
-    Returns the coverage from the header of a FASTA
+    '''
+    Returns the coverage from the header of a FASTA
    sequence depending on the assembly type
    '''
    if fasta_type == 'spades':
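parseCovFromHeader reads coverage straight out of the sequence name; the body of the 'spades' branch sits outside this hunk, but SPAdes names contigs in the form NODE_<n>_length_<len>_cov_<cov>, so one plausible sketch of that branch is:

    header = "NODE_1_length_46529_cov_12.75"   # hypothetical SPAdes-style header
    cov = float(header.split("_cov_")[1])      # -> 12.75; the parsing in the file may differ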
@@ -187,12 +187,12 @@ def readCas(infile, order_of_blobs):
    command = "clc_mapping_info -n " + infile
    cov_dict = {}
    read_cov_dict = {}
-    seqs_parsed = 0
+    seqs_parsed = 0
    if (runCmd(command)):
        for line in runCmd(command):
            cas_line_match = cas_line_re.search(line)
            if cas_line_match:
-                idx = int(cas_line_match.group(1)) - 1 # -1 because index of contig list starts with zero
+                idx = int(cas_line_match.group(1)) - 1 # -1 because index of contig list starts with zero
                try:
                    name = order_of_blobs[idx]
                    reads = int(cas_line_match.group(3))
@@ -239,26 +239,23 @@ def parseColourDict(infile):

def getNodesDB(**kwargs):
    '''
-    Parsing names.dmp and nodes.dmp into the 'nodes_db' dict of dicts that
-    gets JSON'ed into blobtools/data/nodes_db.json if this file
-    does not exist. This file is used if neither "--names" and "--nodes"
+    Parsing names.dmp and nodes.dmp into the 'nodes_db' dict of dicts that
+    gets JSON'ed into blobtools/data/nodes_db.json if this file
+    does not exist. This file is used if neither "--names" and "--nodes"
    nor "--db" is specified.
    '''
    nodesDB = {}
-    nodesDB_f = ''
-    if (kwargs['nodesDB']):
-        print BtLog.status_d['4'] % (kwargs['nodesDB'])
-        nodesDB = readNodesDB(kwargs['nodesDB'])
-        nodesDB_f = kwargs['nodesDB']
-    elif (kwargs['names'] and kwargs['nodes']):
+    nodesDB_f = ''
+
+    if (kwargs['names'] and kwargs['nodes']):
        print BtLog.status_d['3'] % (kwargs['nodes'], kwargs['names'])
        nodesDB = {}
        nodes_count = 0
        with open(kwargs['nodes']) as fh:
            for line in fh:
                nodes_col = line.split("\t")
                node = {}
-                node_id = nodes_col[0]
+                node_id = nodes_col[0]
                node['parent'] = nodes_col[2]
                node['rank'] = nodes_col[4]
                nodesDB[node_id] = node
@@ -270,6 +267,10 @@ def getNodesDB(**kwargs):
                nodesDB[names_col[0]]['name'] = names_col[2]
        nodesDB_f = kwargs['nodesDB']
        nodesDB['nodes_count'] = nodes_count
+    elif (kwargs['nodesDB']):
+        print BtLog.status_d['4'] % (kwargs['nodesDB'])
+        nodesDB = readNodesDB(kwargs['nodesDB'])
+        nodesDB_f = kwargs['nodesDB']
    else:
        BtLog.error('3')
    return nodesDB, nodesDB_f
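The column indices 0, 2 and 4 used above line up with the NCBI taxdump layout, where nodes.dmp fields are separated by "\t|\t". A small sketch with example taxon IDs shows why a split on "\t" makes those the tax_id, parent and rank columns:

    line = "9606\t|\t9605\t|\tspecies\t|\t...\n"    # nodes.dmp-style record (Homo sapiens)
    nodes_col = line.split("\t")                    # ['9606', '|', '9605', '|', 'species', '|', '...\n']
    node_id = nodes_col[0]                          # '9606'
    parent  = nodes_col[2]                          # '9605'
    rank    = nodes_col[4]                          # 'species'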
@@ -293,9 +294,9 @@ def writeNodesDB(nodesDB, nodesDB_f):
    nodes_count = nodesDB['nodes_count']
    i = 0
    with open(nodesDB_f, 'w') as fh:
-        fh.write("# nodes_count = %s\n" % nodes_count)
+        fh.write("# nodes_count = %s\n" % nodes_count)
        for node in nodesDB:
-            if not node == "nodes_count":
+            if not node == "nodes_count":
                i += 1
                BtLog.progress(i, 1000, nodes_count)
                fh.write("%s\t%s\t%s\t%s\n" % (node, nodesDB[node]['rank'], nodesDB[node]['name'], nodesDB[node]['parent']))
@@ -316,26 +317,26 @@ def byteify(input):
def writeJsonGzip(obj, outfile):
    import json
    import gzip
-    with gzip.open(outfile, 'wb') as fh:
+    with gzip.open(outfile, 'wb') as fh:
        json.dump(obj, fh)

def writeJson(obj, outfile):
    import json
-    with open(outfile, 'w') as fh:
+    with open(outfile, 'w') as fh:
        json.dump(obj, fh)

def readJsonGzip(infile):
    import json
    import gzip
-    with gzip.open(infile, 'rb') as fh:
+    with gzip.open(infile, 'rb') as fh:
        obj = json.loads(fh.read().decode("ascii"))
    return byteify(obj)

def readJson(infile):
    import json
-    with open(infile, 'r') as fh:
+    with open(infile, 'r') as fh:
        obj = json.loads(fh.read().decode("ascii"))
    return byteify(obj)

-if __name__ == "__main__":
+if __name__ == "__main__":
    pass
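These four helpers are thin wrappers around json and gzip; after loading, byteify (defined just above this hunk) appears to turn the unicode strings returned by json.loads back into Python 2 byte strings. A minimal round-trip sketch with a hypothetical file name:

    nodesDB = {'1': {'rank': 'no rank', 'name': 'root', 'parent': '1'}}
    writeJson(nodesDB, "nodesDB.json")
    restored = readJson("nodesDB.json")   # same mapping, with str rather than unicode strings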