9
9
from __future__ import division
10
10
import re
11
11
import subprocess
12
- from os .path import basename , isfile , abspath , splitext , join , isdir
13
- import shutil
14
12
import os
15
- import sys
13
+ from os .path import basename , isfile , splitext , join , isdir
14
+ import shutil
16
15
import bloblib .BtLog as BtLog
17
- from collections import deque
18
16
19
17
20
18
def create_dir (directory = "" , overwrite = True ):
21
- if ( directory ) :
19
+ if directory :
22
20
if not isdir (directory ):
23
21
os .makedirs (directory )
24
22
else :
25
- if ( overwrite ) :
23
+ if overwrite :
26
24
shutil .rmtree (directory ) #removes all the subdirectories!
27
25
os .makedirs (directory )
28
26
return directory
@@ -31,7 +29,7 @@ def create_dir(directory="", overwrite=True):
31
29
32
30
def parseList (infile ):
33
31
if not isfile (infile ):
34
- BtLog .error ('0' , infile )
32
+ BtLog .error ('0' , infile )
35
33
with open (infile ) as fh :
36
34
items = []
37
35
for l in fh :
@@ -40,7 +38,7 @@ def parseList(infile):
40
38
41
39
def parseReferenceCov (infile ):
42
40
refcov_dict = {}
43
- if ( infile ) :
41
+ if infile :
44
42
if not isfile (infile ):
45
43
BtLog .error ('0' , infile )
46
44
with open (infile ) as fh :
@@ -55,7 +53,7 @@ def parseReferenceCov(infile):
55
53
56
54
def parseCmdlist (temp ):
57
55
_list = []
58
- if ( temp ) :
56
+ if temp :
59
57
if "," in temp :
60
58
_list = temp .split ("," )
61
59
else :
@@ -65,7 +63,7 @@ def parseCmdlist(temp):
65
63
def parseCmdLabels (labels ):
66
64
label_d = {}
67
65
name , groups = '' , ''
68
- if ( labels ) :
66
+ if labels :
69
67
try :
70
68
for label in labels :
71
69
name , groups = str (label ).split ("=" )
@@ -80,7 +78,7 @@ def parseCmdLabels(labels):
80
78
81
79
def parseCatColour (infile ):
82
80
catcolour_dict = {}
83
- if ( infile ) :
81
+ if infile :
84
82
if not isfile (infile ):
85
83
BtLog .error ('0' , infile )
86
84
with open (infile ) as fh :
@@ -94,7 +92,7 @@ def parseCatColour(infile):
94
92
95
93
def parseDict (infile , key , value ):
96
94
items = {}
97
- if ( infile ) :
95
+ if infile :
98
96
if not isfile (infile ):
99
97
BtLog .error ('0' , infile )
100
98
with open (infile ) as fh :
@@ -108,7 +106,7 @@ def parseDict(infile, key, value):
108
106
109
107
def parseColours (infile ):
110
108
items = {}
111
- if ( infile ) :
109
+ if infile :
112
110
if not isfile (infile ):
113
111
BtLog .error ('0' , infile )
114
112
with open (infile ) as fh :
@@ -119,7 +117,7 @@ def parseColours(infile):
119
117
120
118
def parseSet (infile ):
121
119
if not isfile (infile ):
122
- BtLog .error ('0' , infile )
120
+ BtLog .error ('0' , infile )
123
121
with open (infile ) as fh :
124
122
items = set ()
125
123
for l in fh :
@@ -134,12 +132,12 @@ def parseFastaNameOrder(infile):
134
132
135
133
def readFasta (infile ):
136
134
if not isfile (infile ):
137
- BtLog .error ('0' , infile )
135
+ BtLog .error ('0' , infile )
138
136
with open (infile ) as fh :
139
137
header , seqs = '' , []
140
138
for l in fh :
141
139
if l [0 ] == '>' :
142
- if ( header ) :
140
+ if header :
143
141
yield header , '' .join (seqs )
144
142
header , seqs = l [1 :- 1 ].split ()[0 ], [] # Header is split at first whitespace
145
143
else :
@@ -173,8 +171,8 @@ def is_exe(fpath):
173
171
def checkBam (infile ):
174
172
print BtLog .status_d ['10' ]
175
173
if not isfile (infile ):
176
- BtLog .error ('0' , infile )
177
- if not ( which ('samtools' ) ):
174
+ BtLog .error ('0' , infile )
175
+ if not which ('samtools' ):
178
176
BtLog .error ('7' )
179
177
reads_mapped_re = re .compile (r"(\d+)\s\+\s\d+\smapped" )
180
178
reads_secondary_re = re .compile (r"(\d+)\s\+\s\d+\ssecondary" )
@@ -189,14 +187,15 @@ def checkBam(infile):
189
187
reads_mapped = reads_mapped - reads_secondary
190
188
reads_total = int (reads_total_re .search (output ).group (1 ))
191
189
# check whether there are reads in BAM
192
- if not (reads_total ) or not (reads_mapped ):
193
- BtLog .error ('29' % infile )
194
- print BtLog .status_d ['11' ] % ('{:,}' .format (reads_mapped ), '{:,}' .format (reads_total ), '{0:.1%}' .format (reads_mapped / reads_total ))
190
+ if not reads_total or not reads_mapped :
191
+ BtLog .error ('29' % infile )
192
+ print BtLog .status_d ['11' ] % ('{:,}' .format (reads_mapped ), \
193
+ '{:,}' .format (reads_total ), '{0:.1%}' .format (reads_mapped / reads_total ))
195
194
return reads_total , reads_mapped
196
195
197
196
def parseSam (infile , set_of_blobs , no_base_cov_flag ):
198
197
if not isfile (infile ):
199
- BtLog .error ('0' , infile )
198
+ BtLog .error ('0' , infile )
200
199
base_cov_dict = {blob : [] for blob in set_of_blobs }
201
200
read_cov_dict = {blob : 0 for blob in set_of_blobs }
202
201
cigar_match_re = re .compile (r"(\d+)M|X|=" ) # only gets digits before M,X,='s
@@ -241,15 +240,15 @@ def parseBam(infile, set_of_blobs, no_base_cov_flag):
241
240
242
241
'''
243
242
if not isfile (infile ):
244
- BtLog .error ('0' , infile )
243
+ BtLog .error ('0' , infile )
245
244
reads_total , reads_mapped = checkBam (infile )
246
245
progress_unit = int (reads_mapped / 1000 )
247
246
base_cov_dict = {blob : [] for blob in set_of_blobs }
248
247
#base_cov_dict = {blob : 0 for blob in set_of_blobs}
249
248
read_cov_dict = {blob : 0 for blob in set_of_blobs }
250
249
cigar_match_re = re .compile (r"(\d+)M|X|=" ) # only gets digits before M,X,='s
251
250
# execute samtools to get only mapped reads (no optical duplicates, no secondary alignments)
252
- command = "samtools view -F 1028 -F 4 -F 256 " + infile
251
+ command = "samtools view -F 1024 -F 4 -F 256 " + infile
253
252
seen_reads = 0
254
253
#import time
255
254
#start = time.time()
@@ -308,7 +307,7 @@ def parseCovFromHeader(fasta_type, header):
308
307
309
308
def parseCov (infile , set_of_blobs ):
310
309
if not isfile (infile ):
311
- BtLog .error ('0' , infile )
310
+ BtLog .error ('0' , infile )
312
311
old_cov_line_re = re .compile (r"^(\S+)\t(\d+\.*\d*)" )
313
312
base_cov_dict = {}
314
313
@@ -361,7 +360,7 @@ def parseCov(infile, set_of_blobs):
361
360
def checkCas (infile ):
362
361
print BtLog .status_d ['12' ]
363
362
if not isfile (infile ):
364
- BtLog .error ('0' , infile )
363
+ BtLog .error ('0' , infile )
365
364
if not (which ('clc_mapping_info' )):
366
365
BtLog .error ('20' )
367
366
seqs_total_re = re .compile (r"\s+Contigs\s+(\d+)" )
@@ -380,7 +379,7 @@ def checkCas(infile):
380
379
381
380
def parseCas (infile , order_of_blobs ):
382
381
if not isfile (infile ):
383
- BtLog .error ('0' , infile )
382
+ BtLog .error ('0' , infile )
384
383
seqs_total , reads_total , reads_mapped = checkCas (infile )
385
384
progress_unit = int (len (order_of_blobs )/ 100 )
386
385
cas_line_re = re .compile (r"\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+.\d{2})\s+(\d+)\s+(\d+.\d{2})" )
@@ -413,7 +412,7 @@ def readTax(infile, set_of_blobs):
413
412
- add as key-value pairs to hitDict
414
413
'''
415
414
if not isfile (infile ):
416
- BtLog .error ('0' , infile )
415
+ BtLog .error ('0' , infile )
417
416
hit_line_re = re .compile (r"^(\S+)\s+(\d+)[\;?\d+]*\s+(\d+\.*\d*)" ) # TEST TEST , if not split it afterwards
418
417
with open (infile ) as fh :
419
418
for line in fh :
@@ -507,7 +506,7 @@ def readNamesNodes(names_f, nodes_f):
507
506
for line in fh :
508
507
names_col = line .split ("\t " )
509
508
if names_col [6 ] == "scientific name" :
510
- nodesDB [names_col [0 ]]['name' ] = names_col [2 ]
509
+ nodesDB [names_col [0 ]]['name' ] = names_col [2 ]
511
510
nodesDB ['nodes_count' ] = nodes_count
512
511
return nodesDB
513
512
@@ -545,7 +544,7 @@ def byteify(input):
545
544
http://stackoverflow.com/a/13105359
546
545
'''
547
546
if isinstance (input , dict ):
548
- return {byteify (key ):byteify (value ) for key ,value in input .iteritems ()}
547
+ return {byteify (key ):byteify (value ) for key , value in input .iteritems ()}
549
548
elif isinstance (input , list ):
550
549
return [byteify (element ) for element in input ]
551
550
elif isinstance (input , unicode ):
@@ -580,7 +579,7 @@ def parseJsonGzip(infile):
580
579
def parseJson (infile ):
581
580
'''http://artem.krylysov.com/blog/2015/09/29/benchmark-python-json-libraries/'''
582
581
if not isfile (infile ):
583
- BtLog .error ('0' , infile )
582
+ BtLog .error ('0' , infile )
584
583
import time
585
584
start = time .time ()
586
585
json_parser = ''
0 commit comments