Skip to content

Commit 680f2f4

Browse files
Dom Laetsch
authored and committed
Development
1 parent 3be2abb commit 680f2f4

File tree

7 files changed

+93
-69
lines changed

7 files changed

+93
-69
lines changed

comparecov.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@
2121
2222
-p, --plotgroups INT Number of (taxonomic) groups to plot, remaining
2323
groups are placed in 'other' [default: 7]
24-
-r, --rank RANK Taxonomic rank used for colouring of blobs [default: superkingdom]
25-
(Supported: superkingdom)
24+
-r, --rank RANK Taxonomic rank used for colouring of blobs [default: phylum]
2625
-x, --taxrule TAXRULE Taxrule which has been used for computing taxonomy
2726
(Supported: bestsum, bestsumorder) [default: bestsum]
2827
--sort <ORDER> Sort order for plotting [default: span]

create.py

Lines changed: 4 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -36,64 +36,19 @@
3636
import lib.BtCore as bt
3737
import lib.BtLog as BtLog
3838
import lib.BtIO as BtIO
39+
import lib.BtInput as BtInput
3940
import os.path
4041

4142

4243
if __name__ == '__main__':
43-
ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']
44+
4445
main_dir = os.path.dirname(__file__)
4546
#print data_dir
4647
args = docopt(__doc__)
4748
#print args
48-
fasta_f = args['--infile']
49-
fasta_type = args['--type']
5049

51-
sam_fs = args['--sam']
52-
bam_fs = args['--bam']
53-
cov_fs = args['--cov']
54-
cas_fs = args['--cas']
55-
hit_fs = args['--taxfile']
56-
57-
out_f = args['--out']
58-
if (out_f):
59-
out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
60-
else:
61-
out_f = "%s" % ("BlobDB.json")
62-
nodesDB_f = args['--db']
63-
names_f = args['--names']
64-
nodes_f = args['--nodes']
65-
taxrules = args['--taxrule']
66-
title = args['--title'] if (args['--title']) else out_f
67-
68-
69-
# Do files exist ?
70-
files = [x for x in list([fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs + [names_f] + [nodes_f] + hit_fs) if x is not None]
71-
for f in files:
72-
if not os.path.isfile(f):
73-
BtLog.error('0', f)
74-
75-
# Is taxonomy provided?
76-
if nodesDB_f == "data/nodesDB.txt":
77-
nodesDB_f = os.path.join(main_dir, nodesDB_f)
78-
if not os.path.isfile(nodesDB_f) and not ((names_f) and (nodes_f)):
79-
BtLog.error('3')
80-
81-
if not (hit_fs):
82-
BtLog.error('18')
83-
84-
# can FASTA parser deal with assemblies
85-
if not fasta_type in ASSEMBLY_TYPES:
86-
BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))
87-
88-
# Is coverage provided?
89-
if not (fasta_type) and not bam_fs and not sam_fs and not cov_fs and not cas_fs:
90-
BtLog.error('1')
50+
title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules, nodesDB_f, nodes_f, names_f, out_f = BtInput.validate_input_create(main_dir, args)
9151

92-
cov_libs = [bt.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
93-
[bt.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \
94-
[bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
95-
[bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)]
96-
9752
# Create BlobDB object
9853
blobDb = bt.BlobDb(title)
9954

@@ -103,8 +58,7 @@
10358
blobDb.parseCovs(cov_libs)
10459

10560
# Parse Tax
106-
hitLibs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]
107-
blobDb.parseHits(hitLibs)
61+
blobDb.parseHits(hit_libs)
10862

10963
# Parse nodesDB
11064
nodesDB, nodesDB_f = BtIO.getNodesDB(nodes=nodes_f, names=names_f, nodesDB=nodesDB_f)

lib/BtCore.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -295,18 +295,6 @@ def computeTaxonomy(self, taxrules, nodesDB):
295295
blObj.taxonomy[taxrule] = BtTax.taxRule(taxrule, blObj.hits, self.lineages)
296296
else:
297297
blObj.taxonomy[taxrule] = BtTax.noHit()
298-
299-
def counts(self):
300-
count_dict = {
301-
'seqs' : self.seqs,
302-
'length' : self.length,
303-
'Ns' : self.n_count,
304-
'AvgCov' : {lib : round(covlibObj.cov_sum/self.seqs, 2) for lib, covlibObj in self.covLibs.items()},
305-
'GC' : round(sum([blObj.gc for blObj in self.dict_of_blobs.values()])/self.seqs, 2),
306-
'MappedReads' : {lib : (covlibObj.reads_mapped) for lib, covlibObj in self.covLibs.items()},
307-
'TotalReads' : {lib : (covlibObj.reads_total) for lib, covlibObj in self.covLibs.items()}
308-
}
309-
print count_dict
310298

311299
def getBlobs(self):
312300
for blObj in [self.dict_of_blobs[key] for key in self.order_of_blobs]:

lib/BtIO.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,10 +147,9 @@ def readCov(infile, set_of_blobs):
147147
cov_line_re = re.compile(r"^(\S+)\t(\d+\.*\d*)")
148148
cov_dict = {}
149149
seqs_parsed = 0
150-
progress_unit = int(len(set_of_blobs)/100)
150+
progress_unit = 1
151151
with open(infile) as fh:
152152
for line in fh:
153-
BtLog.progress(seqs_parsed, 10, len(set_of_blobs))
154153
match = cov_line_re.search(line)
155154
if match:
156155
seqs_parsed += 1

lib/BtInput.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
File : BtInput.py
6+
Version : 0.1
7+
Author : Dominik R. Laetsch, dominik.laetsch at gmail dot com
8+
Bugs : ?
9+
To do : ?
10+
"""
11+
12+
from __future__ import division
13+
import re
14+
import subprocess
15+
from os.path import basename, isfile, abspath
16+
import os
17+
import lib.BtLog as BtLog
18+
import lib.BtCore as bt
19+
20+
def validate_input_create(main_dir, args):
    '''
    Validate the docopt arguments of the 'create' script and build the
    coverage/hit library objects from them.

    Accepts:
        - main_dir : directory against which the default nodesDB path
                     ("data/nodesDB.txt") is resolved
        - args     : docopt args; reads --infile, --type, --sam, --bam,
                     --cov, --cas, --taxfile, --out, --db, --names,
                     --nodes, --taxrule and --title
    Returns (as a tuple, in this order):
        - title
        - fasta_f
        - fasta_type
        - cov_libs   : bt.CovLibObj list, ordered bam, sam, cas, cov
        - hit_libs   : bt.hitLibObj list, one per --taxfile
        - taxrules
        - nodesDB_f
        - nodes_f
        - names_f
        - out_f
    Failed checks are reported through BtLog.error():
        - '0'  : an input file does not exist
        - '3'  : neither a nodesDB file nor both names/nodes files are available
        - '18' : no taxonomy hit file was given
        - '2'  : unsupported assembly type
        - '1'  : no coverage source and no assembly type was given
    NOTE(review): presumably BtLog.error() terminates the program - confirm;
    if it returns, execution simply continues with the next check.
    '''
    ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']

    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--taxfile']
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']

    # Only the basename of --out is used as prefix of the output file name
    out_f = args['--out']
    if out_f:
        out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
    else:
        out_f = "BlobDB.json"
    title = args['--title'] or out_f

    # Do files exist ?
    candidates = [fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs + [names_f, nodes_f] + hit_fs
    for f in candidates:
        if f is not None and not os.path.isfile(f):
            BtLog.error('0', f)

    # Is taxonomy provided?
    if nodesDB_f == "data/nodesDB.txt":
        # the default nodesDB path is relative to main_dir
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    # guard against an empty --db value before handing it to os.path.isfile
    has_nodesDB = bool(nodesDB_f) and os.path.isfile(nodesDB_f)
    if not has_nodesDB and not (names_f and nodes_f):
        BtLog.error('3')
    if not hit_fs:
        BtLog.error('18')

    # can FASTA parser deal with assemblies
    if fasta_type not in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))

    # Is coverage provided?
    if not fasta_type and not (bam_fs or sam_fs or cov_fs or cas_fs):
        BtLog.error('1')

    # Library names are '<type><index>', index counted per library type
    cov_libs = []
    for lib_type, lib_fs in (('bam', bam_fs), ('sam', sam_fs), ('cas', cas_fs), ('cov', cov_fs)):
        for idx, lib_f in enumerate(lib_fs):
            cov_libs.append(bt.CovLibObj(lib_type + str(idx), lib_type, lib_f))

    hit_libs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]

    return title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules, nodesDB_f, nodes_f, names_f, out_f
82+
83+
if __name__ == "__main__":
84+
pass

lib/BtPlot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ def compute_stats(self):
299299
# gather data
300300
for group, labels in self.group_labels.items():
301301
for label in labels:
302-
stats[label]['name'] = self.data_dict[group]['name']
302+
stats[label]['name'] = stats[label]['name'] + self.data_dict[group]['name']
303303
stats[label]['groups'].add(group)
304304
stats[label]['gc'] = stats[label]['gc'] + self.data_dict[group]['gc']
305305
stats[label]['length'] = stats[label]['length'] + self.data_dict[group]['length']

lib/BtTax.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,8 @@ def getLineages(tree_lists, nodesDB):
5050
return lineage
5151

5252
def taxRule(taxrule, hits, lineages):
53-
taxonomy = { rank : {'tax' : '', 'score' : 0.0, 'c_index' : 0 } for rank in RANKS}
54-
tempTax = { rank : {} for rank in RANKS}
53+
taxonomy = { rank : {'tax' : '', 'score' : 0.0, 'c_index' : 0 } for rank in RANKS }
54+
tempTax = { rank : {} for rank in RANKS }
5555
taxDict = getTaxDict(hits, lineages) # here libs are separated
5656
if taxrule == 'bestsum':
5757
for lib in sorted(taxDict):

0 commit comments

Comments
 (0)