Skip to content

Commit 3b282fd

Browse files
authored
Create count_frequency_busco_family.py
1 parent e69ee8f commit 3b282fd

File tree

1 file changed

+86
-0
lines changed

1 file changed

+86
-0
lines changed

count_frequency_busco_family.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/usr/bin/env python
2+
3+
# count_frequence_busco_family.py
4+
# 2020 Jamie McGowan <jamie.mcgowan@mu.ie>
5+
#
6+
# Reports how many species a BUSCO family was found to be single copy in
7+
# ONLY LOOKS AT SINGLE COPIES, IGNORES IF PRESENT AS MULTI COPY
8+
# Usage: python count_frequency_busco_family.py busco_working_directory
9+
10+
import os, sys
11+
12+
if len(sys.argv) < 2:
13+
print("Usage: python count_frequency_busco_family.py busco_working_directory")
14+
sys.exit(0)
15+
16+
working_directory = os.path.abspath(sys.argv[1])
17+
18+
busco_dirs = []
19+
20+
for item in os.listdir("."):
21+
if item[0:4] == "run_":
22+
if os.path.isdir(item):
23+
busco_dirs.append(item)
24+
25+
n_busco_runs = len(busco_dirs)
26+
27+
print("Found " + str(n_busco_runs) + " BUSCO runs")
28+
print("")
29+
print("BUSCO Run\t#Single Copy BUSCOs")
30+
31+
all_species = []
32+
all_buscos = set()
33+
busco_per_species = {}
34+
35+
for directory in busco_dirs:
36+
os.chdir(working_directory)
37+
species = directory.split("run_")[1]
38+
all_species.append(species)
39+
buscos = []
40+
41+
os.chdir(directory)
42+
os.chdir("single_copy_busco_sequences")
43+
44+
45+
for busco in os.listdir("."):
46+
if busco.endswith(".faa"):
47+
busco_name = busco[0:len(busco) - 4]
48+
buscos.append(busco_name)
49+
all_buscos.add(busco_name)
50+
51+
print(species + "\t" + str(len(buscos)))
52+
busco_per_species[species] = buscos
53+
54+
print("\n\n")
55+
56+
all_buscos_count = {}
57+
for busco in all_buscos:
58+
all_buscos_count[busco] = 0
59+
60+
for species in all_species:
61+
if busco in busco_per_species[species]:
62+
all_buscos_count[busco] += 1
63+
64+
print("BUSCO\t#Species\t%Species")
65+
for busco in all_buscos:
66+
percent = ((all_buscos_count[busco] / float(n_busco_runs)) * 100)
67+
percent = "{:.2f}".format(percent)
68+
print(busco + "\t" + str(all_buscos_count[busco]) + '\t' + percent)
69+
70+
71+
print("\n\n")
72+
73+
# Print presence/absence matrix of all found buscos per species
74+
line = ["BUSCO"] + all_species
75+
print ("\t".join(line))
76+
77+
for busco in all_buscos:
78+
line = [busco]
79+
for species in all_species:
80+
if busco in busco_per_species[species]:
81+
line.append("Y")
82+
else:
83+
line.append("N")
84+
85+
print ("\t".join(line))
86+

0 commit comments

Comments
 (0)