|
4 | 4 | import argparse |
5 | 5 | from functools import partial |
6 | 6 | import re |
7 | | -from custom_html import HEAD, build_dbnote, build_table, build_row, FOOT, PAGEBREAK |
| 7 | +from custom_html import HEAD, build_dbnote, build_table,build_table_title, build_row, FOOT, PAGEBREAK |
8 | 8 |
|
9 | 9 |
|
# Pattern matching any of the substrings ATCC, LMG, type or NCTC.
EXPR_PRIORITY = re.compile(r'ATCC|LMG|type|NCTC')

# Column names, in display order, passed to build_row / build_table.
ROW_NAMES = [
    "subject_accession",
    "species",
    "bitscore",
    "percent_coverage",
    "percent_identity",
    "database_name",
    "extra_info",
]
13 | 12 |
|
14 | 13 |
|
15 | 14 | def parse_db_csv(filepath): |
@@ -77,50 +76,64 @@ def parse_blast(filepath): |
77 | 76 |
|
78 | 77 | return df.fillna('N/A') |
79 | 78 |
|
80 | | -def main(args): |
81 | | - blast_table = parse_blast(args.blast) |
def build_table_string(name, df, limit=20, max_rows=300):
    """Render one table of BLAST hits as an HTML string.

    Tables with fewer than ``limit`` rows are rendered in full. Otherwise the
    first ``limit`` rows are passed to ``build_table`` as the visible rows and
    the following rows, up to position ``max_rows``, as the hidden rows.

    Args:
        name: Table name forwarded to ``build_table``.
        df: DataFrame of hits; each row is rendered with ``build_row`` using
            the ``ROW_NAMES`` columns.
        limit: Number of rows passed as visible rows for long tables.
        max_rows: Row position at which the hidden rows are cut off.

    Returns:
        The string produced by ``build_table``.
    """
    build_row_partial = partial(build_row, row_names=ROW_NAMES)

    def render_rows(frame):
        # Iterating the rows always yields a (possibly empty) sequence of
        # strings. DataFrame.apply(axis=1) on an empty slice may hand back a
        # DataFrame rather than a Series of row strings, in which case join()
        # would emit column labels instead of rows.
        return '\n'.join(build_row_partial(row) for _, row in frame.iterrows())

    if df.shape[0] < limit:
        return build_table(name, ROW_NAMES, render_rows(df))

    str_rows = render_rows(df.iloc[:limit])
    # The hidden rows start at position ``limit``. Starting at ``limit + 1``
    # dropped the first row after the visible ones. iloc clamps the end of the
    # slice, so no explicit min() against the row count is needed.
    hidden_str_rows = render_rows(df.iloc[limit:max_rows])

    return build_table(name, ROW_NAMES, str_rows, hidden_str_rows)
82 | 98 |
|
83 | | - outfile = open(args.output, "w") |
84 | | - outfile.write(HEAD) |
def main(args):
    """Write the HTML report combining local and NCBI BLAST hits.

    Reads the local BLAST table (``args.blast``), the NCBI BLAST table
    (``args.ncbi``) and the database CSV (``args.db``), then writes one
    section per ``query_seq_id`` to ``args.output``: the database note, the
    table title, the local hits table (if any), the NCBI hits table (if any)
    and a page break.

    Args:
        args: Parsed command-line arguments providing ``blast``, ``ncbi``,
            ``db`` and ``output``.
    """
    local_blast_table = parse_blast(args.blast)
    ncbi_blast_table = parse_blast(args.ncbi)
    # ROW_NAMES includes 'extra_info'. The NCBI table is not merged with the
    # database descriptions below, so its column is filled with empty strings.
    ncbi_blast_table['extra_info'] = ''

    database_df = parse_db_csv(args.db)
    db_note = build_dbnote(database_df)

    extra_info = extract_descriptions(database_df)

    local_blast_table = local_blast_table.merge(extra_info, on='subject_accession', how='left')

    # Materialise the (query_seq_id, sub-frame) pairs before building the
    # dicts, so each sample can be looked up in either source.
    local_blast_dict = dict(list(local_blast_table.groupby('query_seq_id')))
    ncbi_blast_dict = dict(list(ncbi_blast_table.groupby('query_seq_id')))

    # Set iteration order is not stable between runs, so sort the sample names
    # to keep the report layout reproducible. key=str avoids a TypeError if
    # the two tables parsed the ids as different types.
    sample_names = sorted(set(local_blast_dict) | set(ncbi_blast_dict), key=str)

    with open(args.output, "w") as outfile:
        outfile.write(HEAD)
        for name in sample_names:

            print(name)
            outfile.write(db_note)

            outfile.write(build_table_title(name))

            if name in local_blast_dict:
                outfile.write(build_table_string(name, local_blast_dict[name]))

            if name in ncbi_blast_dict:
                outfile.write(build_table_string(name, ncbi_blast_dict[name]))

            outfile.write(PAGEBREAK)

        outfile.write(FOOT)
120 | 132 |
|
121 | 133 | if __name__ == '__main__': |
122 | 134 | parser = argparse.ArgumentParser() |
123 | | - parser.add_argument('-b', '--blast', help='A single concatenated BLAST CSV table with hits from multiple samples and multiple database sources.') |
| 135 | + parser.add_argument('-b', '--blast', required=True, help='A single concatenated BLAST CSV table with hits from multiple samples and multiple database sources.') |
| 136 | + parser.add_argument('-n', '--ncbi', required=True, help='A single concatenated BLAST CSV table with hits from multiple samples from NCBI core_nt database.') |
124 | 137 | parser.add_argument('-d', '--db', help='Database CSV file containing ID, DBNAME, and PATH columns.') |
125 | 138 | parser.add_argument('-o', '--output', help='Output HTML report filename.') |
126 | 139 | args = parser.parse_args() |
|
0 commit comments