Skip to content

Commit b3c7d5b

Browse files
committed
ENH SemiBin1 cannot use --abundances
1 parent 3c4f79a commit b3c7d5b

File tree

3 files changed

+6
-53
lines changed

3 files changed

+6
-53
lines changed

SemiBin/main.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1402,6 +1402,10 @@ def main2(args=None, is_semibin2=True):
14021402
if is_semibin2 and getattr(args, 'training_type', None) == 'semi':
14031403
logger.info('Currently using semi-supervised mode. This is generally only useful for backwards compability.')
14041404

1405+
if not is_semibin2 and getattr(args, 'abundances', None) is not None:
1406+
logger.error(f'--abundances cannot be used in SemiBin1.')
1407+
sys.exit(1)
1408+
14051409
if args.cmd == 'citation':
14061410
from . import citation
14071411
if args.cite_format == 'bibtex':

integration-tests/generate_data_multi_command.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
assert data_split.shape == (40, 146)
1414

1515
# running with abundance file from strobealign-aemb
16-
subprocess.check_call('SemiBin1 generate_sequence_features_multi '
16+
subprocess.check_call('SemiBin2 generate_sequence_features_multi '
1717
'-i test/multi_samples_data/input_multi.fasta '
1818
'-o test-outputs/output_multi_fa -m 2500 '
1919
'--ratio 0.05 --ml-threshold 4000 -p 1 '

script/generate_split.py

Lines changed: 1 addition & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,8 @@
11
import argparse
22
from atomicwrites import atomic_write
3+
from SemiBin.fasta import fasta_iter
34
import os
45

5-
def fasta_iter(fname, full_header=False):
6-
'''Iterate over a (possibly gzipped) FASTA file
7-
8-
Parameters
9-
----------
10-
fname : str
11-
Filename.
12-
If it ends with .gz, gzip format is assumed
13-
If .bz2 then bzip2 format is assumed
14-
if .xz, then lzma format is assumerd
15-
full_header : boolean (optional)
16-
If True, yields the full header. Otherwise (the default), only the
17-
first word
18-
19-
Yields
20-
------
21-
(h,seq): tuple of (str, str)
22-
'''
23-
header = None
24-
chunks = []
25-
if hasattr(fname, 'readline'):
26-
op = lambda f,_ : f
27-
elif fname.endswith('.gz'):
28-
import gzip
29-
op = gzip.open
30-
elif fname.endswith('.bz2'):
31-
import bz2
32-
op = bz2.open
33-
elif fname.endswith('.xz'):
34-
import lzma
35-
op = lzma.open
36-
else:
37-
op = open
38-
with op(fname, 'rt') as f:
39-
for line in f:
40-
if line[0] == '>':
41-
if header is not None:
42-
yield header,''.join(chunks)
43-
line = line[1:].strip()
44-
if not line:
45-
header = ''
46-
elif full_header:
47-
header = line.strip()
48-
else:
49-
header = line.split()[0]
50-
chunks = []
51-
else:
52-
chunks.append(line.strip())
53-
if header is not None:
54-
yield header, ''.join(chunks)
55-
56-
576
def generate_file(contig_file, output, min_length, name):
587
os.makedirs(output, exist_ok=True)
598

0 commit comments

Comments
 (0)