Skip to content

Commit fe58d80

Browse files
committed
add FeatureList IO support for TSV/CSV
1 parent 4b5072e commit fe58d80

File tree

12 files changed

+70
-11
lines changed

12 files changed

+70
-11
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ genbank = "sugar._io.genbank"
4646
blast = "sugar._io.tab.blast"
4747
mmseqs = "sugar._io.tab.mmseqs"
4848
infernal = "sugar._io.tab.infernal"
49+
tsv = "sugar._io.tab.tsv"
50+
csv = "sugar._io.tab.tsv"
4951

5052
[build-system]
5153
requires = ["setuptools>=61.0"]

sugar/_io/fasta.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from sugar._io.util import _add_fmt_doc
99

1010

11-
filename_extensions = ['fasta', 'fa']
11+
filename_extensions_fasta = ['fasta', 'fa']
1212

1313
def is_fasta(f, **kw):
1414
content = f.read(50)

sugar/_io/gff.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
from sugar._io.util import _add_fmt_doc
1414

1515

16-
filename_extensions = ['gff']
17-
filename_extensions_fts = ['gff']
16+
filename_extensions_gff = ['gff']
17+
filename_extensions_fts_gff = ['gff']
1818

1919

2020
def is_gff(f, **kw):

sugar/_io/main.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,8 @@ def detect_ext(fname, what='seqs'):
9393
return
9494
for fmt in FMTS_ALL[what]:
9595
module = EPS[what][fmt].load()
96-
if hasattr(module, 'filename_extensions' + suf):
97-
if ext in getattr(module, 'filename_extensions' + suf):
96+
if hasattr(module, f'filename_extensions{suf}_{fmt}'):
97+
if ext in getattr(module, f'filename_extensions{suf}_{fmt}'):
9898
return fmt
9999

100100

sugar/_io/sjson.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
BioBasket, BioSeq
1919
)
2020

21-
filename_extensions = ['sjson', 'json']
21+
filename_extensions_sjson = ['sjson', 'json']
2222
COMMENT = f'sugar JSON format written by sugar v{__version__}'
2323

2424

sugar/_io/stockholm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from sugar._io.util import _add_fmt_doc
1111

1212

13-
filename_extensions = ['stk', 'sto', 'stockholm']
13+
filename_extensions_stockholm = ['stk', 'sto', 'stockholm']
1414

1515

1616
def is_stockholm(f, **kw):

sugar/_io/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88

99
FMTS = {'seqs': ['fasta', 'genbank', 'stockholm', 'gff', 'sjson'],
10-
'fts': ['gff', 'genbank', 'infernal', 'mmseqs', 'blast']}
10+
'fts': ['gff', 'genbank', 'infernal', 'mmseqs', 'blast', 'tsv', 'csv']}
1111

1212

1313
def _epsname_key(epsname, what='seqs'):

sugar/core/fts.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,23 @@ def __init__(self, data=None):
418418
data = data.data
419419
super().__init__(data)
420420

421+
@classmethod
def frompandas(cls, df, ftype=None):
    """
    Create a feature list from the rows of a pandas DataFrame.

    Every row is turned into one feature with a single location that is
    built from the ``start`` and ``stop`` columns. The ``strand`` and
    ``defect`` columns are used for the location when present and
    default to ``'+'`` and ``Defect.NONE`` otherwise. All remaining
    columns of the row are passed on as the ``meta`` of the feature.

    The passed DataFrame is not modified.

    :param df: DataFrame with at least the columns ``start`` and ``stop``
    :param ftype: Only used if ``df`` has no ``type`` column.
        If ``ftype`` is the name of a column in ``df``, that column is
        copied to ``type``, otherwise ``ftype`` itself is used as the
        ``type`` value of every row.
    :return: New instance of the class holding one feature per row
    """
    if ftype is not None and 'type' not in df:
        # assign() returns a new frame, so the DataFrame of the caller
        # does not silently gain a 'type' column
        df = df.assign(type=df[ftype] if ftype in df else ftype)
    fts = []
    for rec in df.to_dict('records'):
        # The location keys are popped, everything that is left in the
        # record ends up in the metadata of the feature
        loc = Location(rec.pop('start'),
                       rec.pop('stop'),
                       strand=rec.pop('strand', '+'),
                       defect=rec.pop('defect', Defect.NONE))
        fts.append(Feature(locs=[loc], meta=rec))
    return cls(fts)
437+
421438
def __str__(self):
422439
return self.tostr()
423440

sugar/tests/data/fts_example.csv

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
type,start,stop,strand
2+
region,0,108689991,+
3+
pseudogene,43129,43626,-
4+
exon,43129,43626,-
5+
gene,61869,98682,+
6+
mRNA,61869,98682,+
7+
exon,61869,62065,+
8+
exon,75477,75526,+
9+
exon,76998,77025,+
10+
exon,80391,80417,+
11+
exon,82620,82717,+
12+
exon,83974,84123,+
13+
exon,86529,86621,+
14+
exon,88006,88135,+
15+
exon,96921,98682,+
16+
CDS,61943,97199,+
17+
cDNA_match,101888622,101892867,-
18+
cDNA_match,103140200,103170945,-
19+
cDNA_match,103944892,103952028,-
20+
cDNA_match,107859806,107862198,-

sugar/tests/data/fts_example.tsv

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
type start stop strand
2+
region 0 108689991 +
3+
pseudogene 43129 43626 -
4+
exon 43129 43626 -
5+
gene 61869 98682 +
6+
mRNA 61869 98682 +
7+
exon 61869 62065 +
8+
exon 75477 75526 +
9+
exon 76998 77025 +
10+
exon 80391 80417 +
11+
exon 82620 82717 +
12+
exon 83974 84123 +
13+
exon 86529 86621 +
14+
exon 88006 88135 +
15+
exon 96921 98682 +
16+
CDS 61943 97199 +
17+
cDNA_match 101888622 101892867 -
18+
cDNA_match 103140200 103170945 -
19+
cDNA_match 103944892 103952028 -
20+
cDNA_match 107859806 107862198 -

0 commit comments

Comments
 (0)