Skip to content

Commit d377c4f

Browse files
authored
Update parse.py
Added the option of passing a file-like object to `parse_csv` instead of only a file path. This lets Streamlit apps pass uploaded files, which are kept in memory rather than on disk.
1 parent 359f02c commit d377c4f

File tree

1 file changed

+46
-21
lines changed

1 file changed

+46
-21
lines changed

cpm/parse.py

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,48 +2,73 @@
22
from cpm.models import DSM
33

44

5-
def parse_csv(filepath: str, delimiter: str = 'auto', encoding: str = 'utf-8', instigator: str = 'column'):
5+
def parse_csv(file, delimiter: str = 'auto', encoding: str = 'utf-8', instigator: str = 'column'):
66
"""
77
Parse CSV to DSM
8-
:param filepath: Targeted CSV file
8+
:param file: Targeted CSV file or file-like object
99
:param delimiter: CSV delimiter. Defaults to auto-detection.
1010
:param encoding: text-encoding. Defaults to utf-8
1111
:param instigator: Determines directionality of DSM. Defaults to columns instigating rows.
1212
:return: DSM
1313
"""
1414

15+
def read_file(file):
16+
if isinstance(file, str):
17+
with open(file, 'r', encoding=encoding) as f:
18+
return f.read()
19+
elif hasattr(file, 'read'):
20+
position = file.tell()
21+
content = file.read()
22+
file.seek(position)
23+
return content
24+
else:
25+
raise ValueError("Invalid file input. Must be a filepath or a file-like object.")
26+
27+
def get_file_lines(file):
28+
if isinstance(file, str):
29+
with open(file, 'r', encoding=encoding) as f:
30+
return f.readlines()
31+
elif hasattr(file, 'read'):
32+
position = file.tell()
33+
file.seek(0)
34+
lines = file.readlines()
35+
file.seek(position)
36+
return lines
37+
else:
38+
raise ValueError("Invalid file input. Must be a filepath or a file-like object.")
39+
40+
content = read_file(file)
41+
1542
if delimiter == 'auto':
16-
with open(filepath, 'r', encoding=encoding) as file:
17-
delimiter = detect_delimiter(file.read())
43+
delimiter = detect_delimiter(content)
1844

1945
# Identify number of rows, and separate header row
2046
num_cols = 0
2147
column_names = []
22-
with open(filepath, 'r') as file:
23-
for line in file:
24-
column_names.append(line.split(delimiter)[0])
25-
num_cols += 1
48+
lines = get_file_lines(file)
49+
for line in lines:
50+
column_names.append(line.split(delimiter)[0])
51+
num_cols += 1
2652

2753
# We do not want the first column in the header
2854
column_names.pop(0)
2955

3056
data = []
3157

32-
with open(filepath, 'r') as file:
33-
for i, line in enumerate(file):
34-
if i == 0:
58+
for i, line in enumerate(lines):
59+
if i == 0:
60+
continue
61+
data.append([])
62+
for j, col in enumerate(line.split(delimiter)):
63+
if j == 0:
3564
continue
36-
data.append([])
37-
for j, col in enumerate(line.split(delimiter)):
38-
if j == 0:
39-
continue
40-
if col == "":
65+
if col == "":
66+
data[i-1].append(None)
67+
else:
68+
try:
69+
data[i-1].append(float(col))
70+
except ValueError:
4171
data[i-1].append(None)
42-
else:
43-
try:
44-
data[i-1].append(float(col))
45-
except ValueError:
46-
data[i - 1].append(None)
4772

4873
dsm = DSM(matrix=data, columns=column_names, instigator=instigator)
4974

0 commit comments

Comments
 (0)