|
| 1 | +from typing import TextIO, Union |
1 | 2 | from cpm.exceptions import *
|
2 | 3 | from cpm.models import DSM
|
3 | 4 | from os import listdir
|
@@ -31,54 +32,81 @@ def parse_csv_dir(dir_path: str, pattern: str = None, delimiter: str = 'auto',
|
31 | 32 | return dsm_array
|
32 | 33 |
|
33 | 34 |
|
34 |
def parse_csv(file: Union[str, TextIO], delimiter: str = 'auto', encoding: str = 'utf-8', instigator: str = 'column'):
    """
    Parse CSV to DSM

    The first non-empty line is treated as the header row and discarded. For every
    following line the first cell is used as the element name and the remaining
    cells form one row of the matrix. Cells that are empty or cannot be converted
    to float are stored as None. Completely empty lines are ignored.

    :param file: Targeted CSV file or file-like object
    :param delimiter: CSV delimiter. Defaults to auto-detection.
    :param encoding: text-encoding. Defaults to utf-8
    :param instigator: Determines directionality of DSM. Defaults to columns instigating rows.
    :return: DSM
    """
    # Read the source exactly once; the same lines feed both delimiter detection
    # and parsing, so the two can never disagree about what was read.
    lines = _get_file_lines(file, encoding)

    if delimiter == 'auto':
        delimiter = detect_delimiter(''.join(lines))

    # Drop line terminators before splitting so that neither the element names
    # nor the last cell of a row carry a trailing newline. Empty lines are skipped
    # because they would otherwise yield a bogus name and an empty matrix row.
    rows = [
        line.rstrip('\r\n').split(delimiter)
        for line in lines
        if line.strip('\r\n')
    ]

    # The header row is not part of the data; slicing (rather than pop) keeps an
    # empty input from raising IndexError here.
    body = rows[1:]

    # Element names are taken from the first cell of each data row.
    column_names = [row[0] for row in body]
    data = [[_parse_cell(cell) for cell in row[1:]] for row in body]

    return DSM(matrix=data, columns=column_names, instigator=instigator)


def _parse_cell(cell):
    """Convert one CSV cell to float; empty or non-numeric cells become None."""
    if cell == "":
        return None
    try:
        return float(cell)
    except ValueError:
        return None
|
80 | 81 |
|
81 | 82 |
|
def _read_file(file, encoding):
    """
    Return the full text content of a CSV source.

    :param file: Path (str or os.PathLike) or a file-like object exposing read()
    :param encoding: Encoding used to open a path, or to decode the data when a
        binary stream returns bytes
    :return: The content as str. A file-like object is read from its current
        position; if it is seekable, that position is restored afterwards.
    :raises ValueError: If file is neither a path nor a file-like object
    """
    import os

    if isinstance(file, (str, os.PathLike)):
        with open(file, 'r', encoding=encoding) as f:
            return f.read()

    if not hasattr(file, 'read'):
        raise ValueError("Invalid file input. Must be a filepath or a file-like object.")

    # Pipes, sockets and stdin cannot tell()/seek(); only save and restore the
    # position when the stream says it supports it. Objects without a seekable()
    # method are treated as seekable, matching the previous behaviour.
    seekable = getattr(file, 'seekable', None)
    can_rewind = seekable() if callable(seekable) else True

    position = file.tell() if can_rewind else None
    content = file.read()
    if can_rewind:
        file.seek(position)

    # Binary streams hand back bytes; decode so callers always receive text.
    if isinstance(content, (bytes, bytearray)):
        content = content.decode(encoding)
    return content
| 94 | + |
| 95 | + |
def _get_file_lines(file, encoding):
    """
    Return every line of a CSV source as a list of str.

    :param file: Path (str or os.PathLike) or a file-like object exposing read()
    :param encoding: Encoding used to open a path, or to decode the lines when a
        binary stream returns bytes
    :return: List of lines, line terminators included. A seekable file-like object
        is read from its beginning and its previous position is restored
        afterwards; a non-seekable one is read from wherever it currently is.
    :raises ValueError: If file is neither a path nor a file-like object
    """
    import os

    if isinstance(file, (str, os.PathLike)):
        with open(file, 'r', encoding=encoding) as f:
            return f.readlines()

    if not hasattr(file, 'read'):
        raise ValueError("Invalid file input. Must be a filepath or a file-like object.")

    # Pipes, sockets and stdin cannot tell()/seek(); only rewind when the stream
    # says it supports it. Objects without a seekable() method are treated as
    # seekable, matching the previous behaviour.
    seekable = getattr(file, 'seekable', None)
    can_rewind = seekable() if callable(seekable) else True

    position = None
    if can_rewind:
        position = file.tell()
        file.seek(0)
    lines = file.readlines()
    if can_rewind:
        file.seek(position)

    # Binary streams hand back bytes; decode so callers always receive text.
    return [
        line.decode(encoding) if isinstance(line, (bytes, bytearray)) else line
        for line in lines
    ]
| 108 | + |
| 109 | + |
82 | 110 | def detect_delimiter(text, look_ahead=1000):
|
83 | 111 | """
|
84 | 112 | Attempts to determine CSV delmiter based on a certain amount of sample characters
|
@@ -114,4 +142,3 @@ def detect_delimiter(text, look_ahead=1000):
|
114 | 142 | raise AutoDelimiterError('None of the default delimiters matched the file. Is the file empty?')
|
115 | 143 |
|
116 | 144 | return best_delimiter
|
117 |
| - |
|
0 commit comments