Skip to content

Commit 9fc2c92

Browse files
committed
Fix UTF8 BOM decoding
1 parent fb4da6e commit 9fc2c92

File tree

1 file changed

+28
-12
lines changed
  • backend/donations/views/download_donations

1 file changed

+28
-12
lines changed

backend/donations/views/download_donations/byof.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def handle_external_data_processing(own_upload_id) -> Optional[Dict]:
6464
own_upload.save()
6565

6666
with own_upload.uploaded_data.open() as file:
67-
read_file = file.readline().decode("utf-8")
67+
read_file = decode_readfile(file, one_line=True)
6868
reader = csv.DictReader(io.StringIO(read_file))
6969

7070
header_check = _check_csv_header(reader)
@@ -137,24 +137,40 @@ def generate_xml_from_external_data(own_upload: OwnFormsUpload) -> Dict[str, Uni
137137
return {"error": None, "data": xml_element_tree}
138138

139139

140-
def decode_readfile(input_file):
140+
def decode_readfile(input_file, *, one_line=False) -> str:
141141
# TODO: Try to optimize/simplify this
142-
readfile = input_file.read()
142+
readfile = input_file.readline() if one_line else input_file.read()
143143

144+
# Try to decode the file as utf-8 with BOM - common for Microsoft Office
145+
# We have to do this before trying plain utf-8: plain utf-8 also decodes a BOM-prefixed file, but leaves the BOM (U+FEFF) at the start of the text
144146
try:
145-
# Try to decode the file as utf-8
146147
# decode() returns a new str, so the original bytes are left unmodified
148+
read_file = readfile.decode("utf-8-sig")
149+
except UnicodeDecodeError:
150+
input_file.seek(0)
151+
else:
152+
return read_file
153+
154+
# If utf-8 with BOM fails, try with plain utf-8
155+
try:
156+
# Try to decode the file as plain utf-8 (without BOM)
157+
readfile = input_file.readline() if one_line else input_file.read()
147158
read_file = readfile.decode("utf-8")
148159
except UnicodeDecodeError:
149-
# If utf-8 fails, try cp1252 — common for Windows
150-
try:
151-
input_file.seek(0)
152-
readfile = input_file.read()
153-
read_file = readfile.decode("cp1252")
154-
except UnicodeDecodeError:
155-
raise ValueError(_("The file is not in a valid format."))
160+
input_file.seek(0)
161+
else:
162+
return read_file
163+
164+
# If utf-8 fails, try cp1252 - common for Microsoft Windows
165+
try:
166+
readfile = input_file.readline() if one_line else input_file.read()
167+
read_file = readfile.decode("cp1252")
168+
except UnicodeDecodeError:
169+
input_file.seek(0)
170+
else:
171+
return read_file
156172

157-
return read_file
173+
raise ValueError(_("The file is not in a valid format."))
158174

159175

160176
def parse_file_data(file) -> List[DonorModel]:

0 commit comments

Comments
 (0)