@@ -64,7 +64,7 @@ def handle_external_data_processing(own_upload_id) -> Optional[Dict]:
64
64
own_upload .save ()
65
65
66
66
with own_upload .uploaded_data .open () as file :
67
- read_file = file . readline (). decode ( "utf-8" )
67
+ read_file = decode_readfile ( file , one_line = True )
68
68
reader = csv .DictReader (io .StringIO (read_file ))
69
69
70
70
header_check = _check_csv_header (reader )
@@ -137,24 +137,40 @@ def generate_xml_from_external_data(own_upload: OwnFormsUpload) -> Dict[str, Uni
137
137
return {"error" : None , "data" : xml_element_tree }
138
138
139
139
140
def decode_readfile(input_file, *, one_line=False) -> str:
    """Read bytes from *input_file* and decode them to text.

    The bytes are read exactly once and then decoded with the first codec
    that accepts them:

    1. ``utf-8-sig`` - UTF-8 with an *optional* leading BOM (common for
       files exported from Microsoft Office). Because the BOM is optional,
       this codec also handles plain UTF-8, so no separate ``utf-8``
       attempt is needed.
    2. ``cp1252`` - common for files produced on Microsoft Windows.

    Args:
        input_file: Binary file-like object whose ``read()`` /
            ``readline()`` return ``bytes``.
        one_line: When true, read and decode only the next line instead of
            the remainder of the stream.

    Returns:
        The decoded text, without a leading BOM.

    Raises:
        ValueError: If none of the supported encodings can decode the data.
    """
    raw_bytes = input_file.readline() if one_line else input_file.read()

    # Decode the bytes already in memory instead of seeking back and
    # re-reading for every fallback: this works for non-seekable streams and
    # always decodes the very line/content that was read, even when the
    # stream was not positioned at offset 0 on entry.
    last_error = None
    for encoding in ("utf-8-sig", "cp1252"):
        try:
            return raw_bytes.decode(encoding)
        except UnicodeDecodeError as err:
            last_error = err

    # Keep the last decode failure as the cause to ease debugging.
    raise ValueError(_("The file is not in a valid format.")) from last_error
158
174
159
175
160
176
def parse_file_data (file ) -> List [DonorModel ]:
0 commit comments