@@ -197,6 +197,25 @@ def remove_dead_entries(entries):
197
197
block_start = cur_entry
198
198
199
199
200
+ # Given a string that has non-ASCII UTF-8 bytes 128-255 stored as octal sequences (\200 - \377), decode
201
+ # the sequences back to UTF-8. E.g. "C:\\\303\244 \303\266\\emsdk\\emscripten\\main" -> "C:\\ä ö\\emsdk\\emscripten\\main"
202
def decode_octal_encoded_utf8(str):
  r"""Decode octal-escaped UTF-8 bytes embedded in a string.

  Bytes 128-255 that appear as three-digit octal escapes (``\200`` - ``\377``)
  are turned back into raw bytes, and the resulting byte sequence is decoded
  as UTF-8.  Everything else is passed through unchanged, including doubled
  backslashes (``\\``), which stay doubled in the result.

  Example: ``C:\\\303\244 \303\266\\emsdk`` -> ``C:\\ä ö\\emsdk``

  Args:
    str: The text to decode.  (The parameter name shadows the builtin; it is
      kept so the signature stays unchanged.)

  Returns:
    The decoded text.

  Raises:
    UnicodeDecodeError: If the decoded escape bytes are not valid UTF-8.
  """
  text = str
  length = len(text)
  out = bytearray()
  i = 0
  while i < length:
    ch = text[i]
    if ch == '\\' and i + 1 < length:
      nxt = text[i + 1]
      if nxt == '\\':
        # An escaped backslash.  Consume both characters together so that the
        # second one is never mistaken for the start of an octal escape
        # (e.g. the directory name in "C:\\2020\\src").
        out += b'\\\\'
        i += 2
        continue
      if (nxt in ('2', '3') and i + 4 <= length and
          all(c in '01234567' for c in text[i + 2:i + 4])):
        # A complete \2xx or \3xx escape: exactly one byte in 128-255.
        out.append(int(text[i + 1:i + 4], 8))
        i += 4
        continue
    # Ordinary character (or a stray/truncated escape): copy it verbatim.
    # Encoding as UTF-8 keeps any already-decoded non-ASCII text intact.
    out += ch.encode('utf-8')
    i += 1
  return out.decode('utf-8')
217
+
218
+
200
219
def extract_comp_dir_map (text ):
201
220
map_stmt_list_to_comp_dir = {}
202
221
chunks = re .split (r"0x[0-9a-f]*: DW_TAG_compile_unit" , text )
@@ -205,7 +224,7 @@ def extract_comp_dir_map(text):
205
224
if stmt_list_match is not None :
206
225
stmt_list = stmt_list_match .group (1 )
207
226
comp_dir_match = re .search (r"DW_AT_comp_dir\s+\(\"([^\"]+)\"\)" , chunk )
208
- comp_dir = comp_dir_match .group (1 ) if comp_dir_match is not None else ''
227
+ comp_dir = decode_octal_encoded_utf8 ( comp_dir_match .group (1 ) ) if comp_dir_match is not None else ''
209
228
map_stmt_list_to_comp_dir [stmt_list ] = comp_dir
210
229
return map_stmt_list_to_comp_dir
211
230
@@ -251,12 +270,12 @@ def read_dwarf_entries(wasm, options):
251
270
252
271
include_directories = {'0' : comp_dir }
253
272
for dir in re .finditer (r"include_directories\[\s*(\d+)\] = \"([^\"]*)" , line_chunk ):
254
- include_directories [dir .group (1 )] = os .path .join (comp_dir , dir .group (2 ))
273
+ include_directories [dir .group (1 )] = os .path .join (comp_dir , decode_octal_encoded_utf8 ( dir .group (2 ) ))
255
274
256
275
files = {}
257
276
for file in re .finditer (r"file_names\[\s*(\d+)\]:\s+name: \"([^\"]*)\"\s+dir_index: (\d+)" , line_chunk ):
258
277
dir = include_directories [file .group (3 )]
259
- file_path = os .path .join (dir , file .group (2 ))
278
+ file_path = os .path .join (dir , decode_octal_encoded_utf8 ( file .group (2 ) ))
260
279
files [file .group (1 )] = file_path
261
280
262
281
for line in re .finditer (r"\n0x([0-9a-f]+)\s+(\d+)\s+(\d+)\s+(\d+)(.*?end_sequence)?" , line_chunk ):
@@ -352,8 +371,8 @@ def main():
352
371
353
372
logger .debug ('Saving to %s' % options .output )
354
373
map = build_sourcemap (entries , code_section_offset , options )
355
- with open (options .output , 'w' ) as outfile :
356
- json .dump (map , outfile , separators = (',' , ':' ))
374
+ with open (options .output , 'w' , encoding = 'utf-8' ) as outfile :
375
+ json .dump (map , outfile , separators = (',' , ':' ), ensure_ascii = False )
357
376
358
377
if options .strip :
359
378
wasm = strip_debug_sections (wasm )
0 commit comments