Skip to content

Commit 1577417

Browse files
authored
Fix -gsource-map with UTF-8 chars in directory and file names in the path. (#24560)
Fix -gsource-map with UTF-8 chars in directory and file names in the path. Fixes #24557.
1 parent be89742 commit 1577417

File tree

2 files changed

+28
-9
lines changed

2 files changed

+28
-9
lines changed

test/test_other.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10757,7 +10757,7 @@ def test_wasm_sourcemap_relative_paths(self):
1075710757
ensure_dir('build')
1075810758

1075910759
def test(infile, source_map_added_dir=''):
10760-
expected_source_map_path = 'a.cpp'
10760+
expected_source_map_path = 'A ä☃ö Z.cpp'
1076110761
if source_map_added_dir:
1076210762
expected_source_map_path = source_map_added_dir + '/' + expected_source_map_path
1076310763
print(infile, expected_source_map_path)
@@ -10778,10 +10778,10 @@ def test(infile, source_map_added_dir=''):
1077810778
self.run_process([EMCC, 'a.o', '-gsource-map'], cwd='build')
1077910779
self.assertIn('"../%s"' % expected_source_map_path, read_file('build/a.out.wasm.map'))
1078010780

10781-
test('a.cpp')
10781+
test('A ä☃ö Z.cpp')
1078210782

10783-
ensure_dir('inner')
10784-
test('inner/a.cpp', 'inner')
10783+
ensure_dir('inner Z ö☃ä A')
10784+
test('inner Z ö☃ä A/A ä☃ö Z.cpp', 'inner Z ö☃ä A')
1078510785

1078610786
def test_wasm_sourcemap_extract_comp_dir_map(self):
1078710787
wasm_sourcemap = importlib.import_module('tools.wasm-sourcemap')

tools/wasm-sourcemap.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,25 @@ def remove_dead_entries(entries):
197197
block_start = cur_entry
198198

199199

200+
# Given a string that has non-ASCII UTF-8 bytes 128-255 stored as octal sequences (\200 - \377), decode
# the sequences back to UTF-8. E.g. "C:\\\303\244 \303\266\\emsdk\\emscripten\\main" -> "C:\\ä ö\\emsdk\\emscripten\\main"
#
# Escaped backslash pairs (`\\`) are copied through unchanged, exactly as in the example above, but they
# are consumed as a unit so that the second backslash of a pair is never mistaken for the start of an
# octal escape (e.g. "C:\\2024\\src" must stay as-is instead of being read as the byte \202).
# Backslashes that are not followed by a complete \200 - \377 octal triple (including a trailing
# backslash at the very end of the input) are also copied through unchanged.
# Characters that are already non-ASCII in the input are preserved.
#
# Raises UnicodeDecodeError if the decoded byte sequence is not valid UTF-8.
#
# NOTE: the parameter is named `str` (shadowing the builtin) to keep the existing signature intact.
def decode_octal_encoded_utf8(str):
  def replace_escape(match):
    octal_digits = match.group(1)
    if octal_digits is None:
      # Matched an escaped backslash pair: keep both characters verbatim.
      return match.group(0)
    return bytes([int(octal_digits, 8)])

  # Operate on UTF-8 bytes so that decoded escape bytes and any literal non-ASCII characters end up in
  # one consistent byte stream before the final decode.
  encoded = str.encode('utf-8')
  # The first alternative must come first so that backslash pairs win over octal escapes when scanning
  # left to right; [23][0-7]{2} is exactly the octal range 200-377 (bytes 128-255).
  decoded = re.sub(rb'\\\\|\\([23][0-7]{2})', replace_escape, encoded)
  return decoded.decode('utf-8')
217+
218+
200219
def extract_comp_dir_map(text):
201220
map_stmt_list_to_comp_dir = {}
202221
chunks = re.split(r"0x[0-9a-f]*: DW_TAG_compile_unit", text)
@@ -205,7 +224,7 @@ def extract_comp_dir_map(text):
205224
if stmt_list_match is not None:
206225
stmt_list = stmt_list_match.group(1)
207226
comp_dir_match = re.search(r"DW_AT_comp_dir\s+\(\"([^\"]+)\"\)", chunk)
208-
comp_dir = comp_dir_match.group(1) if comp_dir_match is not None else ''
227+
comp_dir = decode_octal_encoded_utf8(comp_dir_match.group(1)) if comp_dir_match is not None else ''
209228
map_stmt_list_to_comp_dir[stmt_list] = comp_dir
210229
return map_stmt_list_to_comp_dir
211230

@@ -251,12 +270,12 @@ def read_dwarf_entries(wasm, options):
251270

252271
include_directories = {'0': comp_dir}
253272
for dir in re.finditer(r"include_directories\[\s*(\d+)\] = \"([^\"]*)", line_chunk):
254-
include_directories[dir.group(1)] = os.path.join(comp_dir, dir.group(2))
273+
include_directories[dir.group(1)] = os.path.join(comp_dir, decode_octal_encoded_utf8(dir.group(2)))
255274

256275
files = {}
257276
for file in re.finditer(r"file_names\[\s*(\d+)\]:\s+name: \"([^\"]*)\"\s+dir_index: (\d+)", line_chunk):
258277
dir = include_directories[file.group(3)]
259-
file_path = os.path.join(dir, file.group(2))
278+
file_path = os.path.join(dir, decode_octal_encoded_utf8(file.group(2)))
260279
files[file.group(1)] = file_path
261280

262281
for line in re.finditer(r"\n0x([0-9a-f]+)\s+(\d+)\s+(\d+)\s+(\d+)(.*?end_sequence)?", line_chunk):
@@ -352,8 +371,8 @@ def main():
352371

353372
logger.debug('Saving to %s' % options.output)
354373
map = build_sourcemap(entries, code_section_offset, options)
355-
with open(options.output, 'w') as outfile:
356-
json.dump(map, outfile, separators=(',', ':'))
374+
with open(options.output, 'w', encoding='utf-8') as outfile:
375+
json.dump(map, outfile, separators=(',', ':'), ensure_ascii=False)
357376

358377
if options.strip:
359378
wasm = strip_debug_sections(wasm)

0 commit comments

Comments
 (0)