Skip to content

Commit 79ac328

Browse files
authored
Fix parsing of dwarfdump output to be more resilient (#20777)
To generate source maps, we run llvm-dwarfdump against the wasm file and parse its plain-text output, relying heavily on regex matches. Previously, to retrieve the comp dir from the debug_info section, we searched with a regex pattern that assumed the output lists the DW_AT_stmt_list and DW_AT_comp_dir entries consecutively, in that order, but I noticed that this assumption doesn't hold in some cases. With this change, comp_dir can be parsed regardless of the order of the DW_* entries.
1 parent 813520b commit 79ac328

File tree

5 files changed

+158
-8
lines changed

5 files changed

+158
-8
lines changed

AUTHORS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,3 +596,4 @@ a license to everyone to use it as detailed in LICENSE.)
596596
* 郑苏波 (Super Zheng) <superzheng@tencent.com>
597597
* James Hu <jameshu2022@gmail.com>
598598
* Jerry Zhuang <jerry.zhuang@jwzg.com>
599+
* Taisei Kon <kinsei0916@gmail.com>
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
bar.wasm: file format WASM
2+
3+
.debug_info contents:
4+
0x00000000: Compile Unit: length = 0x00000129, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x04 (next unit at 0x0000012d)
5+
6+
0x0000000b: DW_TAG_compile_unit
7+
DW_AT_stmt_list (0x00000000)
8+
DW_AT_ranges (0x00000000
9+
[0x00000002, 0x0000000a)
10+
[0x0000000b, 0x00000015)
11+
[0x00000016, 0x00000030)
12+
[0x00000031, 0x00000039))
13+
DW_AT_name ("system/lib/compiler-rt/stack_ops.S")
14+
DW_AT_comp_dir ("/emsdk/emscripten")
15+
DW_AT_producer ("clang version 18.0.0 (https://github.com/llvm/llvm-project 269685545e439ad050b67740533c59f965cae955)")
16+
DW_AT_language (DW_LANG_Mips_Assembler)
17+
18+
.debug_line contents:
19+
debug_line[0x00000000]
20+
Line table prologue:
21+
total_length: 0x00000091
22+
format: DWARF32
23+
version: 4
24+
prologue_length: 0x00000043
25+
min_inst_length: 1
26+
max_ops_per_inst: 1
27+
default_is_stmt: 1
28+
line_base: -5
29+
line_range: 14
30+
opcode_base: 13
31+
standard_opcode_lengths[DW_LNS_copy] = 0
32+
standard_opcode_lengths[DW_LNS_advance_pc] = 1
33+
standard_opcode_lengths[DW_LNS_advance_line] = 1
34+
standard_opcode_lengths[DW_LNS_set_file] = 1
35+
standard_opcode_lengths[DW_LNS_set_column] = 1
36+
standard_opcode_lengths[DW_LNS_negate_stmt] = 0
37+
standard_opcode_lengths[DW_LNS_set_basic_block] = 0
38+
standard_opcode_lengths[DW_LNS_const_add_pc] = 0
39+
standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
40+
standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
41+
standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
42+
standard_opcode_lengths[DW_LNS_set_isa] = 1
43+
include_directories[ 1] = "system/lib/compiler-rt"
44+
file_names[ 1]:
45+
name: "stack_ops.S"
46+
dir_index: 1
47+
mod_time: 0x00000000
48+
length: 0x00000000
49+
50+
Address Line Column File ISA Discriminator OpIndex Flags
51+
------------------ ------ ------ ------ --- ------------- ------- -------------
52+
0x0000000000000002 18 0 1 0 0 0 is_stmt
53+
0x0000000000000009 19 0 1 0 0 0 is_stmt
54+
0x000000000000000a 19 0 1 0 0 0 is_stmt end_sequence
55+
0x000000000000000b 23 0 1 0 0 0 is_stmt
56+
0x000000000000000e 24 0 1 0 0 0 is_stmt
57+
0x0000000000000014 25 0 1 0 0 0 is_stmt
58+
0x0000000000000015 25 0 1 0 0 0 is_stmt end_sequence
59+
0x0000000000000019 30 0 1 0 0 0 is_stmt
60+
0x000000000000001f 32 0 1 0 0 0 is_stmt
61+
0x0000000000000021 34 0 1 0 0 0 is_stmt
62+
0x0000000000000022 36 0 1 0 0 0 is_stmt
63+
0x0000000000000024 37 0 1 0 0 0 is_stmt
64+
0x0000000000000025 38 0 1 0 0 0 is_stmt
65+
0x0000000000000027 39 0 1 0 0 0 is_stmt
66+
0x000000000000002d 40 0 1 0 0 0 is_stmt
67+
0x000000000000002f 41 0 1 0 0 0 is_stmt
68+
0x0000000000000030 41 0 1 0 0 0 is_stmt end_sequence
69+
0x0000000000000031 45 0 1 0 0 0 is_stmt
70+
0x0000000000000038 46 0 1 0 0 0 is_stmt
71+
0x0000000000000039 46 0 1 0 0 0 is_stmt end_sequence
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
foo.wasm: file format WASM
2+
3+
.debug_info contents:
4+
0x00000000: Compile Unit: length = 0x0000003a, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x04 (next unit at 0x0000003e)
5+
6+
0x0000000b: DW_TAG_compile_unit
7+
DW_AT_producer ("clang version 18.0.0 (https://github.com/llvm/llvm-project 269685545e439ad050b67740533c59f965cae955)")
8+
DW_AT_language (DW_LANG_C11)
9+
DW_AT_name ("system/lib/compiler-rt/__trap.c")
10+
DW_AT_stmt_list (0x00000000)
11+
DW_AT_comp_dir ("/emsdk/emscripten")
12+
DW_AT_low_pc (0x00000002)
13+
DW_AT_high_pc (0x00000006)
14+
15+
.debug_line contents:
16+
debug_line[0x00000000]
17+
Line table prologue:
18+
total_length: 0x00000059
19+
format: DWARF32
20+
version: 4
21+
prologue_length: 0x00000040
22+
min_inst_length: 1
23+
max_ops_per_inst: 1
24+
default_is_stmt: 1
25+
line_base: -5
26+
line_range: 14
27+
opcode_base: 13
28+
standard_opcode_lengths[DW_LNS_copy] = 0
29+
standard_opcode_lengths[DW_LNS_advance_pc] = 1
30+
standard_opcode_lengths[DW_LNS_advance_line] = 1
31+
standard_opcode_lengths[DW_LNS_set_file] = 1
32+
standard_opcode_lengths[DW_LNS_set_column] = 1
33+
standard_opcode_lengths[DW_LNS_negate_stmt] = 0
34+
standard_opcode_lengths[DW_LNS_set_basic_block] = 0
35+
standard_opcode_lengths[DW_LNS_const_add_pc] = 0
36+
standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
37+
standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
38+
standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
39+
standard_opcode_lengths[DW_LNS_set_isa] = 1
40+
include_directories[ 1] = "system/lib/compiler-rt"
41+
file_names[ 1]:
42+
name: "__trap.c"
43+
dir_index: 1
44+
mod_time: 0x00000000
45+
length: 0x00000000
46+
47+
Address Line Column File ISA Discriminator OpIndex Flags
48+
------------------ ------ ------ ------ --- ------------- ------- -------------
49+
0x0000000000000003 2 3 1 0 0 0 is_stmt prologue_end
50+
0x0000000000000004 3 1 1 0 0 0 is_stmt
51+
0x0000000000000006 3 1 1 0 0 0 is_stmt end_sequence

test/test_other.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from functools import wraps
1010
import glob
1111
import gzip
12+
import importlib
1213
import itertools
1314
import json
1415
import os
@@ -9589,6 +9590,24 @@ def test(infile, source_map_added_dir=''):
95899590
ensure_dir('inner')
95909591
test('inner/a.cpp', 'inner')
95919592

9593+
def test_wasm_sourcemap_extract_comp_dir_map(self):
  # The module name contains a hyphen, so it cannot be named in a plain
  # `import` statement; load it by its dotted path instead.
  wasm_sourcemap = importlib.import_module('tools.wasm-sourcemap')
  dump_dir = 'other/wasm_sourcemap_extract_comp_dir_map'
  expected_map = {'0x00000000': '/emsdk/emscripten'}

  # Make sure we can extract the compilation directories no matter what the
  # order of `DW_AT_*` attributes is.
  for dump_name in ('foo.wasm.dump', 'bar.wasm.dump'):
    dump_path = test_file(os.path.join(dump_dir, dump_name))
    comp_dir_map = wasm_sourcemap.extract_comp_dir_map(read_file(dump_path))
    self.assertEqual(comp_dir_map, expected_map)
9610+
95929611
def test_emsymbolizer(self):
95939612
def check_dwarf_loc_info(address, funcs, locs):
95949613
out = self.run_process(

tools/wasm-sourcemap.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,19 @@ def remove_dead_entries(entries):
178178
block_start = cur_entry
179179

180180

181+
def extract_comp_dir_map(text):
  """Map each compile unit's DW_AT_stmt_list value to its DW_AT_comp_dir.

  `text` is split at every `DW_TAG_compile_unit` header line, and each
  resulting piece is searched independently for the two attributes, so their
  relative order inside a unit does not matter.

  Returns a dict keyed by the stmt_list offset exactly as written in the
  text (e.g. '0x00000000'). The value is the quoted comp_dir string, or ''
  when the unit has no DW_AT_comp_dir. Units without a DW_AT_stmt_list are
  left out.
  """
  comp_dirs = {}
  # Element 0 of the split is whatever precedes the first compile unit
  # header, so it does not belong to any unit and is dropped.
  units = re.split(r"0x[0-9a-f]*: DW_TAG_compile_unit", text)[1:]
  for unit in units:
    offset_match = re.search(r"DW_AT_stmt_list\s+\((0x[0-9a-f]*)\)", unit)
    if offset_match is None:
      continue
    dir_match = re.search(r"DW_AT_comp_dir\s+\(\"([^\"]+)\"\)", unit)
    comp_dirs[offset_match.group(1)] = '' if dir_match is None else dir_match.group(1)
  return comp_dirs
192+
193+
181194
def read_dwarf_entries(wasm, options):
182195
if options.dwarfdump_output:
183196
output = Path(options.dwarfdump_output).read_bytes()
@@ -198,14 +211,9 @@ def read_dwarf_entries(wasm, options):
198211

199212
entries = []
200213
debug_line_chunks = re.split(r"debug_line\[(0x[0-9a-f]*)\]", output.decode('utf-8'))
201-
maybe_debug_info_content = debug_line_chunks[0]
202-
for i in range(1, len(debug_line_chunks), 2):
203-
stmt_list = debug_line_chunks[i]
204-
comp_dir_match = re.search(r"DW_AT_stmt_list\s+\(" + stmt_list + r"\)\s+" +
205-
r"DW_AT_comp_dir\s+\(\"([^\"]+)", maybe_debug_info_content)
206-
comp_dir = comp_dir_match.group(1) if comp_dir_match is not None else ""
207-
208-
line_chunk = debug_line_chunks[i + 1]
214+
map_stmt_list_to_comp_dir = extract_comp_dir_map(debug_line_chunks[0])
215+
for stmt_list, line_chunk in zip(debug_line_chunks[1::2], debug_line_chunks[2::2]):
216+
comp_dir = map_stmt_list_to_comp_dir.get(stmt_list, '')
209217

210218
# include_directories[ 1] = "/Users/yury/Work/junk/sqlite-playground/src"
211219
# file_names[ 1]:

0 commit comments

Comments
 (0)