Skip to content

Commit 0cf3c43

Browse files
StefanBossbalyStefan Bossbaly
andauthored
[HWASan] Improve symbol indexing (#135967)
Previously we would add any ELF that contained a build ID, regardless of whether the ELF contained symbols or not. This works for Android, since soong will strip the symbols into a new directory. However, other build systems, like BUCK, will write the stripped file in the same directory as the unstripped file. This would sometimes cause the hwasan_symbolize script to add the stripped ELF to its index and ignore the symbolized ELF. The logic has now been changed to only add ELFs that contain symbols to the index. If two symbolized ELFs are encountered with the same build ID, we now exit out with an error. Fixes #135966 --------- Co-authored-by: Stefan Bossbaly <sboss@meta.com>
1 parent 01cb390 commit 0cf3c43

File tree

1 file changed

+72
-16
lines changed

1 file changed

+72
-16
lines changed

compiler-rt/lib/hwasan/scripts/hwasan_symbolize

Lines changed: 72 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ from __future__ import unicode_literals
1616

1717
import argparse
1818
import glob
19+
import hashlib
1920
import html
2021
import json
2122
import mmap
@@ -37,8 +38,9 @@ if sys.version_info.major < 3:
3738
Ehdr_size = 64
3839
e_shnum_offset = 60
3940
e_shoff_offset = 40
40-
41+
e_shstrndx_offset = 62
4142
Shdr_size = 64
43+
sh_name_offset = 0
4244
sh_type_offset = 4
4345
sh_offset_offset = 24
4446
sh_size_offset = 32
@@ -62,33 +64,70 @@ def handle_Nhdr(mv, sh_size):
6264
offset += Nhdr_size + align_up(n_namesz, 4) + align_up(n_descsz, 4)
6365
return None
6466

65-
def handle_Shdr(mv):
67+
def handle_shstrtab(mv, e_shoff):
    """Return a view of the section that stores the section header names.

    mv is a buffer over the ELF file contents; e_shoff is the file offset of
    the section header table. The ELF header field at e_shstrndx_offset gives
    the index of the section header that describes the name string table.
    """
    strtab_index, = struct.unpack_from('<H', buffer=mv, offset=e_shstrndx_offset)

    # Locate that section header inside the section header table.
    hdr_begin = e_shoff + strtab_index * Shdr_size
    hdr_end = hdr_begin + Shdr_size

    # Only the data offset and size are needed; the header's own name is not.
    _, data_offset, data_size = handle_Shdr(mv[hdr_begin:hdr_end])
    return mv[data_offset:data_offset + data_size]
74+
75+
def read_string(mv):
    """Return the NUL-terminated string found at the start of mv.

    mv is an iterable of integer byte values (bytes, bytearray or a
    memoryview of bytes). Each byte is converted with chr(), so byte values
    map one-to-one onto code points 0-255. Reading stops at the first NUL
    byte; if there is none, the whole buffer is returned as a string.
    """
    chars = []
    for byte in mv:
        if byte == 0:
            break
        chars.append(chr(byte))
    # Join once at the end instead of growing a string with += per byte,
    # which can degrade to quadratic time.
    return "".join(chars)
83+
84+
def unpack_sh_type(mv):
6685
sh_type, = struct.unpack_from('<I', buffer=mv, offset=sh_type_offset)
67-
if sh_type != SHT_NOTE:
68-
return None, None
86+
return sh_type
87+
88+
def handle_Shdr(mv):
89+
name_offset, = struct.unpack_from('<I', buffer=mv, offset=sh_name_offset)
6990
sh_offset, = struct.unpack_from('<Q', buffer=mv, offset=sh_offset_offset)
7091
sh_size, = struct.unpack_from('<Q', buffer=mv, offset=sh_size_offset)
71-
return sh_offset, sh_size
92+
return name_offset, sh_offset, sh_size
7293

7394
def handle_elf(mv):
7495
# \x02 is ELFCLASS64, \x01 is ELFDATA2LSB. HWASan currently only works on
7596
# 64-bit little endian platforms (x86_64 and ARM64). If this changes, we will
7697
# have to extend the parsing code.
7798
if mv[:6] != b'\x7fELF\x02\x01':
7899
return None
100+
found_symbols = False
101+
bid = None
79102
e_shnum, = struct.unpack_from('<H', buffer=mv, offset=e_shnum_offset)
80103
e_shoff, = struct.unpack_from('<Q', buffer=mv, offset=e_shoff_offset)
104+
105+
# Section where all the section header names are stored.
106+
shstr = handle_shstrtab(mv, e_shoff)
107+
81108
for i in range(0, e_shnum):
82109
start = e_shoff + i * Shdr_size
83-
sh_offset, sh_size = handle_Shdr(mv[start: start + Shdr_size])
84-
if sh_offset is None:
85-
continue
86-
note_hdr = mv[sh_offset: sh_offset + sh_size]
87-
result = handle_Nhdr(note_hdr, sh_size)
88-
if result is not None:
89-
return result
110+
sh = mv[start: start + Shdr_size]
111+
sh_name_offset, sh_offset, sh_size = handle_Shdr(sh)
112+
sh_name = read_string(shstr[sh_name_offset:])
113+
sh_type = unpack_sh_type(sh)
114+
115+
if sh_name == ".debug_info":
116+
found_symbols = True
117+
if sh_type == SHT_NOTE:
118+
if sh_offset is None:
119+
continue
120+
note_hdr = mv[sh_offset: sh_offset + sh_size]
121+
result = handle_Nhdr(note_hdr, sh_size)
122+
if result is not None:
123+
bid = result
124+
125+
if found_symbols:
126+
return bid
127+
else:
128+
return None
90129

91-
def get_buildid(filename):
130+
def read_elf(filename):
92131
with open(filename, "r") as fd:
93132
if os.fstat(fd.fileno()).st_size < Ehdr_size:
94133
return None
@@ -200,7 +239,7 @@ class Symbolizer:
200239
if os.path.exists(full_path):
201240
return full_path
202241
if name not in self.__warnings:
203-
print("Could not find symbols for", name, file=sys.stderr)
242+
print("Could not find symbols for {} (Build ID: {})".format(name, buildid), file=sys.stderr)
204243
self.__warnings.add(name)
205244
return None
206245

@@ -268,13 +307,30 @@ class Symbolizer:
268307
for fn in fnames:
269308
filename = os.path.join(dname, fn)
270309
try:
271-
bid = get_buildid(filename)
310+
bid = read_elf(filename)
272311
except FileNotFoundError:
273312
continue
274313
except Exception as e:
275314
print("Failed to parse {}: {}".format(filename, e), file=sys.stderr)
276315
continue
277-
if bid is not None:
316+
if bid is None:
317+
continue
318+
319+
if bid in self.__index:
320+
index_filename = self.__index[bid]
321+
322+
if os.path.samefile(index_filename, filename):
323+
continue
324+
325+
with open(filename, "rb") as f:
326+
file_hash = hashlib.file_digest(f, "sha256")
327+
328+
with open(index_filename, "rb") as f:
329+
index_file_hash = hashlib.file_digest(f, "sha256")
330+
331+
if index_file_hash.digest() != file_hash.digest():
332+
print("Build ID collision! Files share the same BuildId ({}) but their contents differ. Files {} and {} ".format(bid, filename, index_filename), file=sys.stderr)
333+
else:
278334
self.__index[bid] = filename
279335

280336
def symbolize_line(self, line):

0 commit comments

Comments
 (0)