Skip to content

Commit 9bf1888

Browse files
committed
Use segment's off and vaddr fields instead of IsSharedObject heuristic
This patch unifies the handling of ET_EXEC and ET_DYN ELF files by clarifying the difference between three things: the "offset" of a function's code in the ELF file; its "virtual address" as recorded in the file (according to the ELF specification, for executable and shared object ELF files this is the "symbol value" written to the st_value field of a symbol table entry); and the actual address of the function in the address space of a process after dynamic relocations have taken place. Please note that the file offset and the virtual address are usually the same in ET_DYN files (such as shared objects and position-independent executables), but this is not required, and the assumption is sometimes violated in real-life scenarios. Reviewed By: tnfchris Differential Revision: https://reviews.llvm.org/D144852 ~~ Huawei RRI, OS Lab
1 parent 7c485bf commit 9bf1888

20 files changed

+2555
-62
lines changed

lnt/testing/profile/cPerf.cpp

Lines changed: 52 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -178,30 +178,6 @@ void Assert(bool Expr, const char *ExprStr, const char *File, int Line) {
178178
throw std::logic_error(Str);
179179
}
180180

181-
// Returns true if the ELF file given by filename
182-
// is a shared object (DYN).
183-
bool IsSharedObject(const std::string &Fname) {
184-
// We replicate the first part of an ELF header here
185-
// so as not to rely on <elf.h>.
186-
struct PartialElfHeader {
187-
unsigned char e_ident[16];
188-
uint16_t e_type;
189-
};
190-
const int ET_DYN = 3;
191-
192-
FILE *stream = fopen(Fname.c_str(), "r");
193-
if (stream == NULL)
194-
return false;
195-
196-
PartialElfHeader H;
197-
auto NumRead = fread(&H, 1, sizeof(H), stream);
198-
assert(NumRead == sizeof(H));
199-
200-
fclose(stream);
201-
202-
return H.e_type == ET_DYN;
203-
}
204-
205181
//===----------------------------------------------------------------------===//
206182
// Perf structures. Taken from https://lwn.net/Articles/644919/
207183
//===----------------------------------------------------------------------===//
@@ -360,9 +336,20 @@ static const char* sw_event_names[PERF_COUNT_SW_MAX] = {
360336
//===----------------------------------------------------------------------===//
361337

362338
struct Map {
363-
uint64_t Start, End, Adjust;
364-
bool isSO;
339+
Map(uint64_t Start, uint64_t End, const char *Filename)
340+
: Start(Start), End(End), Filename(Filename) {}
341+
342+
uint64_t Start, End;
365343
const char *Filename;
344+
345+
// Mapping-related adjustments. Here FileOffset(func) is the offset of func
346+
// in the ELF file, VAddr(func) is the virtual address associated with this
347+
// symbol (in case of executable and shared object ELF files, st_value field
348+
// of a symbol table's entry is symbol's virtual address) and &func is the
349+
// actual memory address after relocations took place in the address space of
350+
// the process being profiled.
351+
uint64_t FileToPCOffset; // FileOffset(func) + FileToPCOffset == &func
352+
uint64_t VAddrToFileOffset; // VAddr(func) + VAddrToFileOffset == FileOffset(func)
366353
};
367354

368355
struct EventDesc {
@@ -389,7 +376,7 @@ class SymTabOutput : public std::vector<Symbol> {
389376
SymTabOutput(std::string Objdump, std::string BinaryCacheRoot)
390377
: Objdump(Objdump), BinaryCacheRoot(BinaryCacheRoot) {}
391378

392-
uint64_t fetchExecSegment(Map *M) {
379+
void fetchExecSegment(Map *M, uint64_t *FileOffset, uint64_t *VAddr) {
393380
std::string Cmd = Objdump + " -p -C " +
394381
BinaryCacheRoot + std::string(M->Filename) +
395382
#ifdef _WIN32
@@ -401,7 +388,7 @@ class SymTabOutput : public std::vector<Symbol> {
401388

402389
char *Line = nullptr, *PrevLine = nullptr;
403390
size_t LineLen = 0;
404-
uint64_t offset = 0;
391+
*FileOffset = *VAddr = 0;
405392
while (true) {
406393
if (PrevLine)
407394
free (PrevLine);
@@ -411,17 +398,22 @@ class SymTabOutput : public std::vector<Symbol> {
411398
if (Len == -1)
412399
break;
413400

414-
char* pos;
415-
if ((pos = strstr (Line, "flags r-x")) == NULL
416-
&& (pos = strstr (Line, "flags rwx")) == NULL)
401+
if (!strstr(Line, "flags r-x") && !strstr(Line, "flags rwx"))
417402
continue;
418403

419404
/* Format is weird.. but we did find the section so punt. */
420-
if ((pos = strstr (PrevLine, "vaddr ")) == NULL)
405+
const char *OFFSET_LABEL = "off ";
406+
const char *VADDR_LABEL = "vaddr ";
407+
char *pos_offset = strstr(PrevLine, OFFSET_LABEL);
408+
char *pos_vaddr = strstr(PrevLine, VADDR_LABEL);
409+
if (!pos_offset || !pos_vaddr)
421410
break;
422411

423-
pos += 6;
424-
offset = strtoull (pos, NULL, 16);
412+
pos_offset += strlen(OFFSET_LABEL);
413+
pos_vaddr += strlen(VADDR_LABEL);
414+
*FileOffset = strtoull(pos_offset, NULL, 16);
415+
*VAddr = strtoull(pos_vaddr, NULL, 16);
416+
425417
break;
426418
}
427419
if (Line)
@@ -435,7 +427,6 @@ class SymTabOutput : public std::vector<Symbol> {
435427
fclose(Stream);
436428
wait(NULL);
437429
#endif
438-
return offset;
439430
}
440431

441432
void fetchSymbols(Map *M) {
@@ -528,15 +519,14 @@ class SymTabOutput : public std::vector<Symbol> {
528519

529520
void reset(Map *M) {
530521
clear();
522+
523+
// Take possible difference between "offset" and "virtual address" of
524+
// the executable segment into account.
525+
uint64_t FileOffset, VAddr;
526+
fetchExecSegment(M, &FileOffset, &VAddr);
527+
M->VAddrToFileOffset = FileOffset - VAddr;
528+
531529
// Fetch both dynamic and static symbols, sort and unique them.
532-
/* If we're a relocatable object then take the actual start of the text
533-
segment into account. */
534-
if (M->isSO) {
535-
uint64_t segmentStart = fetchExecSegment (M);
536-
/* Adjust the symbol to a value relative to the start of the load address
537-
to match up with registerNewMapping. */
538-
M->Adjust -= segmentStart;
539-
}
540530
fetchSymbols(M);
541531

542532
std::sort(begin(), end());
@@ -670,8 +660,7 @@ class PerfReader {
670660
void emitSymbol(
671661
Symbol &Sym, Map &M,
672662
std::map<uint64_t, std::map<const char *, uint64_t>>::iterator Event,
673-
std::map<const char *, uint64_t> &SymEvents,
674-
uint64_t Adjust);
663+
std::map<const char *, uint64_t> &SymEvents);
675664
PyObject *complete();
676665

677666
private:
@@ -851,13 +840,11 @@ static uint64_t getTimeFromSampleId(unsigned char *EndOfStruct,
851840
void PerfReader::registerNewMapping(unsigned char *Buf, const char *Filename) {
852841
perf_event_mmap_common *E = (perf_event_mmap_common *)Buf;
853842
auto MapID = Maps.size();
854-
// EXEC ELF objects aren't relocated. DYN ones are,
855-
// so if it's a DYN object adjust by subtracting the
856-
// map base.
857-
bool IsSO = IsSharedObject(BinaryCacheRoot + std::string(Filename));
843+
858844
uint64_t End = E->start + E->extent;
859-
uint64_t Adjust = IsSO ? E->start - E->pgoff : 0;
860-
Maps.push_back({E->start, End, Adjust, IsSO, Filename});
845+
Map NewMapping(E->start, End, Filename);
846+
NewMapping.FileToPCOffset = E->start - E->pgoff;
847+
Maps.push_back(NewMapping);
861848

862849
unsigned char *EndOfEvent = Buf + E->header.size;
863850
// FIXME: The first EventID is used for every event.
@@ -1025,24 +1012,25 @@ void PerfReader::emitMaps() {
10251012
if (AllUnderThreshold)
10261013
continue;
10271014

1015+
Map &M = Maps[MapID];
10281016
SymTabOutput Syms(Objdump, BinaryCacheRoot);
1029-
Syms.reset(&Maps[MapID]);
1017+
Syms.reset(&M);
10301018

1031-
uint64_t Adjust = Maps[MapID].Adjust;
1019+
uint64_t VAddrToPCOffset = M.VAddrToFileOffset + M.FileToPCOffset;
10321020

10331021
// Accumulate the event totals for each symbol
10341022
auto Sym = Syms.begin();
10351023
auto Event = MapEvents.begin();
10361024
std::map<uint64_t, std::map<const char*, uint64_t>> SymToEventTotals;
10371025
while (Event != MapEvents.end() && Sym != Syms.end()) {
10381026
// Skip events until we find one after the start of Sym
1039-
auto PC = Event->first - Adjust;
1040-
if (PC < Sym->Start) {
1027+
auto VAddr = Event->first - VAddrToPCOffset;
1028+
if (VAddr < Sym->Start) {
10411029
++Event;
10421030
continue;
10431031
}
10441032
// Skip symbols until the event is before the end of Sym
1045-
if (PC >= Sym->End) {
1033+
if (VAddr >= Sym->End) {
10461034
++Sym;
10471035
continue;
10481036
}
@@ -1062,26 +1050,28 @@ void PerfReader::emitMaps() {
10621050
}
10631051
}
10641052
if (Keep)
1065-
emitSymbol(Sym, Maps[MapID], MapEvents.lower_bound(Sym.Start),
1066-
SymToEventTotals[Sym.Start], Adjust);
1053+
emitSymbol(Sym, M, MapEvents.lower_bound(Sym.Start + VAddrToPCOffset),
1054+
SymToEventTotals[Sym.Start]);
10671055
}
10681056
}
10691057
}
10701058

10711059
void PerfReader::emitSymbol(
10721060
Symbol &Sym, Map &M,
10731061
std::map<uint64_t, std::map<const char *, uint64_t>>::iterator Event,
1074-
std::map<const char *, uint64_t> &SymEvents,
1075-
uint64_t Adjust) {
1062+
std::map<const char *, uint64_t> &SymEvents) {
1063+
uint64_t VAddrToPCOffset = M.VAddrToFileOffset + M.FileToPCOffset;
10761064
ObjdumpOutput Dump(Objdump, BinaryCacheRoot);
10771065
Dump.reset(&M, Sym.Start, Sym.End);
10781066

10791067
emitFunctionStart(Sym.Name);
1068+
assert(Sym.Start <= Event->first - VAddrToPCOffset &&
1069+
Event->first - VAddrToPCOffset < Sym.End);
10801070
for (uint64_t I = Dump.next(); I < Sym.End; I = Dump.next()) {
1081-
auto PC = Event->first - Adjust;
1071+
auto VAddr = Event->first - VAddrToPCOffset;
10821072

10831073
auto Text = Dump.getText();
1084-
if (PC == I) {
1074+
if (VAddr == I) {
10851075
emitLine(I, &Event->second, Text);
10861076
++Event;
10871077
} else {
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include <stdlib.h>
2+
3+
volatile unsigned n = 0;
4+
5+
__attribute__((noinline))
6+
__attribute__((section(".text.correct")))
7+
__attribute__((aligned(0x1000)))
8+
void correct(long count) {
9+
for (long i = 0; i < count; ++i) {
10+
n += 1;
11+
}
12+
}
13+
14+
int main(int argc, const char *argv[]) {
15+
correct(atol(argv[1]));
16+
return 0;
17+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
SECTIONS {
2+
.text (. + 0x1000) : {
3+
*(.text)
4+
*(.text.correct)
5+
}
6+
} INSERT BEFORE .init;
7+
/* At the time of writing, .init is the first section placed in the executable
8+
* segment of a binary produced by clang.
9+
*/
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#!/bin/bash
2+
3+
# While it is quite common for ET_DYN ELF files to have virtual addresses equal
4+
# to file offsets, these are different entities. For example, the code segment
5+
# is sometimes shifted by one page or so.
6+
#
7+
# This script prepares an executable file with code contained in a section
8+
# that has VirtAddr == FileOffset + 0x1000.
9+
#
10+
# In addition, this script also creates two regular executables:
11+
# a position-independent executable and a static one to check the handling of
12+
# the more traditional layout of ELF segments for ET_DYN and ET_EXEC binaries.
13+
#
14+
# A few simple checks are performed to make sure the heuristics used to create
15+
# the required segment layouts still work.
16+
17+
cd "$(dirname $0)"
18+
19+
save_objdump_output() {
20+
local path_to_elf="$1"
21+
local addr_correct="$2"
22+
23+
local basename="$(basename "$path_to_elf")"
24+
25+
llvm-objdump "$path_to_elf" -t > "../${basename}.objdump.out"
26+
llvm-objdump "$path_to_elf" -p > "../${basename}.objdump.p.out"
27+
llvm-objdump "$path_to_elf" -j .text --disassemble-symbols=correct > "../${basename}.objdump.${addr_correct}.out"
28+
}
29+
30+
record_perf_data() {
31+
local path_to_elf="$1"
32+
local basename="$(basename "$path_to_elf")"
33+
local path_to_perf_data="../${basename}.perf_data"
34+
local num_of_iterations=100000000
35+
36+
rm -f "$path_to_perf_data"
37+
perf record -e cpu-clock -o "$path_to_perf_data" "$path_to_elf" $num_of_iterations
38+
39+
# It is probably not a good idea to commit very large *.perf_data files to git
40+
size_in_bytes=$(stat --format='%s' "$path_to_perf_data")
41+
if [ $size_in_bytes -gt 50000 ]; then
42+
echo "perf produced too large output file ${path_to_perf_data}, try decreasing"
43+
echo "the number of iterations or passing -F option to 'perf record'."
44+
exit 1
45+
fi
46+
}
47+
48+
save_test_case() {
49+
local path_to_elf="$1"
50+
local addr_correct="$2"
51+
52+
record_perf_data "$path_to_elf"
53+
save_objdump_output "$path_to_elf" $addr_correct
54+
}
55+
56+
check_file() {
57+
local file="$1"
58+
local line="$2"
59+
60+
# Use pcregrep to simplify the handling of newlines: \n can be embedded into
61+
# the regex, and a dot does not match a newline.
62+
if ! pcregrep -M "$line" "$file"; then
63+
echo "Unexpected test case generated: file '$file' should contain '$line'"
64+
exit 1
65+
fi
66+
}
67+
68+
clang -Os -o /tmp/segments-shifted segments.c -pie -Wl,-T,segments.lds
69+
clang -Os -o /tmp/segments-dyn segments.c -pie
70+
clang -Os -o /tmp/segments-exec segments.c -static
71+
72+
save_test_case /tmp/segments-shifted 0x2000
73+
check_file ../segments-shifted.objdump.out "00002000 .* correct"
74+
# The expected objdump -p output is something like this (note off != vaddr):
75+
# LOAD off 0x0000000000000618 vaddr 0x0000000000001618 paddr 0x0000000000001618 align 2**12
76+
# filesz 0x0000000000002a3d memsz 0x0000000000002a3d flags r-x
77+
check_file ../segments-shifted.objdump.p.out "LOAD off 0x(0+)0000(...) vaddr 0x\g{1}0001\g{2} paddr.*\n.*flags r-x"
78+
79+
# Feel free to update the value of the "correct" symbol in the static case if it changes
80+
save_test_case /tmp/segments-exec 0x403000
81+
check_file ../segments-exec.objdump.out "00403000 .* correct"
82+
check_file ../segments-exec.objdump.p.out "LOAD off 0x(0+)0001000 vaddr 0x(0+)0401000 paddr.*\n.*flags r-x"
83+
84+
save_test_case /tmp/segments-dyn 0x3000
85+
check_file ../segments-dyn.objdump.out "00003000 .* correct"
86+
check_file ../segments-dyn.objdump.p.out "LOAD off 0x(0+)0001000 vaddr 0x(0+)0001000 paddr.*\n.*flags r-x"

tests/testing/Inputs/fake-objdump.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,7 @@ def exit_with_fake_output(suffix):
1616
if arg.startswith('-t'):
1717
exit_with_fake_output('out')
1818

19+
if arg.startswith('-p'):
20+
exit_with_fake_output('p.out')
21+
1922
sys.exit(1)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Fake "objdump -p" output.
2+
3+
The original test case was added when ET_EXEC and ET_DYN ELF binaries were
4+
handled differently (ET_EXEC was assumed by default if the IsSharedObject()
5+
function could not find the file).
6+
7+
This test input was added to fix the existing tests after the removal of the
8+
heuristic, which assumed that virtual addresses are equal to file offsets in
9+
the ET_DYN case and to final addresses in the process's address space in the
10+
ET_EXEC case.
11+
12+
The "off" and "vaddr" fields are set to some reasonable values based on the
13+
mmap2 records from *.perf_data file.
14+
15+
LOAD off 0x00000000 vaddr 0x00400000 paddr ...
16+
... ... ... ... flags r-x
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
Fake "objdump -p" output.
2+
3+
The original test case was added when ET_EXEC and ET_DYN ELF binaries were
4+
handled differently (ET_EXEC was assumed by default if the IsSharedObject()
5+
function could not find the file).
6+
7+
This test input was added to fix the existing tests after the removal of the
8+
heuristic, which assumed that virtual addresses are equal to file offsets in
9+
the ET_DYN case and to final addresses in the process's address space in the
10+
ET_EXEC case.
11+
12+
The "off" and "vaddr" fields are set to some reasonable values based on the
13+
mmap2 records from *.perf_data file.
14+
15+
LOAD off 0x00000000 vaddr 0x00400000 paddr ...
16+
... ... ... ... flags r-x
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
/tmp/segments-dyn: file format elf64-x86-64
3+
4+
Disassembly of section .text:
5+
6+
0000000000003000 <correct>:
7+
3000: 48 85 ff testq %rdi, %rdi
8+
3003: 7e 0b jle 0x3010 <correct+0x10>
9+
3005: ff 05 11 30 00 00 incl 12305(%rip) # 0x601c <n>
10+
300b: 48 ff cf decq %rdi
11+
300e: 75 f5 jne 0x3005 <correct+0x5>
12+
3010: c3 retq

0 commit comments

Comments
 (0)