Skip to content

Commit 35626e9

Browse files
authored
[DLCov] Origin-Tracking: Enable collecting and symbolizing stack traces (#143591)
This patch is part of a series that adds origin-tracking to the debugify source location coverage checks, allowing us to report symbolized stack traces of the point where missing source locations appear. This patch adds a pair of new functions in `Signals.h` that can be used to collect and symbolize stack traces respectively. These functions have major implementation overlap with the existing stack trace collection/symbolizing methods. However, the existing functions are specialized for dumping a stack trace to stderr when LLVM crashes, while the new functions are meant to be called repeatedly during the execution of the program; we therefore need a separate set of functions.
1 parent a2c9f7d commit 35626e9

File tree

4 files changed

+198
-65
lines changed

4 files changed

+198
-65
lines changed

llvm/include/llvm/Support/Signals.h

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,25 @@
1414
#ifndef LLVM_SUPPORT_SIGNALS_H
1515
#define LLVM_SUPPORT_SIGNALS_H
1616

17+
#include "llvm/Config/llvm-config.h"
1718
#include "llvm/Support/Compiler.h"
1819
#include <cstdint>
1920
#include <string>
2021

22+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
23+
#include "llvm/ADT/DenseMap.h"
24+
#include "llvm/ADT/DenseSet.h"
25+
#include "llvm/ADT/SmallVector.h"
26+
namespace llvm {
27+
// Typedefs that are convenient but only used by the stack-trace-collection code
28+
// added if DebugLoc origin-tracking is enabled.
29+
using AddressSet = DenseSet<void *, DenseMapInfo<void *, void>>;
30+
using SymbolizedAddressMap =
31+
DenseMap<void *, SmallVector<std::string, 0>, DenseMapInfo<void *, void>,
32+
detail::DenseMapPair<void *, SmallVector<std::string, 0>>>;
33+
} // namespace llvm
34+
#endif
35+
2136
namespace llvm {
2237
class StringRef;
2338
class raw_ostream;
@@ -57,6 +72,28 @@ LLVM_ABI void DisableSystemDialogsOnCrash();
5772
/// specified, the entire frame is printed.
5873
LLVM_ABI void PrintStackTrace(raw_ostream &OS, int Depth = 0);
5974

75+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
76+
#ifdef NDEBUG
77+
#error DebugLoc origin-tracking should not be enabled in Release builds.
78+
#endif
79+
/// Populates the given array with a stack trace of the current program, up to
80+
/// MaxDepth frames. Returns the number of frames collected, which will be
81+
/// inserted into \p StackTrace from index 0. All entries after the returned
82+
/// depth will be unmodified. NB: This is only intended to be used for
83+
/// introspection of LLVM by Debugify, will not be enabled in release builds,
84+
/// and should not be relied on for other purposes.
85+
template <unsigned long MaxDepth>
86+
int getStackTrace(std::array<void *, MaxDepth> &StackTrace);
87+
88+
/// Takes a set of \p Addresses, symbolizes them and stores the result in the
89+
/// provided \p SymbolizedAddresses map.
90+
/// NB: This is only intended to be used for introspection of LLVM by
91+
/// Debugify, will not be enabled in release builds, and should not be relied
92+
/// on for other purposes.
93+
void symbolizeAddresses(AddressSet &Addresses,
94+
SymbolizedAddressMap &SymbolizedAddresses);
95+
#endif
96+
6097
// Run all registered signal handlers.
6198
LLVM_ABI void RunSignalHandlers();
6299

llvm/lib/Support/Signals.cpp

Lines changed: 141 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
#include "llvm/Support/raw_ostream.h"
3232
#include <array>
3333
#include <cmath>
34-
#include <vector>
3534

3635
//===----------------------------------------------------------------------===//
3736
//=== WARNING: Implementation here must contain only TRULY operating system
@@ -137,47 +136,28 @@ static FormattedNumber format_ptr(void *PC) {
137136
return format_hex((uint64_t)PC, PtrWidth);
138137
}
139138

140-
/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
141-
LLVM_ATTRIBUTE_USED
142-
static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
143-
int Depth, llvm::raw_ostream &OS) {
144-
if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
145-
return false;
146-
147-
// Don't recursively invoke the llvm-symbolizer binary.
148-
if (Argv0.contains("llvm-symbolizer"))
149-
return false;
150-
151-
// FIXME: Subtract necessary number from StackTrace entries to turn return addresses
152-
// into actual instruction addresses.
153-
// Use llvm-symbolizer tool to symbolize the stack traces. First look for it
154-
// alongside our binary, then in $PATH.
155-
ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
156-
if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
157-
LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
158-
} else if (!Argv0.empty()) {
159-
StringRef Parent = llvm::sys::path::parent_path(Argv0);
160-
if (!Parent.empty())
161-
LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer", Parent);
162-
}
163-
if (!LLVMSymbolizerPathOrErr)
164-
LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
165-
if (!LLVMSymbolizerPathOrErr)
166-
return false;
167-
const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
168-
169-
// If we don't know argv0 or the address of main() at this point, try
170-
// to guess it anyway (it's possible on some platforms).
171-
std::string MainExecutableName =
172-
sys::fs::exists(Argv0) ? (std::string)std::string(Argv0)
173-
: sys::fs::getMainExecutable(nullptr, nullptr);
139+
/// Runs the llvm-symbolizer at \p LLVMSymbolizerPath to obtain the function
140+
/// names and source locations for the addresses in \p AddressList and returns
141+
/// the strings in a vector of pairs, where the first pair element is the index
142+
/// of the corresponding entry in AddressList and the second is the symbolized
143+
/// frame, in a format based on the sanitizer stack trace printer, with the
144+
/// exception that it does not write out frame numbers (i.e. "#2 " for the
145+
/// third address), as it is not assumed that \p AddressList corresponds to a
146+
/// single stack trace.
147+
/// There may be multiple returned entries for a single \p AddressList entry if
148+
/// that frame address corresponds to one or more inlined frames; in this case,
149+
/// all frames for an address will appear contiguously and in-order.
150+
std::optional<SmallVector<std::pair<unsigned, std::string>, 0>>
151+
collectAddressSymbols(void **AddressList, unsigned AddressCount,
152+
const char *MainExecutableName,
153+
const std::string &LLVMSymbolizerPath) {
174154
BumpPtrAllocator Allocator;
175155
StringSaver StrPool(Allocator);
176-
std::vector<const char *> Modules(Depth, nullptr);
177-
std::vector<intptr_t> Offsets(Depth, 0);
178-
if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(),
179-
MainExecutableName.c_str(), StrPool))
180-
return false;
156+
SmallVector<const char *, 0> Modules(AddressCount, nullptr);
157+
SmallVector<intptr_t, 0> Offsets(AddressCount, 0);
158+
if (!findModulesAndOffsets(AddressList, AddressCount, Modules.data(),
159+
Offsets.data(), MainExecutableName, StrPool))
160+
return {};
181161
int InputFD;
182162
SmallString<32> InputFile, OutputFile;
183163
sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile);
@@ -187,9 +167,9 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
187167

188168
{
189169
raw_fd_ostream Input(InputFD, true);
190-
for (int i = 0; i < Depth; i++) {
191-
if (Modules[i])
192-
Input << Modules[i] << " " << (void*)Offsets[i] << "\n";
170+
for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
171+
if (Modules[AddrIdx])
172+
Input << Modules[AddrIdx] << " " << (void *)Offsets[AddrIdx] << "\n";
193173
}
194174
}
195175

@@ -206,53 +186,149 @@ static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
206186
int RunResult =
207187
sys::ExecuteAndWait(LLVMSymbolizerPath, Args, std::nullopt, Redirects);
208188
if (RunResult != 0)
209-
return false;
189+
return {};
210190

211-
// This report format is based on the sanitizer stack trace printer. See
212-
// sanitizer_stacktrace_printer.cc in compiler-rt.
191+
SmallVector<std::pair<unsigned, std::string>, 0> Result;
213192
auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str());
214193
if (!OutputBuf)
215-
return false;
194+
return {};
216195
StringRef Output = OutputBuf.get()->getBuffer();
217196
SmallVector<StringRef, 32> Lines;
218197
Output.split(Lines, "\n");
219-
auto CurLine = Lines.begin();
220-
int frame_no = 0;
221-
for (int i = 0; i < Depth; i++) {
222-
auto PrintLineHeader = [&]() {
223-
OS << right_justify(formatv("#{0}", frame_no++).str(),
224-
std::log10(Depth) + 2)
225-
<< ' ' << format_ptr(StackTrace[i]) << ' ';
226-
};
227-
if (!Modules[i]) {
228-
PrintLineHeader();
229-
OS << '\n';
198+
auto *CurLine = Lines.begin();
199+
// Lines contains the output from llvm-symbolizer, which should contain for
200+
// each address with a module in order of appearance, one or more lines
201+
// containing the function name and line associated with that address,
202+
// followed by an empty line.
203+
// For each address, adds an output entry for every real or inlined frame at
204+
// that address. For addresses without known modules, we have a single entry
205+
// containing just the formatted address; for all other output entries, we
206+
// output the function name if it is known, and either the file/line info if it
207+
// is known or the module+address offset otherwise.
208+
for (unsigned AddrIdx = 0; AddrIdx < AddressCount; AddrIdx++) {
209+
if (!Modules[AddrIdx]) {
210+
auto &SymbolizedFrame = Result.emplace_back(std::make_pair(AddrIdx, ""));
211+
raw_string_ostream OS(SymbolizedFrame.second);
212+
OS << format_ptr(AddressList[AddrIdx]);
230213
continue;
231214
}
232215
// Read pairs of lines (function name and file/line info) until we
233216
// encounter empty line.
234217
for (;;) {
235218
if (CurLine == Lines.end())
236-
return false;
219+
return {};
237220
StringRef FunctionName = *CurLine++;
238221
if (FunctionName.empty())
239222
break;
240-
PrintLineHeader();
223+
auto &SymbolizedFrame = Result.emplace_back(std::make_pair(AddrIdx, ""));
224+
raw_string_ostream OS(SymbolizedFrame.second);
225+
OS << format_ptr(AddressList[AddrIdx]) << ' ';
241226
if (!FunctionName.starts_with("??"))
242227
OS << FunctionName << ' ';
243228
if (CurLine == Lines.end())
244-
return false;
229+
return {};
245230
StringRef FileLineInfo = *CurLine++;
246-
if (!FileLineInfo.starts_with("??"))
231+
if (!FileLineInfo.starts_with("??")) {
247232
OS << FileLineInfo;
248-
else
249-
OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")";
250-
OS << "\n";
233+
} else {
234+
OS << "(" << Modules[AddrIdx] << '+' << format_hex(Offsets[AddrIdx], 0)
235+
<< ")";
236+
}
251237
}
252238
}
239+
return Result;
240+
}
241+
242+
ErrorOr<std::string> getLLVMSymbolizerPath(StringRef Argv0 = {}) {
243+
ErrorOr<std::string> LLVMSymbolizerPathOrErr = std::error_code();
244+
if (const char *Path = getenv(LLVMSymbolizerPathEnv)) {
245+
LLVMSymbolizerPathOrErr = sys::findProgramByName(Path);
246+
} else if (!Argv0.empty()) {
247+
StringRef Parent = llvm::sys::path::parent_path(Argv0);
248+
if (!Parent.empty())
249+
LLVMSymbolizerPathOrErr =
250+
sys::findProgramByName("llvm-symbolizer", Parent);
251+
}
252+
if (!LLVMSymbolizerPathOrErr)
253+
LLVMSymbolizerPathOrErr = sys::findProgramByName("llvm-symbolizer");
254+
return LLVMSymbolizerPathOrErr;
255+
}
256+
257+
/// Helper that launches llvm-symbolizer and symbolizes a backtrace.
258+
LLVM_ATTRIBUTE_USED
259+
static bool printSymbolizedStackTrace(StringRef Argv0, void **StackTrace,
260+
int Depth, llvm::raw_ostream &OS) {
261+
if (DisableSymbolicationFlag || getenv(DisableSymbolizationEnv))
262+
return false;
263+
264+
// Don't recursively invoke the llvm-symbolizer binary.
265+
if (Argv0.contains("llvm-symbolizer"))
266+
return false;
267+
268+
// FIXME: Subtract necessary number from StackTrace entries to turn return
269+
// addresses into actual instruction addresses.
270+
// Use llvm-symbolizer tool to symbolize the stack traces. First look for it
271+
// alongside our binary, then in $PATH.
272+
ErrorOr<std::string> LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath(Argv0);
273+
if (!LLVMSymbolizerPathOrErr)
274+
return false;
275+
const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
276+
277+
// If we don't know argv0 or the address of main() at this point, try
278+
// to guess it anyway (it's possible on some platforms).
279+
std::string MainExecutableName =
280+
sys::fs::exists(Argv0) ? (std::string)std::string(Argv0)
281+
: sys::fs::getMainExecutable(nullptr, nullptr);
282+
283+
auto SymbolizedAddressesOpt = collectAddressSymbols(
284+
StackTrace, Depth, MainExecutableName.c_str(), LLVMSymbolizerPath);
285+
if (!SymbolizedAddressesOpt)
286+
return false;
287+
for (unsigned FrameNo = 0; FrameNo < SymbolizedAddressesOpt->size();
288+
++FrameNo) {
289+
OS << right_justify(formatv("#{0}", FrameNo).str(), std::log10(Depth) + 2)
290+
<< ' ' << (*SymbolizedAddressesOpt)[FrameNo].second << '\n';
291+
}
253292
return true;
254293
}
255294

295+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
296+
void sys::symbolizeAddresses(AddressSet &Addresses,
297+
SymbolizedAddressMap &SymbolizedAddresses) {
298+
assert(!DisableSymbolicationFlag && !getenv(DisableSymbolizationEnv) &&
299+
"Debugify origin stacktraces require symbolization to be enabled.");
300+
301+
// Convert Set of Addresses to ordered list.
302+
SmallVector<void *, 0> AddressList(Addresses.begin(), Addresses.end());
303+
if (AddressList.empty())
304+
return;
305+
llvm::sort(AddressList);
306+
307+
// Use llvm-symbolizer tool to symbolize the stack traces. First look for it
308+
// alongside our binary, then in $PATH.
309+
ErrorOr<std::string> LLVMSymbolizerPathOrErr = getLLVMSymbolizerPath();
310+
if (!LLVMSymbolizerPathOrErr)
311+
report_fatal_error("Debugify origin stacktraces require llvm-symbolizer");
312+
const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr;
313+
314+
// Try to guess the main executable name, since we don't have argv0 available
315+
// here.
316+
std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr);
317+
318+
auto SymbolizedAddressesOpt =
319+
collectAddressSymbols(AddressList.begin(), AddressList.size(),
320+
MainExecutableName.c_str(), LLVMSymbolizerPath);
321+
if (!SymbolizedAddressesOpt)
322+
return;
323+
for (auto SymbolizedFrame : *SymbolizedAddressesOpt) {
324+
SmallVector<std::string, 0> &SymbolizedAddrs =
325+
SymbolizedAddresses[AddressList[SymbolizedFrame.first]];
326+
SymbolizedAddrs.push_back(SymbolizedFrame.second);
327+
}
328+
return;
329+
}
330+
#endif
331+
256332
static bool printMarkupContext(raw_ostream &OS, const char *MainExecutableName);
257333

258334
LLVM_ATTRIBUTE_USED

llvm/lib/Support/Unix/Signals.inc

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,21 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
507507
return 0;
508508
}
509509

510+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
511+
#if !defined(HAVE_BACKTRACE)
512+
#error DebugLoc origin-tracking currently requires `backtrace()`.
513+
#endif
514+
namespace llvm {
515+
namespace sys {
516+
template <unsigned long MaxDepth>
517+
int getStackTrace(std::array<void *, MaxDepth> &StackTrace) {
518+
return backtrace(StackTrace.data(), MaxDepth);
519+
}
520+
template int getStackTrace<16ul>(std::array<void *, 16ul> &);
521+
} // namespace sys
522+
} // namespace llvm
523+
#endif
524+
510525
/// If this is an ELF platform, we can find all loaded modules and their virtual
511526
/// addresses with dl_iterate_phdr.
512527
static bool findModulesAndOffsets(void **StackTrace, int Depth,

llvm/lib/Support/Windows/Signals.inc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
// This file provides the Win32 specific implementation of the Signals class.
1010
//
1111
//===----------------------------------------------------------------------===//
12+
#include "llvm/Config/llvm-config.h"
1213
#include "llvm/Support/ConvertUTF.h"
1314
#include "llvm/Support/ExitCodes.h"
1415
#include "llvm/Support/FileSystem.h"
@@ -478,6 +479,10 @@ void sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
478479
}
479480
} // namespace llvm
480481

482+
#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
483+
#error DebugLoc origin-tracking currently unimplemented for Windows.
484+
#endif
485+
481486
static void LocalPrintStackTrace(raw_ostream &OS, PCONTEXT C) {
482487
STACKFRAME64 StackFrame{};
483488
CONTEXT Context{};

0 commit comments

Comments
 (0)