Skip to content

Commit 4a1c33d

Browse files
authored
[llvm-gsymutil] Add support for merged functions lookup differentiation (llvm#122409)
This update introduces the ability to filter merged functions during lookups based on regex patterns derived from call site information in a previous call to `llvm-gsymutil`. The regex patterns, extracted from call sites, can then be passed to subsequent calls using the `--merged-functions-filter` option along with `--merged-functions` and `--address` (or `--addresses-from-stdin`). This allows for precise filtering of functions during lookups, giving accurate results for call stacks that contain merged functions.
1 parent b4576bb commit 4a1c33d

File tree

7 files changed

+171
-2
lines changed

7 files changed

+171
-2
lines changed

llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ struct CallSiteInfo {
4949
/// Bitwise OR of CallSiteInfo::Flags values
5050
uint8_t Flags = CallSiteInfo::Flags::None;
5151

52+
/// Equality comparison operator for CallSiteInfo.
53+
bool operator==(const CallSiteInfo &RHS) const {
54+
return ReturnOffset == RHS.ReturnOffset && MatchRegex == RHS.MatchRegex &&
55+
Flags == RHS.Flags;
56+
}
57+
58+
/// Inequality comparison operator for CallSiteInfo.
59+
bool operator!=(const CallSiteInfo &RHS) const { return !(*this == RHS); }
60+
5261
/// Decode a CallSiteInfo object from a binary data stream.
5362
///
5463
/// \param Data The binary stream to read the data from.

llvm/include/llvm/DebugInfo/GSYM/LookupResult.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,33 @@ struct LookupResult {
4949
/// deepest inline function will appear at index zero in the source locations
5050
/// array, and the concrete function will appear at the end of the array.
5151
SourceLocations Locations;
52+
53+
/// Function name regex patterns associated with a call site at the lookup
54+
/// address. This vector will be populated when:
55+
/// 1. The lookup address matches a call site's return address in a function
56+
/// 2. The call site has associated regex patterns that describe what
57+
/// functions can be called from that location
58+
///
59+
/// The regex patterns can be used to validate function calls during runtime
60+
/// checking or symbolication. For example:
61+
/// - Patterns like "^foo$" indicate the call site can only call function
62+
/// "foo"
63+
/// - Patterns like "^std::" indicate the call site can call any function in
64+
/// the std namespace
65+
/// - Multiple patterns allow matching against a set of allowed functions
66+
///
67+
/// The patterns are stored as string references into the GSYM string table.
68+
/// This information is typically loaded from:
69+
/// - DWARF debug info call site entries
70+
/// - External YAML files specifying call site patterns
71+
/// - Other debug info formats that encode call site constraints
72+
///
73+
/// The patterns will be empty if:
74+
/// - The lookup address is not at the return address of a call site
75+
/// - The call site has no associated function name constraints
76+
/// - Call site info was not included when creating the GSYM file
77+
std::vector<StringRef> CallSiteFuncRegex;
78+
5279
std::string getSourceFile(uint32_t Index) const;
5380
};
5481

@@ -59,6 +86,8 @@ inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
5986
return false;
6087
if (LHS.FuncName != RHS.FuncName)
6188
return false;
89+
if (LHS.CallSiteFuncRegex != RHS.CallSiteFuncRegex)
90+
return false;
6291
return LHS.Locations == RHS.Locations;
6392
}
6493

llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,23 @@ FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
301301
InlineInfoData = InfoData;
302302
break;
303303

304+
case InfoType::CallSiteInfo:
305+
if (auto CSIC = CallSiteInfoCollection::decode(InfoData)) {
306+
// Find matching call site based on relative offset
307+
for (const auto &CS : CSIC->CallSites) {
308+
// Check if the call site matches the lookup address
309+
if (CS.ReturnOffset == Addr - FuncAddr) {
310+
// Get regex patterns
311+
for (uint32_t RegexOffset : CS.MatchRegex) {
312+
LR.CallSiteFuncRegex.push_back(GR.getString(RegexOffset));
313+
}
314+
break;
315+
}
316+
}
317+
} else {
318+
return CSIC.takeError();
319+
}
320+
304321
default:
305322
break;
306323
}

llvm/lib/DebugInfo/GSYM/LookupResult.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,16 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) {
6868
if (IsInlined)
6969
OS << " [inlined]";
7070
}
71+
72+
if (!LR.CallSiteFuncRegex.empty()) {
73+
OS << "\n CallSites: ";
74+
for (size_t i = 0; i < LR.CallSiteFuncRegex.size(); ++i) {
75+
if (i > 0)
76+
OS << ", ";
77+
OS << LR.CallSiteFuncRegex[i];
78+
}
79+
}
80+
7181
OS << '\n';
7282
return OS;
7383
}

llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,50 @@
4242
# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy2]
4343
# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1]
4444

45+
46+
### Check that we can correctly resolve merged functions using callstacks:
47+
### Resolve two callstacks containing merged functions.
48+
### We use the value obtained from `CallSites:[FILTER]` to pass to the next call to `llvm-gsymutil` via `--merged-functions-filter`.
49+
### The callstacks resolve differently based on the merged functions filter.
50+
### 0x00000001000003d0 => 0x000000010000037c => 0x000000010000035c => 0x0000000100000340
51+
### 0x00000001000003e8 =========================> 0x000000010000035c => 0x0000000100000340
52+
53+
# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003d0 | FileCheck --check-prefix=CHECK-C1 %s
54+
# CHECK-C1: 0x00000001000003d0: main + 32 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:63
55+
# CHECK-C1-NEXT: CallSites: function2_copy2
56+
57+
# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000037c --merged-functions-filter="function2_copy2" | FileCheck --check-prefix=CHECK-C2 %s
58+
# CHECK-C2: 0x000000010000037c: function_inlined + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:35 [inlined]
59+
# CHECK-C2-NEXT: function2_copy2 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:48
60+
# CHECK-C2-NEXT: CallSites: function3_copy1
61+
62+
# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy1" | FileCheck --check-prefix=CHECK-C3 %s
63+
# CHECK-C3: Found 1 function at address 0x000000010000035c:
64+
# CHECK-C3-NEXT: 0x000000010000035c: function3_copy1 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:28
65+
# CHECK-C3-NEXT: CallSites: function4_copy1
66+
67+
# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy1" | FileCheck --check-prefix=CHECK-C4 %s
68+
# CHECK-C4: Found 1 function at address 0x0000000100000340:
69+
# CHECK-C4-NEXT: 0x0000000100000340: function4_copy1 + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:14
70+
71+
### ----------------------------------------------------------------------------------------------------------------------------------
72+
### Resolve the 2nd call stack - its 2nd and 3rd addresses are the same as the last two addresses of the 1st call stack, but they resolve to different functions because of the filter
73+
74+
# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --address=0x00000001000003e8 --merged-functions | FileCheck --check-prefix=CHECK-C5 %s
75+
# CHECK-C5: Found 1 function at address 0x00000001000003e8:
76+
# CHECK-C5-NEXT: 0x00000001000003e8: main + 56 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:64
77+
# CHECK-C5-NEXT: CallSites: function3_copy2
78+
79+
# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy2" | FileCheck --check-prefix=CHECK-C6 %s
80+
# CHECK-C6: Found 1 function at address 0x000000010000035c:
81+
# CHECK-C6-NEXT: 0x000000010000035c: function3_copy2 + 16 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:28
82+
# CHECK-C6-NEXT: CallSites: function4_copy2
83+
84+
# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --merged-functions-filter="function4_copy2" --address=0x0000000100000340 | FileCheck --check-prefix=CHECK-C7 %s
85+
# CHECK-C7: Found 1 function at address 0x0000000100000340:
86+
# CHECK-C7-NEXT: 0x0000000100000340: function4_copy2 + 8 @ /tmp/tst{{[/\\]}}out/merged_funcs_test.cpp:14
87+
88+
4589
#--- merged_funcs_test.cpp
4690
#define ATTRIB extern "C" __attribute__((noinline))
4791
volatile int global_result = 0;

llvm/tools/llvm-gsymutil/Opts.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,8 @@ def addresses_from_stdin :
4646
defm json_summary_file :
4747
Eq<"json-summary-file",
4848
"Output a categorized summary of errors into the JSON file specified.">;
49+
defm merged_functions_filter :
50+
Eq<"merged-functions-filter",
51+
"When used with --address/--addresses-from-stdin and --merged-functions,\n"
52+
"filters the merged functions output to only show functions matching any of the specified regex patterns.\n"
53+
"Can be specified multiple times.">;

llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ static bool LookupAddressesFromStdin;
101101
static bool UseMergedFunctions = false;
102102
static bool LoadDwarfCallSites = false;
103103
static std::string CallSiteYamlPath;
104+
static std::vector<std::string> MergedFunctionsFilters;
104105

105106
static void parseArgs(int argc, char **argv) {
106107
GSYMUtilOptTable Tbl;
@@ -194,6 +195,24 @@ static void parseArgs(int argc, char **argv) {
194195
}
195196

196197
LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
198+
199+
for (const llvm::opt::Arg *A :
200+
Args.filtered(OPT_merged_functions_filter_EQ)) {
201+
MergedFunctionsFilters.push_back(A->getValue());
202+
// Validate the filter is only used with correct flags
203+
if (LookupAddresses.empty() && !LookupAddressesFromStdin) {
204+
llvm::errs() << ToolName
205+
<< ": --merged-functions-filter can only be used with "
206+
"--address/--addresses-from-stdin\n";
207+
std::exit(1);
208+
}
209+
if (!UseMergedFunctions) {
210+
llvm::errs()
211+
<< ToolName
212+
<< ": --merged-functions-filter requires --merged-functions\n";
213+
std::exit(1);
214+
}
215+
}
197216
}
198217

199218
/// @}
@@ -510,9 +529,43 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
510529
static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
511530
if (UseMergedFunctions) {
512531
if (auto Results = Gsym.lookupAll(Addr)) {
513-
OS << "Found " << Results->size() << " functions at address "
514-
<< HEX64(Addr) << ":\n";
532+
// If we have filters, count matching results first
533+
size_t NumMatching = Results->size();
534+
if (!MergedFunctionsFilters.empty()) {
535+
NumMatching = 0;
536+
for (const auto &Result : *Results) {
537+
bool Matches = false;
538+
for (const auto &Filter : MergedFunctionsFilters) {
539+
Regex Pattern(Filter);
540+
if (Pattern.match(Result.FuncName)) {
541+
Matches = true;
542+
break;
543+
}
544+
}
545+
if (Matches)
546+
NumMatching++;
547+
}
548+
}
549+
550+
OS << "Found " << NumMatching << " function"
551+
<< (NumMatching != 1 ? "s" : "") << " at address " << HEX64(Addr)
552+
<< ":\n";
553+
515554
for (size_t i = 0; i < Results->size(); ++i) {
555+
// Skip if doesn't match any filter
556+
if (!MergedFunctionsFilters.empty()) {
557+
bool Matches = false;
558+
for (const auto &Filter : MergedFunctionsFilters) {
559+
Regex Pattern(Filter);
560+
if (Pattern.match(Results->at(i).FuncName)) {
561+
Matches = true;
562+
break;
563+
}
564+
}
565+
if (!Matches)
566+
continue;
567+
}
568+
516569
OS << " " << Results->at(i);
517570

518571
if (i != Results->size() - 1)
@@ -529,6 +582,8 @@ static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
529582
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
530583
}
531584
}
585+
// Don't print call site info if --merged-functions is not specified.
586+
Result->CallSiteFuncRegex.clear();
532587
OS << Result.get();
533588
} else {
534589
if (Verbose)

0 commit comments

Comments
 (0)