From 159091e4dc422fc262a8575e4a12b14f2451e554 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 7 Jul 2025 18:52:48 -0700 Subject: [PATCH 1/2] [LLVM] Fix an ASAN error in `lookupLLVMIntrinsicByName` Fix unnecessary conversion of C-String to StringRef in the `Cmp` lambda inside `lookupLLVMIntrinsicByName`. This both fixes an ASAN error in the code that happens when the `Name` StringRef passed in is not a Null terminated StringRef, and additionally should potentially speed up the code as well by eliminating the unnecessary computation of string length every time a C String is converted to StringRef in this code (It seems practically this computation is eliminated in optimized builds, but this will avoid it at O0 builds as well). Added a unit test that demomstrates this issue by building LLVM with these options: ``` -DCMAKE_BUILD_TYPE=Debug -DLLVM_USE_SANITIZER=Address` -DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF ``` The error reported is as follows: ``` ==462665==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x5030000391a2 at pc 0x56525cc30bbf bp 0x7fff9e4ccc60 sp 0x7fff9e4cc428 READ of size 19 at 0x5030000391a2 thread T0 #0 0x56525cc30bbe in strlen (upstream-llvm-second/llvm-project/build/unittests/IR/IRTests+0x713bbe) (BuildId: 0651acf1e582a4d2) #1 0x7f8ff22ad334 in std::char_traits::length(char const*) /usr/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/char_traits.h:399:9 #2 0x7f8ff22a34a0 in llvm::StringRef::StringRef(char const*) /home/rjoshi/upstream-llvm-second/llvm-project/llvm/include/llvm/ADT/StringRef.h:96:33 #3 0x7f8ff28ca184 in _ZZL25lookupLLVMIntrinsicByNameN4llvm8ArrayRefIjEENS_9StringRefES2_ENK3$_0clIjPKcEEDaT_T0_ upstream-llvm-second/llvm-project/llvm/lib/IR/Intrinsics.cpp:673:18 ``` --- llvm/include/llvm/ADT/StringTable.h | 12 ++++++++---- llvm/lib/IR/Intrinsics.cpp | 24 ++++++++++++------------ llvm/unittests/IR/IntrinsicsTest.cpp | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h index b3c4a414ed6b4..c089a070d4b57 100644 --- a/llvm/include/llvm/ADT/StringTable.h +++ b/llvm/include/llvm/ADT/StringTable.h @@ -85,14 +85,18 @@ class StringTable { "Last byte must be a null byte."); } - // Get a string from the table starting with the provided offset. The returned - // `StringRef` is in fact null terminated, and so can be converted safely to a - // C-string if necessary for a system API. - constexpr StringRef operator[](Offset O) const { + // Returns the raw C string from the table starting with the provided offset. + // The returned string is null terminated. + constexpr const char *getCString(Offset O) const { assert(O.value() < Table.size() && "Out of bounds offset!"); return Table.data() + O.value(); } + // Get a string from the table starting with the provided offset. The returned + // `StringRef` is in fact null terminated, and so can be converted safely to a + // C-string if necessary for a system API. + constexpr StringRef operator[](Offset O) const { return getCString(O); } + /// Returns the byte size of the table. constexpr size_t size() const { return Table.size(); } diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp index e631419d5e1c2..68ab15eab1aea 100644 --- a/llvm/lib/IR/Intrinsics.cpp +++ b/llvm/lib/IR/Intrinsics.cpp @@ -660,20 +660,20 @@ static int lookupLLVMIntrinsicByName(ArrayRef NameOffsetTable, // `equal_range` requires the comparison to work with either side being an // offset or the value. Detect which kind each side is to set up the // compared strings. - StringRef LHSStr; - if constexpr (std::is_integral_v) { - LHSStr = IntrinsicNameTable[LHS]; - } else { + const char *LHSStr; + if constexpr (std::is_integral_v) + LHSStr = IntrinsicNameTable.getCString(LHS); + else LHSStr = LHS; - } - StringRef RHSStr; - if constexpr (std::is_integral_v) { - RHSStr = IntrinsicNameTable[RHS]; - } else { + + const char *RHSStr; + if constexpr (std::is_integral_v) + RHSStr = IntrinsicNameTable.getCString(RHS); + else RHSStr = RHS; - } - return strncmp(LHSStr.data() + CmpStart, RHSStr.data() + CmpStart, - CmpEnd - CmpStart) < 0; + + return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) < + 0; }; LastLow = Low; std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp); diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp index ad9af2075e371..fff13d51897f8 100644 --- a/llvm/unittests/IR/IntrinsicsTest.cpp +++ b/llvm/unittests/IR/IntrinsicsTest.cpp @@ -80,6 +80,28 @@ TEST(IntrinsicNameLookup, Basic) { EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024")); } +TEST(IntrinsicNameLookup, NonNullterminatedStringRef) { + using namespace Intrinsic; + // This reproduces an issue where lookupIntrinsicID() can access memory beyond + // the bounds of the passed in StringRef. For ASAN to catch this as an error, + // create a StringRef using heap allocated memory and make it not null + // terminated. + + // ASAN will report a "AddressSanitizer: heap-buffer-overflow" error in + // `lookupLLVMIntrinsicByName` when LLVM is built with these options: + // -DCMAKE_BUILD_TYPE=Debug + // -DLLVM_USE_SANITIZER=Address + // -DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF + + // Make an intrinsic name "llvm.memcpy.inline" on the heap. + std::string Name = "llvm.memcpy.inline"; + assert(Name.size() == 18); + std::unique_ptr Data = std::make_unique(Name.size()); + std::strncpy(Data.get(), Name.data(), Name.size()); + StringRef S(Data.get(), Name.size()); + EXPECT_EQ(memcpy_inline, lookupIntrinsicID(S)); +} + // Tests to verify getIntrinsicForClangBuiltin. TEST(IntrinsicNameLookup, ClangBuiltinLookup) { using namespace Intrinsic; From 71ce9522ca6a1b7a76b4149a1d1ce35b0a0c6872 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Tue, 8 Jul 2025 10:32:14 -0700 Subject: [PATCH 2/2] Review feedback --- llvm/unittests/IR/IntrinsicsTest.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp index fff13d51897f8..1183ed08b468e 100644 --- a/llvm/unittests/IR/IntrinsicsTest.cpp +++ b/llvm/unittests/IR/IntrinsicsTest.cpp @@ -96,6 +96,10 @@ TEST(IntrinsicNameLookup, NonNullterminatedStringRef) { // Make an intrinsic name "llvm.memcpy.inline" on the heap. std::string Name = "llvm.memcpy.inline"; assert(Name.size() == 18); + // Create a StringRef backed by heap allocated memory such that OOB access + // in that StringRef can be flagged by asan. Here, the String `S` is of size + // 18, and backed by a heap allocated buffer `Data`, so access to S[18] will + // be flagged bby asan. std::unique_ptr Data = std::make_unique(Name.size()); std::strncpy(Data.get(), Name.data(), Name.size()); StringRef S(Data.get(), Name.size());