diff --git a/llvm/include/llvm/ADT/StringTable.h b/llvm/include/llvm/ADT/StringTable.h index b3c4a414ed6b4..c089a070d4b57 100644 --- a/llvm/include/llvm/ADT/StringTable.h +++ b/llvm/include/llvm/ADT/StringTable.h @@ -85,14 +85,18 @@ class StringTable { "Last byte must be a null byte."); } - // Get a string from the table starting with the provided offset. The returned - // `StringRef` is in fact null terminated, and so can be converted safely to a - // C-string if necessary for a system API. - constexpr StringRef operator[](Offset O) const { + // Returns the raw C string from the table starting with the provided offset. + // The returned string is null terminated. + constexpr const char *getCString(Offset O) const { assert(O.value() < Table.size() && "Out of bounds offset!"); return Table.data() + O.value(); } + // Get a string from the table starting with the provided offset. The returned + // `StringRef` is in fact null terminated, and so can be converted safely to a + // C-string if necessary for a system API. + constexpr StringRef operator[](Offset O) const { return getCString(O); } + /// Returns the byte size of the table. constexpr size_t size() const { return Table.size(); } diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp index e631419d5e1c2..68ab15eab1aea 100644 --- a/llvm/lib/IR/Intrinsics.cpp +++ b/llvm/lib/IR/Intrinsics.cpp @@ -660,20 +660,20 @@ static int lookupLLVMIntrinsicByName(ArrayRef NameOffsetTable, // `equal_range` requires the comparison to work with either side being an // offset or the value. Detect which kind each side is to set up the // compared strings. - StringRef LHSStr; - if constexpr (std::is_integral_v) { - LHSStr = IntrinsicNameTable[LHS]; - } else { + const char *LHSStr; + if constexpr (std::is_integral_v) + LHSStr = IntrinsicNameTable.getCString(LHS); + else LHSStr = LHS; - } - StringRef RHSStr; - if constexpr (std::is_integral_v) { - RHSStr = IntrinsicNameTable[RHS]; - } else { + + const char *RHSStr; + if constexpr (std::is_integral_v) + RHSStr = IntrinsicNameTable.getCString(RHS); + else RHSStr = RHS; - } - return strncmp(LHSStr.data() + CmpStart, RHSStr.data() + CmpStart, - CmpEnd - CmpStart) < 0; + + return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) < + 0; }; LastLow = Low; std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp); diff --git a/llvm/unittests/IR/IntrinsicsTest.cpp b/llvm/unittests/IR/IntrinsicsTest.cpp index ad9af2075e371..1183ed08b468e 100644 --- a/llvm/unittests/IR/IntrinsicsTest.cpp +++ b/llvm/unittests/IR/IntrinsicsTest.cpp @@ -80,6 +80,32 @@ TEST(IntrinsicNameLookup, Basic) { EXPECT_EQ(memcpy_inline, lookupIntrinsicID("llvm.memcpy.inline.p0.p0.i1024")); } +TEST(IntrinsicNameLookup, NonNullterminatedStringRef) { + using namespace Intrinsic; + // This reproduces an issue where lookupIntrinsicID() can access memory beyond + // the bounds of the passed in StringRef. For ASAN to catch this as an error, + // create a StringRef using heap allocated memory and make it not null + // terminated. + + // ASAN will report a "AddressSanitizer: heap-buffer-overflow" error in + // `lookupLLVMIntrinsicByName` when LLVM is built with these options: + // -DCMAKE_BUILD_TYPE=Debug + // -DLLVM_USE_SANITIZER=Address + // -DLLVM_OPTIMIZE_SANITIZED_BUILDS=OFF + + // Make an intrinsic name "llvm.memcpy.inline" on the heap. + std::string Name = "llvm.memcpy.inline"; + assert(Name.size() == 18); + // Create a StringRef backed by heap allocated memory such that OOB access + // in that StringRef can be flagged by asan. Here, the String `S` is of size + // 18, and backed by a heap allocated buffer `Data`, so access to S[18] will + // be flagged bby asan. + std::unique_ptr Data = std::make_unique(Name.size()); + std::strncpy(Data.get(), Name.data(), Name.size()); + StringRef S(Data.get(), Name.size()); + EXPECT_EQ(memcpy_inline, lookupIntrinsicID(S)); +} + // Tests to verify getIntrinsicForClangBuiltin. TEST(IntrinsicNameLookup, ClangBuiltinLookup) { using namespace Intrinsic;