-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[BinaryFormat] Add "SFrame" structures and constants #147264
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-binary-utilities Author: Pavel Labath (labath) ChangesThis patch defines the structures and constants used by the SFrame unwind info format supported by GNU binutils. For more information about the format, see https://sourceware.org/binutils/wiki/sframe and https://discourse.llvm.org/t/rfc-adding-sframe-support-to-llvm/86900 In the patch, I've used the naming convention for everything that has a direct equivalent to the specification (modulo changing macros to constants), and used the llvm convention for everything else. Full diff: https://github.com/llvm/llvm-project/pull/147264.diff 3 Files Affected:
diff --git a/llvm/include/llvm/BinaryFormat/SFrame.h b/llvm/include/llvm/BinaryFormat/SFrame.h
new file mode 100644
index 0000000000000..62a2cbb748af5
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/SFrame.h
@@ -0,0 +1,167 @@
+//===-- llvm/BinaryFormat/SFrame.h ---SFrame Data Structures ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains data-structure definitions and constants to support
+/// unwinding based on .sframe sections. This only supports SFRAME_VERSION_2
+/// as described at https://sourceware.org/binutils/docs/sframe-spec.html
+///
+/// Naming conventions follow the spec document. #defines converted to constants
+/// and enums for better C++ compatibility.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BINARYFORMAT_SFRAME_H
+#define LLVM_BINARYFORMAT_SFRAME_H
+
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+namespace sframe {
+
+//===----------------------------------------------------------------------===//
+
+constexpr uint16_t SFRAME_MAGIC = 0xDEE2;
+
+enum : uint8_t {
+ SFRAME_VERSION_1 = 1,
+ SFRAME_VERSION_2 = 2,
+};
+
+/// sframe_preable.sfp_flags flags.
+enum : uint8_t {
+ SFRAME_F_FDE_SORTED = 0x1,
+ SFRAME_F_FRAME_POINTER = 0x2,
+};
+
+/// Possible values for sframe_header.sfh_abi_arch.
+enum : uint8_t {
+ SFRAME_ABI_AARCH64_ENDIAN_BIG = 1,
+ SFRAME_ABI_AARCH64_ENDIAN_LITTLE = 2,
+ SFRAME_ABI_AMD64_ENDIAN_LITTLE = 3
+};
+
+/// SFrame FRE Types. Bits 0-3 of sframe_func_desc_entry.sfde_func_info.
+enum : uint8_t {
+ SFRAME_FRE_TYPE_ADDR1 = 0,
+ SFRAME_FRE_TYPE_ADDR2 = 1,
+ SFRAME_FRE_TYPE_ADDR4 = 2,
+};
+
+/// SFrame FDE Types. Bit 4 of sframe_func_desc_entry.sfde_func_info.
+enum : uint8_t {
+ SFRAME_FDE_TYPE_PCINC = 0,
+ SFRAME_FDE_TYPE_PCMASK = 1,
+};
+
+/// Speficies key used for signing return addresses. Bit 5 of
+/// sframe_func_desc_entry.sfde_func_info.
+enum : uint8_t {
+ SFRAME_AARCH64_PAUTH_KEY_A = 0,
+ SFRAME_AARCH64_PAUTH_KEY_B = 1,
+};
+
+/// Size of stack offsets. Bits 5-6 of sframe_fre_info.fre_info.
+enum : uint8_t {
+ SFRAME_FRE_OFFSET_1B = 0,
+ SFRAME_FRE_OFFSET_2B = 1,
+ SFRAME_FRE_OFFSET_4B = 2,
+};
+
+/// Stack frame base register. Bit 0 of sframe_fre_info.fre_info.
+enum : uint8_t { SFRAME_BASE_REG_FP = 0, SFRAME_BASE_REG_SP = 1 };
+
+LLVM_PACKED_START
+
+struct sframe_preamble {
+ uint16_t sfp_magic;
+ uint8_t sfp_version;
+ uint8_t sfp_flags;
+};
+
+struct sframe_header {
+ sframe_preamble sfh_preamble;
+ uint8_t sfh_abi_arch;
+ int8_t sfh_cfa_fixed_fp_offset;
+ int8_t sfh_cfa_fixed_ra_offset;
+ uint8_t sfh_auxhdr_len;
+ uint32_t sfh_num_fdes;
+ uint32_t sfh_num_fres;
+ uint32_t sfh_fre_len;
+ uint32_t sfh_fdeoff;
+ uint32_t sfh_freoff;
+};
+
+struct sframe_func_desc_entry {
+ int32_t sfde_func_start_address;
+ uint32_t sfde_func_size;
+ uint32_t sfde_func_start_fre_off;
+ uint32_t sfde_func_num_fres;
+ uint8_t sfde_func_info;
+ uint8_t sfde_func_rep_size;
+ uint16_t sfde_func_padding2;
+
+ uint8_t getPAuthKey() const { return (sfde_func_info >> 5) & 1; }
+ uint8_t getFDEType() const { return (sfde_func_info >> 4) & 1; }
+ uint8_t getFREType() const { return sfde_func_info & 0xf; }
+ void setPAuthKey(uint8_t P) { setFuncInfo(P, getFDEType(), getFREType()); }
+ void setFDEType(uint8_t D) { setFuncInfo(getPAuthKey(), D, getFREType()); }
+ void setFREType(uint8_t R) { setFuncInfo(getPAuthKey(), getFDEType(), R); }
+ void setFuncInfo(uint8_t PAuthKey, uint8_t FDEType, uint8_t FREType) {
+ sfde_func_info =
+ ((PAuthKey & 1) << 5) | ((FDEType & 1) << 4) | (FREType & 0xf);
+ }
+};
+
+struct sframe_fre_info {
+ uint8_t fre_info;
+
+ bool isReturnAddressSigned() const { return fre_info >> 7; }
+ uint8_t getOffsetSize() const { return (fre_info >> 5) & 3; }
+ uint8_t getOffsetCount() const { return (fre_info >> 1) & 0xf; }
+ uint8_t getBaseRegister() const { return fre_info & 1; }
+ void setReturnAddressSigned(bool RA) {
+ setFREInfo(RA, getOffsetSize(), getOffsetCount(), getBaseRegister());
+ }
+ void setOffsetSize(uint8_t Sz) {
+ setFREInfo(isReturnAddressSigned(), Sz, getOffsetCount(),
+ getBaseRegister());
+ }
+ void setOffsetCount(uint8_t N) {
+ setFREInfo(isReturnAddressSigned(), getOffsetSize(), N, getBaseRegister());
+ }
+ void setBaseRegister(uint8_t Reg) {
+ setFREInfo(isReturnAddressSigned(), getOffsetSize(), getOffsetCount(), Reg);
+ }
+ void setFREInfo(bool RA, uint8_t Sz, uint8_t N, uint8_t Reg) {
+ fre_info = ((RA & 1) << 7) | ((Sz & 3) << 5) | ((N & 0xf) << 1) | (Reg & 1);
+ }
+};
+
+struct sframe_frame_row_entry_addr1 {
+ uint8_t sfre_start_address;
+ sframe_fre_info sfre_info;
+};
+
+struct sframe_frame_row_entry_addr2 {
+ uint16_t sfre_start_address;
+ sframe_fre_info sfre_info;
+};
+
+struct sframe_frame_row_entry_addr4 {
+ uint32_t sfre_start_address;
+ sframe_fre_info sfre_info;
+};
+
+LLVM_PACKED_END
+
+} // namespace sframe
+} // namespace llvm
+
+#endif // LLVM_BINARYFORMAT_SFRAME_H
diff --git a/llvm/unittests/BinaryFormat/CMakeLists.txt b/llvm/unittests/BinaryFormat/CMakeLists.txt
index 40d3bc4dca0b6..eac5977a2c1c3 100644
--- a/llvm/unittests/BinaryFormat/CMakeLists.txt
+++ b/llvm/unittests/BinaryFormat/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_unittest(BinaryFormatTests
MsgPackDocumentTest.cpp
MsgPackReaderTest.cpp
MsgPackWriterTest.cpp
+ SFrameTest.cpp
TestFileMagic.cpp
)
diff --git a/llvm/unittests/BinaryFormat/SFrameTest.cpp b/llvm/unittests/BinaryFormat/SFrameTest.cpp
new file mode 100644
index 0000000000000..abf84ec72a1f7
--- /dev/null
+++ b/llvm/unittests/BinaryFormat/SFrameTest.cpp
@@ -0,0 +1,99 @@
+//===- SFrameTest.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/SFrame.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::sframe;
+
+namespace {
+// Test structure sizes and triviality.
+static_assert(std::is_trivial_v<sframe_preamble>);
+static_assert(sizeof(sframe_preamble) == 4);
+
+static_assert(std::is_trivial_v<sframe_header>);
+static_assert(sizeof(sframe_header) == 28);
+
+static_assert(std::is_trivial_v<sframe_func_desc_entry>);
+static_assert(sizeof(sframe_func_desc_entry) == 20);
+
+static_assert(std::is_trivial_v<sframe_frame_row_entry_addr1>);
+static_assert(sizeof(sframe_frame_row_entry_addr1) == 2);
+
+static_assert(std::is_trivial_v<sframe_frame_row_entry_addr2>);
+static_assert(sizeof(sframe_frame_row_entry_addr2) == 3);
+
+static_assert(std::is_trivial_v<sframe_frame_row_entry_addr4>);
+static_assert(sizeof(sframe_frame_row_entry_addr4) == 5);
+
+
+TEST(SFrameTest, FDEFlags) {
+ sframe_func_desc_entry FDE = {};
+ EXPECT_EQ(FDE.sfde_func_info, 0u);
+ EXPECT_EQ(FDE.getPAuthKey(), SFRAME_AARCH64_PAUTH_KEY_A);
+ EXPECT_EQ(FDE.getFDEType(), SFRAME_FDE_TYPE_PCINC);
+ EXPECT_EQ(FDE.getFREType(), SFRAME_FRE_TYPE_ADDR1);
+
+ FDE.setPAuthKey(SFRAME_AARCH64_PAUTH_KEY_B);
+ EXPECT_EQ(FDE.sfde_func_info, 0x20u);
+ EXPECT_EQ(FDE.getPAuthKey(), SFRAME_AARCH64_PAUTH_KEY_B);
+ EXPECT_EQ(FDE.getFDEType(), SFRAME_FDE_TYPE_PCINC);
+ EXPECT_EQ(FDE.getFREType(), SFRAME_FRE_TYPE_ADDR1);
+
+ FDE.setFDEType(SFRAME_FDE_TYPE_PCMASK);
+ EXPECT_EQ(FDE.sfde_func_info, 0x30u);
+ EXPECT_EQ(FDE.getPAuthKey(), SFRAME_AARCH64_PAUTH_KEY_B);
+ EXPECT_EQ(FDE.getFDEType(), SFRAME_FDE_TYPE_PCMASK);
+ EXPECT_EQ(FDE.getFREType(), SFRAME_FRE_TYPE_ADDR1);
+
+ FDE.setFREType(SFRAME_FRE_TYPE_ADDR4);
+ EXPECT_EQ(FDE.sfde_func_info, 0x32u);
+ EXPECT_EQ(FDE.getPAuthKey(), SFRAME_AARCH64_PAUTH_KEY_B);
+ EXPECT_EQ(FDE.getFDEType(), SFRAME_FDE_TYPE_PCMASK);
+ EXPECT_EQ(FDE.getFREType(), SFRAME_FRE_TYPE_ADDR4);
+}
+
+TEST(SFrameTest, FREFlags) {
+ sframe_fre_info Info = {};
+ EXPECT_EQ(Info.fre_info, 0u);
+ EXPECT_FALSE(Info.isReturnAddressSigned());
+ EXPECT_EQ(Info.getOffsetSize(), SFRAME_FRE_OFFSET_1B);
+ EXPECT_EQ(Info.getOffsetCount(), 0u);
+ EXPECT_EQ(Info.getBaseRegister(), SFRAME_BASE_REG_FP);
+
+ Info.setReturnAddressSigned(true);
+ EXPECT_EQ(Info.fre_info, 0x80u);
+ EXPECT_TRUE(Info.isReturnAddressSigned());
+ EXPECT_EQ(Info.getOffsetSize(), SFRAME_FRE_OFFSET_1B);
+ EXPECT_EQ(Info.getOffsetCount(), 0u);
+ EXPECT_EQ(Info.getBaseRegister(), SFRAME_BASE_REG_FP);
+
+ Info.setOffsetSize(SFRAME_FRE_OFFSET_4B);
+ EXPECT_EQ(Info.fre_info, 0xc0u);
+ EXPECT_TRUE(Info.isReturnAddressSigned());
+ EXPECT_EQ(Info.getOffsetSize(), SFRAME_FRE_OFFSET_4B);
+ EXPECT_EQ(Info.getOffsetCount(), 0u);
+ EXPECT_EQ(Info.getBaseRegister(), SFRAME_BASE_REG_FP);
+
+ Info.setOffsetCount(3);
+ EXPECT_EQ(Info.fre_info, 0xc6u);
+ EXPECT_TRUE(Info.isReturnAddressSigned());
+ EXPECT_EQ(Info.getOffsetSize(), SFRAME_FRE_OFFSET_4B);
+ EXPECT_EQ(Info.getOffsetCount(), 3u);
+ EXPECT_EQ(Info.getBaseRegister(), SFRAME_BASE_REG_FP);
+
+ Info.setBaseRegister(SFRAME_BASE_REG_SP);
+ EXPECT_EQ(Info.fre_info, 0xc7u);
+ EXPECT_TRUE(Info.isReturnAddressSigned());
+ EXPECT_EQ(Info.getOffsetSize(), SFRAME_FRE_OFFSET_4B);
+ EXPECT_EQ(Info.getOffsetCount(), 3u);
+ EXPECT_EQ(Info.getBaseRegister(), SFRAME_BASE_REG_SP);
+}
+
+} // namespace
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
I think this will be the most controversial part of the patch. Starting out, it made sense to me to use the original convention, but the more code I write using this, the less I am convinced that is the right choice. I can find precedents for both choices in the codebase. I'd be happy to LLVM-ify this code if that is preferred. |
uint8_t getPAuthKey() const { return (sfde_func_info >> 5) & 1; } | ||
uint8_t getFDEType() const { return (sfde_func_info >> 4) & 1; } | ||
uint8_t getFREType() const { return sfde_func_info & 0xf; } | ||
void setPAuthKey(uint8_t P) { setFuncInfo(P, getFDEType(), getFREType()); } | ||
void setFDEType(uint8_t D) { setFuncInfo(getPAuthKey(), D, getFREType()); } | ||
void setFREType(uint8_t R) { setFuncInfo(getPAuthKey(), getFDEType(), R); } | ||
void setFuncInfo(uint8_t PAuthKey, uint8_t FDEType, uint8_t FREType) { | ||
sfde_func_info = | ||
((PAuthKey & 1) << 5) | ((FDEType & 1) << 4) | (FREType & 0xf); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This part was inspired by Elf32_Sym::setBindingAndType
and friends.
It may also be useful to look at #147294 to see how this is used. |
@@ -0,0 +1,165 @@ | |||
//===-- llvm/BinaryFormat/SFrame.h ---SFrame Data Structures ----*- C++ -*-===// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
//===-- llvm/BinaryFormat/SFrame.h ---SFrame Data Structures ----*- C++ -*-===// | |
//===-- llvm/BinaryFormat/SFrame.h ---SFrame Data Structures --------------===// |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that per the latest coding standard, the file name is not needed in the header: https://llvm.org/docs/CodingStandards.html#file-headers
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
I go back and forth on this, having written a bunch of code with these definitions. On the one hand, it is nice when evaluating against the spec. On the other, it sticks out like a sore thumb once you get past the lowest-level. I'm somewhat inclined to change it all over to llvm style, with a comment on the structures definitions giving the corresponding specification name. It does make grepping a bit harder, but probably worth it. Don't know if anyone else has an opinion. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Approving, because it is OK as is on my side, but let's sort out the naming convention before going too much further.
namespace { | ||
// Test structure sizes and triviality. | ||
static_assert(std::is_trivial_v<sframe_preamble>); | ||
static_assert(sizeof(sframe_preamble) == 4); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This sort of "structure exactly matches the protocol" (with the implication that these might be memory mapped from inputs) always makes me a bit uncomfortable - do other parts of BinaryFormat do this? (and if we/they do, should the members use some of the types we have for that sort of type punning - that handle endianness, etc, if the file being read mismatches the current architecture being executed on)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The sframe specification expects consumers to memory map the new PT_GNU_SFRAME segment (and to handle any endianness issues), and carefully specifies the sizes and offsets of every field. So although we don't necessarily need to match that for our working copies, we do need to match that for reading and writing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The COFF and ELF parsers both heavily rely on directly accessing memory-mapped files. Accessing the data directly is simpler/faster than a separate in-memory representation. But getting the error-handling right can be a bit tricky. (In particular, you have to make sure you don't read past the end of the file.)
But yes, if we're going to do this, we should use the types that automatically handle the endianness/alignment issues, like packed_endian_specific_integral/ulittle32_t/etc.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
COFF and ELF parsers indeed use endian-specific integers, but they don't define these types inside the BinaryFormat library. What they do is instead is redefine those types inside the Object library (include/llvm/Object/{COFF,ELFTypes}.h). The types in BinaryFormat don't have asserts on their size, but it does look like they (intentionally or not) follow the on-disk layout. So this PR is sort of consistent with that.
That said, my plan was not to use these structures by reinterpret_cast
ing the mmapped data. I was planning to use the DataExtractor (see dependent PR) to take care of the endianness when reading from the data (which can still be mmapped). I considered templatizing on the endianness, but I wanted to avoid that because of the additional wrapping needed when switching between generic and concrete code. I can do that, if desired, but this seemed like it was easier to do. This also means I technically don't need the structure layout to match the protocol, but I think it's a nice form of documentation at least.
Another reason is that the initial version of this file based on @Sterling-Augustine's prototype (a fact I neglected to mention). Looking at the generation code, I see that it also has no need for the layout or endianness of this structure, but that's not entirely the case in the linker (which needs to produce and consume the format). The code, at least in the current form, does look like it could benefit from endian specific integers, though I think we could find precedents for using DataExtractors as well (e.g., the debug_names linker, whose functionality is actually quite similar to this).
Circling back, I think the main question is what mechanism do we use for parsing. The options I see are:
- endian-specific integers plus reinterpret_cast (used e.g. in ELF and COFF)
- DataExtractor (used mainly in DWARF, which includes eh_frame)
- memcpy + swapByteOrder (used in MachO and some other stuff)
I'd prefer the second option, but I'm happy to go with anything, particularly if it means the parser can be reused in the linker.
Assuming we go with the first option, the second question is whether we do the ELF thing of defining the structures in both BinaryFormat and Object libraries, or we just have a single version (where?).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
DataExtractor is fine, I guess, as long as the overhead of copying the data isn't a significant bottleneck.
Please don't use swapByteOrder; basically everyone who tries to write explicit byte-swapping code messes up, and it's hard to follow even if you get it right.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@Sterling-Augustine, @dwblaikie, any thoughts on this? I'm particularly interested in whether you have plans to reuse the parsing code in the linker. My plan was to provide an iterator-based api to access the FDEs. In this setup, it would copy all of the FDE data in order to extract it and swap it.
I'm pretty sure that's fine for dumping (which is accessing everything anyway, and is not performance critical), and for LLDB (which only accesses one FDE at a time), but maybe it's not okay for the linker which might only need to access the offset field of the FDE (to sort it) and the rest could be memcpy
ed into the output buffer?
Not a strong opinion, but we ended up matching the spec for DAP and I regret it because now nothing looks consistent. Mild +1 on changing it to match the LLVM coding style. |
#include "llvm/Support/Compiler.h" | ||
#include "llvm/Support/DataTypes.h" | ||
|
||
namespace llvm { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: can we use namespace llvm::sframe {
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
This patch defines the structures and constants used by the SFrame unwind info format supported by GNU binutils. For more information about the format, see https://sourceware.org/binutils/wiki/sframe and https://discourse.llvm.org/t/rfc-adding-sframe-support-to-llvm/86900 In the patch, I've used the naming convention for everything that has a direct equivalent to the specification (modulo changing macros to constants), and used the llvm convention for everything else.
It looks like there is enough votes to go in that direction. Please take a look at the new version (you can select the second commit to view the diff between the two). I've removed I haven't done anything with the endianness, as I am waiting for the result of the discussion. |
.. produced by ADT/BitmaskEnum.h. These aren't implicitly convertible to an integer, so I needed to tweak a couple of bitwise operations and add an explicit constructor for HexNumber and FlagEntry. Motivation: I'd like to use this in the SFrame data structures (llvm#147264)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
.. produced by ADT/BitmaskEnum.h. These aren't implicitly convertible to an integer, so I needed to tweak a couple of bitwise operations and add an explicit constructor for HexNumber and FlagEntry. Motivation: I'd like to use this in the SFrame data structures (#147264)
This patch defines the structures and constants used by the SFrame unwind info format supported by GNU binutils. For more information about the format, see https://sourceware.org/binutils/wiki/sframe and https://discourse.llvm.org/t/rfc-adding-sframe-support-to-llvm/86900
In the patch, I've used the naming convention for everything that has a direct equivalent to the specification (modulo changing macros to constants), and used the llvm convention for everything else.