diff --git a/llvm/docs/PCSectionsMetadata.rst b/llvm/docs/PCSectionsMetadata.rst new file mode 100644 --- /dev/null +++ b/llvm/docs/PCSectionsMetadata.rst @@ -0,0 +1,116 @@ +========================= +LLVM PC Sections Metadata +========================= + +.. contents:: + :local: + +Introduction +============ + +PC Sections Metadata can be attached to instructions and functions, for which +addresses, viz. program counters (PCs), are to be emitted in specially encoded +binary sections. Metadata is assigned as an ``MDNode`` of the ``MD_pcsections`` +(``!pcsections``) kind; the following section describes the metadata format. + +Metadata Format +=============== + +An arbitrary number of interleaved ``MDString`` and constant operands can be +added, where a new ``MDString`` always denotes a section name, followed by an +arbitrary number of auxiliary constant data encoded along the PC of the +instruction or function. The first operand must be an ``MDString`` denoting the +first section. + +.. code-block:: none + + !0 = !{ + !"<section#1>" + [ , !1 ... ] + [ !"<section#2>" + [ , !2 ... ] + ... ] + } + !1 = !{ iXX <aux-consts#1>, ... } + !2 = !{ iXX <aux-consts#2>, ... } + ... + +The occurrence of ``section#1``, ``section#2``, ..., ``section#N`` in the +metadata causes the backend to emit the PC for the associated instruction or +function to all named sections. For each emitted PC in a section #N, the +constants ``aux-consts#N`` in the tuple ``!N`` will be emitted after the PC. +Multiple tuples with constant data may be provided after a section name string +(e.g. ``!0 = !{!"s1", !1, !2}``), and a single constant tuple may be reused for +different sections (e.g. ``!0 = !{!"s1", !1, !"s2", !1}``). + +Binary Encoding +=============== + +*Instructions* result in emitting a single PC, and *functions* result in +emission of the start of the function and a 32-bit size. This is followed by +the auxiliary constants that followed the respective section name in the +``MD_pcsections`` metadata.
+ +To avoid relocations in the final binary, each PC address stored at ``entry`` +is a relative relocation, computed as ``pc - entry``. To decode, a user has to +compute ``entry + *entry``. + +The size of each entry depends on the code model. With large and medium sized +code models, the entry size matches pointer size. For any smaller code model +the entry size is just 32 bits. + +Guarantees on Code Generation +============================= + +Attaching ``!pcsections`` metadata to LLVM IR instructions *shall not* affect +optimizations or code generation outside the requested PC sections. + +While relying on LLVM IR metadata to request PC sections makes the above +guarantee relatively trivial, propagation of metadata through the optimization +and code generation pipeline has the following guarantees. + +Metadata Propagation +-------------------- + +In general, LLVM *does not make any guarantees* about preserving IR metadata +(attached to an ``Instruction``) through IR transformations. When using PC +sections metadata, this guarantee is unchanged, and ``!pcsections`` metadata +remains *optional* until lowering to machine IR (MIR). + +Note for Code Generation +------------------------ + +As with other LLVM IR metadata, there are no requirements for LLVM IR +transformation passes to preserve ``!pcsections`` metadata, with the following +exceptions: + + * The ``AtomicExpandPass`` shall preserve ``!pcsections`` metadata + according to the below rules 1-4. + +When translating LLVM IR to MIR, the ``!pcsections`` metadata shall be copied +from the source ``Instruction`` to the target ``MachineInstr`` (set with +``MachineInstr::setPCSections()``). The instruction selectors and MIR +optimization passes shall preserve PC sections metadata as follows: + + 1. Replacements will preserve PC sections metadata of the replaced + instruction. + + 2. Duplications will preserve PC sections metadata of the copied + instruction. + + 3. 
Merging will preserve PC sections metadata of one of the two + instructions (no guarantee on which instruction's metadata is used). + + 4. Deletions will lose PC sections metadata. + +This is similar to debug info, and the ``BuildMI()`` helper provides a +convenient way to propagate debug info and ``!pcsections`` metadata in the +``MIMetadata`` bundle. + +Note for Metadata Users +----------------------- + +Use cases for ``!pcsections`` metadata should either be fully tolerant to +missing metadata, or the passes inserting ``!pcsections`` metadata should run +*after* all LLVM IR optimization passes to preserve the metadata until being +translated to MIR. diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst --- a/llvm/docs/Reference.rst +++ b/llvm/docs/Reference.rst @@ -33,6 +33,7 @@ MarkedUpDisassembly MIRLangRef OptBisect + PCSectionsMetadata PDB/index PointerAuth ScudoHardenedAllocator diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -47,3 +47,4 @@ LLVM_FIXED_MD_KIND(MD_exclude, "exclude", 33) LLVM_FIXED_MD_KIND(MD_memprof, "memprof", 34) LLVM_FIXED_MD_KIND(MD_callsite, "callsite", 35) +LLVM_FIXED_MD_KIND(MD_pcsections, "pcsections", 36) diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -15,6 +15,7 @@ #define LLVM_IR_MDBUILDER_H #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/DataTypes.h" @@ -112,6 +113,16 @@ /// prologue for the "function" santizier. MDNode *createRTTIPointerPrologue(Constant *PrologueSig, Constant *RTTI); + //===------------------------------------------------------------------===// + // PC sections metadata. 
+ //===------------------------------------------------------------------===// + + /// A pair of PC section name with auxiliary constant data. + using PCSection = std::pair>; + + /// Return metadata for PC sections. + MDNode *createPCSections(ArrayRef Sections); + //===------------------------------------------------------------------===// // AA metadata. //===------------------------------------------------------------------===// diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -158,6 +158,27 @@ return MDNode::get(Context, Ops); } +MDNode *MDBuilder::createPCSections(ArrayRef Sections) { + SmallVector Ops; + + for (const auto &Entry : Sections) { + const StringRef &Sec = Entry.first; + Ops.push_back(createString(Sec)); + + // If auxiliary data for this section exists, append it. + const SmallVector &AuxConsts = Entry.second; + if (!AuxConsts.empty()) { + SmallVector AuxMDs; + AuxMDs.reserve(AuxConsts.size()); + for (Constant *C : AuxConsts) + AuxMDs.push_back(createConstant(C)); + Ops.push_back(MDNode::get(Context, AuxMDs)); + } + } + + return MDNode::get(Context, Ops); +} + MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) { SmallVector Args(1, nullptr); if (Extra) diff --git a/llvm/unittests/IR/MDBuilderTest.cpp b/llvm/unittests/IR/MDBuilderTest.cpp --- a/llvm/unittests/IR/MDBuilderTest.cpp +++ b/llvm/unittests/IR/MDBuilderTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" @@ -104,4 +105,26 @@ EXPECT_EQ(mdconst::extract(N2->getOperand(2))->getZExtValue(), 1U); } +TEST_F(MDBuilderTest, createPCSections) { + MDBuilder MDHelper(Context); + ConstantInt *C1 = ConstantInt::get(Context, APInt(8, 1)); + ConstantInt *C2 = ConstantInt::get(Context, APInt(8, 
2)); + MDNode *PCS = MDHelper.createPCSections({{"s1", {C1, C2}}, {"s2", {}}}); + ASSERT_EQ(PCS->getNumOperands(), 3U); + const auto *S1 = dyn_cast(PCS->getOperand(0)); + const auto *Aux = dyn_cast(PCS->getOperand(1)); + const auto *S2 = dyn_cast(PCS->getOperand(2)); + ASSERT_NE(S1, nullptr); + ASSERT_NE(Aux, nullptr); + ASSERT_NE(S2, nullptr); + EXPECT_EQ(S1->getString(), "s1"); + EXPECT_EQ(S2->getString(), "s2"); + ASSERT_EQ(Aux->getNumOperands(), 2U); + ASSERT_TRUE(isa(Aux->getOperand(0))); + ASSERT_TRUE(isa(Aux->getOperand(1))); + EXPECT_EQ(mdconst::extract(Aux->getOperand(0))->getValue(), + C1->getValue()); + EXPECT_EQ(mdconst::extract(Aux->getOperand(1))->getValue(), + C2->getValue()); } +} // namespace