diff --git a/clang/test/CodeGen/sanitize-metadata.c b/clang/test/CodeGen/sanitize-metadata.c
--- a/clang/test/CodeGen/sanitize-metadata.c
+++ b/clang/test/CodeGen/sanitize-metadata.c
@@ -31,6 +31,6 @@
 // CHECK-LABEL: __sanitizer_metadata_covered.module_dtor
 // CHECK: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered)
 
-// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
-// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i8 1}
-// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics"}
+// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered!C", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
+// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i64 1}
+// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics!C"}
diff --git a/compiler-rt/test/metadata/common.h b/compiler-rt/test/metadata/common.h
--- a/compiler-rt/test/metadata/common.h
+++ b/compiler-rt/test/metadata/common.h
@@ -25,6 +25,21 @@
   return v;
 }
 
+// Decodes the ULEB128 value at `pos` and advances `pos` past its last byte.
+uint64_t consume_uleb128(const char *&pos, const char *end) {
+  uint64_t val = 0;
+  int shift = 0;
+  uint8_t cur;
+  do {
+    // Validate before use: never read past `end` or shift by >= 64 bits.
+    assert(pos < end && shift < 64);
+    cur = *pos++;
+    val |= uint64_t{cur & 0x7fu} << shift;
+    shift += 7;
+  } while (cur & 0x80);
+  return val;
+}
+
 constexpr uint32_t kSanitizerBinaryMetadataUARHasSize = 1 << 2;
 
 uint32_t meta_version;
@@ -45,13 +60,15 @@
     const auto base = reinterpret_cast<uintptr_t>(pos);
     const intptr_t offset = offset_ptr_sized ? consume<intptr_t>(pos, end)
                                              : consume<int32_t>(pos, end);
-    [[maybe_unused]] const uint32_t size = consume<uint32_t>(pos, end);
-    const uint32_t features = consume<uint32_t>(pos, end);
-    uint32_t stack_args = 0;
+    [[maybe_unused]] const uint64_t size = consume_uleb128(pos, end);
+    const uint64_t features = consume_uleb128(pos, end);
+    uint64_t stack_args = 0;
     if (features & kSanitizerBinaryMetadataUARHasSize)
-      stack_args = consume<uint32_t>(pos, end);
+      stack_args = consume_uleb128(pos, end);
     if (const char *name = symbolize(base + offset))
-      printf("%s: features=%x stack_args=%u\n", name, features, stack_args);
+      printf("%s: features=%llx stack_args=%llu\n", name,
+             static_cast<unsigned long long>(features),
+             static_cast<unsigned long long>(stack_args));
   }
   meta_version = version;
   meta_start = start;
diff --git a/llvm/docs/PCSectionsMetadata.rst b/llvm/docs/PCSectionsMetadata.rst
--- a/llvm/docs/PCSectionsMetadata.rst
+++ b/llvm/docs/PCSectionsMetadata.rst
@@ -59,6 +59,18 @@
 code models, the entry size matches pointer size. For any smaller code model the
 entry size is just 32 bits.
 
+Encoding Options
+----------------
+
+Optional encoding options can be passed in the first ``MDString`` operand:
+``<section>!<options>``. The following options are available:
+
+    * ``C`` -- Compress constant integers of size 2-8 bytes as ULEB128; this
+      includes the function size (but excludes the PC entry).
+
+For example, ``foo!C`` will emit into section ``foo`` with all such constants
+encoded as ULEB128.
+
 Guarantees on Code Generation
 =============================
 
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -643,6 +643,13 @@
   /// Emit a long long directive and value.
   void emitInt64(uint64_t Value) const;
 
+  /// Emit the specified signed leb128 value.
+  void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
+
+  /// Emit the specified unsigned leb128 value.
+  void emitULEB128(uint64_t Value, const char *Desc = nullptr,
+                   unsigned PadTo = 0) const;
+
   /// Emit something like ".long Hi-Lo" where the size in bytes of the directive
   /// is specified by Size and Hi/Lo specify the labels. This implicitly uses
   /// .set if it is available.
@@ -670,13 +677,6 @@
   // Dwarf Emission Helper Routines
   //===------------------------------------------------------------------===//
 
-  /// Emit the specified signed leb128 value.
-  void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
-
-  /// Emit the specified unsigned leb128 value.
-  void emitULEB128(uint64_t Value, const char *Desc = nullptr,
-                   unsigned PadTo = 0) const;
-
   /// Emit a .byte 42 directive that corresponds to an encoding. If verbose
   /// assembly output is enabled, we output comments describing the encoding.
   /// Desc is a string saying what the encoding is specifying (e.g. "LSDA").
diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
--- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
@@ -30,11 +30,12 @@
 inline constexpr int kSanitizerBinaryMetadataUARBit = 1;
 inline constexpr int kSanitizerBinaryMetadataUARHasSizeBit = 2;
 
-inline constexpr uint32_t kSanitizerBinaryMetadataAtomics =
-    1 << kSanitizerBinaryMetadataAtomicsBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUAR =
-    1 << kSanitizerBinaryMetadataUARBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUARHasSize =
-    1 << kSanitizerBinaryMetadataUARHasSizeBit;
+// Shift a 64-bit one so that feature bits >= 31 cannot overflow `int`.
+inline constexpr uint64_t kSanitizerBinaryMetadataAtomics =
+    uint64_t{1} << kSanitizerBinaryMetadataAtomicsBit;
+inline constexpr uint64_t kSanitizerBinaryMetadataUAR =
+    uint64_t{1} << kSanitizerBinaryMetadataUARBit;
+inline constexpr uint64_t kSanitizerBinaryMetadataUARHasSize =
+    uint64_t{1} << kSanitizerBinaryMetadataUARHasSizeBit;
 
 inline constexpr char kSanitizerBinaryMetadataCoveredSection[] =
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1496,9 +1496,22 @@
     // constants may appear, which will simply be emitted into the current
     // section (the user of MD_pcsections decides the format of encoded data).
     assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
+    bool ConstULEB128 = false;
     for (const MDOperand &MDO : MD.operands()) {
       if (auto *S = dyn_cast<MDString>(MDO)) {
-        SwitchSection(S->getString());
+        // Found string, start of new section!
+        // Find options for this section "<section>!<options>" - supported options:
+        //   C = Compress constant integers of size 2-8 bytes as ULEB128.
+        const StringRef SecWithOpt = S->getString();
+        const size_t OptStart = SecWithOpt.find('!'); // likely npos
+        const StringRef Sec = SecWithOpt.substr(0, OptStart);
+        const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
+        ConstULEB128 = Opts.find('C') != StringRef::npos;
+#ifndef NDEBUG
+        for (char O : Opts)
+          assert((O == '!' || O == 'C') && "Invalid !pcsections options");
+#endif
+        SwitchSection(Sec);
         const MCSymbol *Prev = Syms.front();
         for (const MCSymbol *Sym : Syms) {
           if (Sym == Prev || !Deltas) {
@@ -1510,17 +1523,30 @@
             // `base + addr`.
             emitLabelDifference(Sym, Base, RelativeRelocSize);
           } else {
-            emitLabelDifference(Sym, Prev, 4);
+            // Emit delta between symbol and previous symbol.
+            if (ConstULEB128)
+              emitLabelDifferenceAsULEB128(Sym, Prev);
+            else
+              emitLabelDifference(Sym, Prev, 4);
           }
           Prev = Sym;
         }
       } else {
+        // Emit auxiliary data after PC.
         assert(isa<MDNode>(MDO) && "expecting either string or tuple");
         const auto *AuxMDs = cast<MDNode>(MDO);
         for (const MDOperand &AuxMDO : AuxMDs->operands()) {
           assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
-          const auto *C = cast<ConstantAsMetadata>(AuxMDO);
-          emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue());
+          const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
+          const DataLayout &DL = F.getParent()->getDataLayout();
+          const uint64_t Size = DL.getTypeStoreSize(C->getType());
+
+          if (auto *CI = dyn_cast<ConstantInt>(C);
+              CI && ConstULEB128 && Size > 1 && Size <= 8) {
+            emitULEB128(CI->getZExtValue());
+          } else {
+            emitGlobalConstant(DL, C);
+          }
         }
       }
     }
@@ -2788,6 +2814,23 @@
 /// Emit a long directive and value.
 void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); }
 
+/// Emit the specified signed leb128 value.
+void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
+  if (isVerbose() && Desc)
+    OutStreamer->AddComment(Desc);
+
+  OutStreamer->emitSLEB128IntValue(Value);
+}
+
+/// Emit the specified unsigned leb128 value.
+void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
+                             unsigned PadTo) const {
+  if (isVerbose() && Desc)
+    OutStreamer->AddComment(Desc);
+
+  OutStreamer->emitULEB128IntValue(Value, PadTo);
+}
+
 /// Emit a long long directive and value.
 void AsmPrinter::emitInt64(uint64_t Value) const {
   OutStreamer->emitInt64(Value);
@@ -2801,6 +2844,12 @@
   OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
 }
 
+/// Emit something like ".uleb128 Hi-Lo".
+void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+                                              const MCSymbol *Lo) const {
+  OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
+}
+
 /// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
 /// where the size in bytes of the directive is specified by Size and Label
 /// specifies the label. This implicitly uses .set if it is available.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -32,28 +32,6 @@
 // Dwarf Emission Helper Routines
 //===----------------------------------------------------------------------===//
 
-/// EmitSLEB128 - emit the specified signed leb128 value.
-void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
-  if (isVerbose() && Desc)
-    OutStreamer->AddComment(Desc);
-
-  OutStreamer->emitSLEB128IntValue(Value);
-}
-
-void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
-                             unsigned PadTo) const {
-  if (isVerbose() && Desc)
-    OutStreamer->AddComment(Desc);
-
-  OutStreamer->emitULEB128IntValue(Value, PadTo);
-}
-
-/// Emit something like ".uleb128 Hi-Lo".
-void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
-                                              const MCSymbol *Lo) const {
-  OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
-}
-
 static const char *DecodeDWARFEncoding(unsigned Encoding) {
   switch (Encoding) {
   case dwarf::DW_EH_PE_absptr:
diff --git a/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp b/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
--- a/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -52,7 +52,7 @@
   if (!MD)
     return false;
   const auto &Section = *cast<MDString>(MD->getOperand(0));
-  if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection))
+  if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
     return false;
   auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
   // Assume it currently only has features.
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
--- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -35,15 +35,16 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 #include <array>
 #include <cstdint>
-#include <limits>
 
 using namespace llvm;
 
@@ -148,7 +149,7 @@
   // to determine if a memory operation is atomic or not in modules compiled
   // with SanitizerBinaryMetadata.
   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
-             uint32_t &FeatureMask);
+             uint64_t &FeatureMask);
 
   // Get start/end section marker pointer.
   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
@@ -169,6 +170,9 @@
   const SanitizerBinaryMetadataOptions Options;
   const Triple TargetTriple;
   IRBuilder<> IRB;
+  // Backing storage for section names returned by getSectionName().
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver StringPool{Alloc};
 };
 
 bool SanitizerBinaryMetadata::run() {
@@ -245,7 +249,7 @@
 
   // The metadata features enabled for this function, stored along covered
   // metadata (if enabled).
-  uint32_t FeatureMask = 0;
+  uint64_t FeatureMask = 0;
   // Don't emit unnecessary covered metadata for all functions to save space.
   bool RequiresCovered = false;
 
@@ -270,10 +274,8 @@
     const auto *MI = &MetadataInfo::Covered;
     MIS.insert(MI);
     const StringRef Section = getSectionName(MI->SectionSuffix);
-    // The feature mask will be placed after the size of the function.
-    assert(FeatureMask <= std::numeric_limits<uint8_t>::max() &&
-           "Increase feature mask bytes and bump version");
-    Constant *CFM = IRB.getInt8(FeatureMask);
+    // The feature mask will be placed after the function size.
+    Constant *CFM = IRB.getInt64(FeatureMask);
     F.setMetadata(LLVMContext::MD_pcsections,
                   MDB.createPCSections({{Section, {CFM}}}));
   }
@@ -380,7 +382,7 @@
 }
 
 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
-                                    MDBuilder &MDB, uint32_t &FeatureMask) {
+                                    MDBuilder &MDB, uint64_t &FeatureMask) {
   SmallVector<const MetadataInfo *, 1> InstMetadata;
   bool RequiresCovered = false;
 
@@ -435,8 +437,9 @@
 }
 
 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
-  // FIXME: Other TargetTriple (req. string pool)
-  return SectionSuffix;
+  // FIXME: Other TargetTriples.
+  // Request ULEB128 encoding for all integer constants.
+  return StringPool.save(SectionSuffix + "!C");
 }
 
 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
diff --git a/llvm/test/CodeGen/X86/pcsections.ll b/llvm/test/CodeGen/X86/pcsections.ll
--- a/llvm/test/CodeGen/X86/pcsections.ll
+++ b/llvm/test/CodeGen/X86/pcsections.ll
@@ -137,9 +137,35 @@
   ret void
 }
 
+; The "!C" option ULEB128-encodes the function size and the 2-8 byte integer
+; constants of its own section only; the i8 constant and the option-less
+; section_aux_21264 keep their fixed-width encoding.
+define void @multiple_uleb128() !pcsections !6 {
+; CHECK-LABEL: multiple_uleb128:
+; CHECK: .section section_aux,"awo",@progbits,.text
+; CHECK-NEXT: .Lpcsection_base8:
+; DEFCM-NEXT: .long .Lfunc_begin3-.Lpcsection_base8
+; LARGE-NEXT: .quad .Lfunc_begin3-.Lpcsection_base8
+; CHECK-NEXT: .uleb128 .Lfunc_end6-.Lfunc_begin3
+; CHECK-NEXT: .byte 42
+; CHECK-NEXT: .ascii "\345\216&"
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .section section_aux_21264,"awo",@progbits,.text
+; CHECK-NEXT: .Lpcsection_base9:
+; DEFCM-NEXT: .long .Lfunc_begin3-.Lpcsection_base9
+; LARGE-NEXT: .quad .Lfunc_begin3-.Lpcsection_base9
+; CHECK-NEXT: .long .Lfunc_end6-.Lfunc_begin3
+; CHECK-NEXT: .long 21264
+; CHECK-NEXT: .text
+entry:
+  ret void
+}
+
 !0 = !{!"section_no_aux"}
 !1 = !{!"section_aux", !3}
 !2 = !{!"section_aux_42", !4, !"section_aux_21264", !5}
 !3 = !{i32 10, i32 20, i32 30}
 !4 = !{i32 42}
 !5 = !{i32 21264}
+!6 = !{!"section_aux!C", !7, !"section_aux_21264", !5}
+!7 = !{i64 42, i32 624485, i8 255}
diff --git a/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll b/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
--- a/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
+++ b/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
@@ -2071,6 +2071,6 @@
 ; CHECK-DAG: ret:
 ; CHECK-NEXT: ret void
 
-; CHECK: !0 = !{!"sanmd_covered", !1}
-; CHECK: !1 = !{i8 1}
-; CHECK: !2 = !{!"sanmd_atomics"}
+; CHECK: !0 = !{!"sanmd_covered!C", !1}
+; CHECK: !1 = !{i64 1}
+; CHECK: !2 = !{!"sanmd_atomics!C"}