diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h
--- a/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation.h
@@ -154,6 +154,15 @@
   SanitizerCoverageOptions() = default;
 };
 
+/// Options for SanitizerBinaryMetadata.
+struct SanitizerBinaryMetadataOptions {
+  /// Emit PCs for covered functions.
+  bool Covered = false;
+  /// Emit PCs for atomic operations.
+  bool Atomics = false;
+  SanitizerBinaryMetadataOptions() = default;
+};
+
 /// Calculate what to divide by to scale counts.
 ///
 /// Given the maximum count, calculate a divisor that will scale all the
diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
@@ -0,0 +1,42 @@
+//===------- Definition of the SanitizerBinaryMetadata class ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SanitizerBinaryMetadata pass.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERBINARYMETADATA_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERBINARYMETADATA_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Instrumentation.h"
+
+namespace llvm {
+
+/// Public interface to the SanitizerBinaryMetadata module pass for emitting
+/// metadata for binary analysis sanitizers.
+///
+/// The pass should be inserted after optimizations.
+class ModuleSanitizerBinaryMetadataPass
+    : public PassInfoMixin<ModuleSanitizerBinaryMetadataPass> {
+public:
+  explicit ModuleSanitizerBinaryMetadataPass(
+      SanitizerBinaryMetadataOptions Opts = {});
+  /// Emits the requested metadata for \p M. Preserves all analyses if the
+  /// module was left unchanged, and none otherwise.
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+  static bool isRequired() { return true; }
+
+private:
+  const SanitizerBinaryMetadataOptions Options;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -138,6 +138,7 @@
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
 #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/ObjCARC.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -123,6 +123,7 @@
 MODULE_PASS("module-inline", ModuleInlinerPass())
 MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
 MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
+MODULE_PASS("sanmd-module", ModuleSanitizerBinaryMetadataPass())
 MODULE_PASS("memprof-module", ModuleMemProfilerPass())
 MODULE_PASS("poison-checking", PoisonCheckingPass())
 MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -15,6 +15,7 @@
   PGOMemOPSizeOpt.cpp
   PoisonChecking.cpp
   SanitizerCoverage.cpp
+  SanitizerBinaryMetadata.cpp
   ValueProfileCollector.cpp
   ThreadSanitizer.cpp
   HWAddressSanitizer.cpp
diff --git
a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -0,0 +1,347 @@
+//===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of SanitizerBinaryMetadata.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include <array>
+#include <cstdint>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "sanmd"
+
+namespace {
+
+//===--- Constants --------------------------------------------------------===//
+
+constexpr uint32_t kVersionBase = 1; // occupies lower 16 bits
+constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized
+constexpr int kCtorDtorPriority = 2;
+
+// Pairs of names of initialization callback functions and which section
+// contains the relevant metadata.
+class MetadataInfo {
+public:
+  const StringRef FunctionPrefix;
+  const StringRef SectionSuffix;
+  const uint32_t FeatureMask;
+
+  static const MetadataInfo Covered;
+  static const MetadataInfo Atomics;
+
+private:
+  // Forbid construction elsewhere.
+  explicit constexpr MetadataInfo(StringRef FunctionPrefix,
+                                  StringRef SectionSuffix, int Feature)
+      : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix),
+        FeatureMask(Feature != -1 ? (1u << Feature) : 0) {}
+};
+const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered",
+                                         "sanmd_covered", -1};
+const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics",
+                                         "sanmd_atomics", 0};
+
+// The only instances of MetadataInfo are the constants above, so a set of
+// them may simply store pointers to them. To deterministically generate code,
+// we need to use a set with stable iteration order, such as SetVector.
+using MetadataInfoSet = SetVector<const MetadataInfo *>;
+
+//===--- Command-line options ---------------------------------------------===//
+
+cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
+                            cl::desc("Emit PCs for covered functions."),
+                            cl::Hidden, cl::init(false));
+cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
+                            cl::desc("Emit PCs for atomic operations."),
+                            cl::Hidden, cl::init(false));
+
+//===--- Statistics -------------------------------------------------------===//
+
+STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
+STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
+
+//===----------------------------------------------------------------------===//
+
+// Apply opt overrides: a feature is enabled if either the pass options or the
+// corresponding command-line flag requests it.
+SanitizerBinaryMetadataOptions &&
+transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
+  Opts.Covered |= ClEmitCovered;
+  Opts.Atomics |= ClEmitAtomics;
+  return std::move(Opts);
+}
+
+class ModuleSanitizerBinaryMetadata {
+public:
+  ModuleSanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts)
+      : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
+        TargetTriple(M.getTargetTriple()), IRB(M.getContext()) {
+    // FIXME: Make it work with other formats.
+    assert(TargetTriple.isOSBinFormatELF() && "ELF prototype only");
+  }
+
+  bool run();
+
+private:
+  // Return enabled feature mask of per-instruction metadata.
+  uint32_t getEnabledPerInstructionFeature() const {
+    uint32_t FeatureMask = 0;
+    if (Options.Atomics)
+      FeatureMask |= MetadataInfo::Atomics.FeatureMask;
+    return FeatureMask;
+  }
+
+  // Returns the metadata format version: kVersionBase, with kVersionPtrSizeRel
+  // set when the module uses the medium or large code model.
+  uint32_t getVersion() const {
+    uint32_t Version = kVersionBase;
+    const auto CM = Mod.getCodeModel();
+    if (CM.hasValue() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
+      Version |= kVersionPtrSizeRel;
+    return Version;
+  }
+
+  void runOn(Function &F, MetadataInfoSet &MIS);
+
+  // Determines which set of metadata to collect for this instruction.
+  //
+  // Returns true if covered metadata is required to unambiguously interpret
+  // other metadata. For example, if we are interested in atomics metadata, any
+  // function with memory operations (atomic or not) requires covered metadata
+  // to determine if a memory operation is atomic or not in modules compiled
+  // with SanitizerBinaryMetadata.
+  bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB);
+
+  // Get start/end section marker pointer.
+  GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
+
+  // Create a 0-sized object in a section, so that the section is not discarded
+  // if all inputs have been discarded.
+  void createZeroSizedObjectInSection(Type *Ty, StringRef SectionSuffix);
+
+  // Returns the target-dependent section name.
+  StringRef getSectionName(StringRef SectionSuffix);
+
+  // Returns the section start marker name. An owning std::string is returned
+  // because a Twine only references its operands and must not be stored.
+  std::string getSectionStart(StringRef SectionSuffix);
+
+  // Returns the section end marker name (owning, as for getSectionStart()).
+  std::string getSectionEnd(StringRef SectionSuffix);
+
+  Module &Mod;
+  const SanitizerBinaryMetadataOptions Options;
+  const Triple TargetTriple;
+  IRBuilder<> IRB;
+};
+
+// Annotates every function in the module. If any metadata was attached, emits
+// a constructor/destructor pair per metadata kind that calls the matching
+// `_add`/`_del` callback. Returns true if any metadata was emitted.
+bool ModuleSanitizerBinaryMetadata::run() {
+  MetadataInfoSet MIS;
+
+  for (Function &F : Mod)
+    runOn(F, MIS);
+
+  if (MIS.empty())
+    return false;
+
+  //
+  // Setup constructors and call all initialization functions for requested
+  // metadata features.
+  //
+
+  auto *Int8PtrTy = IRB.getInt8PtrTy();
+  auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
+  auto *Int32Ty = IRB.getInt32Ty();
+  const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy};
+  auto *Version = ConstantInt::get(Int32Ty, getVersion());
+
+  for (const MetadataInfo *MI : MIS) {
+    const std::array<Value *, InitTypes.size()> InitArgs = {
+        Version,
+        getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy),
+        getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy),
+    };
+    Function *Ctor =
+        createSanitizerCtorAndInitFunctions(
+            Mod, (MI->FunctionPrefix + ".module_ctor").str(),
+            (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs)
+            .first;
+    Function *Dtor =
+        createSanitizerCtorAndInitFunctions(
+            Mod, (MI->FunctionPrefix + ".module_dtor").str(),
+            (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs)
+            .first;
+    Constant *CtorData = nullptr;
+    Constant *DtorData = nullptr;
+    if (TargetTriple.supportsCOMDAT()) {
+      // Use COMDAT to deduplicate constructor/destructor function.
+      Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
+      Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
+      CtorData = Ctor;
+      DtorData = Dtor;
+    }
+    appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorData);
+    appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorData);
+    createZeroSizedObjectInSection(Int8PtrTy, MI->SectionSuffix);
+  }
+
+  return true;
+}
+
+void ModuleSanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
+  if (F.empty())
+    return;
+  if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+    return;
+  // Don't touch available_externally functions, their actual body is elsewhere.
+  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
+    return;
+
+  MDBuilder MDB(F.getContext());
+
+  // The metadata features enabled for this function, stored along covered
+  // metadata (if enabled).
+  uint32_t PerInstrFeatureMask = getEnabledPerInstructionFeature();
+  // Don't emit unnecessary covered metadata for all functions to save space.
+  bool RequiresCovered = false;
+  if (PerInstrFeatureMask) {
+    for (BasicBlock &BB : F)
+      for (Instruction &I : BB)
+        RequiresCovered |= runOn(I, MIS, MDB);
+  }
+
+  // Covered metadata is always emitted if explicitly requested, otherwise only
+  // if some other metadata requires it to unambiguously interpret it for
+  // modules compiled with SanitizerBinaryMetadata.
+  if (Options.Covered || RequiresCovered) {
+    NumMetadataCovered++;
+    const auto *MI = &MetadataInfo::Covered;
+    MIS.insert(MI);
+    const StringRef Section = getSectionName(MI->SectionSuffix);
+    // The feature mask will be placed after the size (32 bit) of the function,
+    // so in total one covered entry will use `sizeof(void*) + 4 + 4`.
+    Constant *CFM = IRB.getInt32(PerInstrFeatureMask);
+    F.setMetadata(LLVMContext::MD_pcsections,
+                  MDB.createPCSections({Section}, {{CFM}}));
+  }
+}
+
+bool ModuleSanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
+                                          MDBuilder &MDB) {
+  SmallVector<const MetadataInfo *, 1> InstMetadata;
+  bool RequiresCovered = false;
+
+  if (Options.Atomics && I.mayReadOrWriteMemory()) {
+    auto SSID = getAtomicSyncScopeID(&I);
+    if (SSID.hasValue() && SSID.getValue() != SyncScope::SingleThread) {
+      NumMetadataAtomics++;
+      InstMetadata.push_back(&MetadataInfo::Atomics);
+    }
+    RequiresCovered = true;
+  }
+
+  // Attach MD_pcsections to instruction.
+  if (!InstMetadata.empty()) {
+    MIS.insert(InstMetadata.begin(), InstMetadata.end());
+    SmallVector<StringRef, 1> Sections;
+    for (const auto &MI : InstMetadata) {
+      Sections.push_back(getSectionName(MI->SectionSuffix));
+    }
+    I.setMetadata(LLVMContext::MD_pcsections,
+                  MDB.createPCSections(Sections, {}));
+  }
+
+  return RequiresCovered;
+}
+
+GlobalVariable *
+ModuleSanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName,
+                                                Type *Ty) {
+  auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
+                                    GlobalVariable::ExternalLinkage,
+                                    /*Initializer=*/nullptr, MarkerName);
+  Marker->setVisibility(GlobalValue::HiddenVisibility);
+  return Marker;
+}
+
+void ModuleSanitizerBinaryMetadata::createZeroSizedObjectInSection(
+    Type *Ty, StringRef SectionSuffix) {
+  auto *DummyInit = ConstantAggregateZero::get(ArrayType::get(Ty, 0));
+  auto *DummyEntry = new GlobalVariable(Mod, DummyInit->getType(), true,
+                                        GlobalVariable::ExternalLinkage,
+                                        DummyInit, "__dummy_" + SectionSuffix);
+  DummyEntry->setSection(getSectionName(SectionSuffix));
+  DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
+  if (TargetTriple.supportsCOMDAT())
+    DummyEntry->setComdat(Mod.getOrInsertComdat(DummyEntry->getName()));
+  // Make sure the section isn't discarded by gc-sections.
+  appendToUsed(Mod, DummyEntry);
+}
+
+StringRef
+ModuleSanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
+  // FIXME: Other TargetTriple (req. string pool)
+  return SectionSuffix;
+}
+
+std::string
+ModuleSanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
+  return ("__start_" + SectionSuffix).str();
+}
+
+std::string
+ModuleSanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
+  return ("__stop_" + SectionSuffix).str();
+}
+
+} // namespace
+
+ModuleSanitizerBinaryMetadataPass::ModuleSanitizerBinaryMetadataPass(
+    SanitizerBinaryMetadataOptions Opts)
+    : Options(std::move(Opts)) {}
+
+PreservedAnalyses
+ModuleSanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
+  ModuleSanitizerBinaryMetadata Pass(M, Options);
+  if (Pass.run())
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}