Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -131,6 +131,7 @@
 void initializeThreadSanitizerPass(PassRegistry&);
 void initializeSanitizerCoverageModulePass(PassRegistry&);
 void initializeDataFlowSanitizerPass(PassRegistry&);
+void initializeEfficiencySanitizerPass(PassRegistry&);
 void initializeScalarizerPass(PassRegistry&);
 void initializeEarlyCSELegacyPassPass(PassRegistry &);
 void initializeEliminateAvailableExternallyPass(PassRegistry&);
Index: include/llvm/Transforms/Instrumentation.h
===================================================================
--- include/llvm/Transforms/Instrumentation.h
+++ include/llvm/Transforms/Instrumentation.h
@@ -116,6 +116,19 @@
     const std::vector<std::string> &ABIListFiles = std::vector<std::string>(),
     void *(*getArgTLS)() = nullptr, void *(*getRetValTLS)() = nullptr);
 
+// Options for EfficiencySanitizer sub-tools.
+struct EfficiencySanitizerOptions {
+  EfficiencySanitizerOptions() : ToolType(ESAN_None) {}
+  enum Type {
+    ESAN_None = 0,
+    ESAN_CacheFrag,
+  } ToolType;
+};
+
+// Insert EfficiencySanitizer instrumentation.
+FunctionPass *createEfficiencySanitizerPass(
+    const EfficiencySanitizerOptions &Options = EfficiencySanitizerOptions());
+
 // Options for sanitizer coverage instrumentation.
 struct SanitizerCoverageOptions {
   SanitizerCoverageOptions()
Index: lib/Transforms/Instrumentation/CMakeLists.txt
===================================================================
--- lib/Transforms/Instrumentation/CMakeLists.txt
+++ lib/Transforms/Instrumentation/CMakeLists.txt
@@ -9,6 +9,7 @@
   PGOInstrumentation.cpp
   SanitizerCoverage.cpp
   ThreadSanitizer.cpp
+  EfficiencySanitizer.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
Index: lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -0,0 +1,330 @@
+//===-- EfficiencySanitizer.cpp - performance tuner ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of EfficiencySanitizer, a family of performance tuners
+// that detects multiple performance issues via separate sub-tools.
+//
+// The instrumentation phase is straightforward:
+//   - Take action on every memory access: either inlined instrumentation,
+//     or inserted calls to our run-time library.
+//   - Optimizations may apply to avoid instrumenting some of the accesses.
+//   - Turn mem{set,cpy,move} intrinsics into library calls.
+// The rest is handled by the run-time library.
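+//
+// For example (illustrative IR, mirroring the cache_frag_basic.ll test
+// below), under -esan-cache-frag an aligned 4-byte load such as
+//   %tmp1 = load i32, i32* %a, align 4
+// is preceded by a slowpath callout:
+//   %0 = bitcast i32* %a to i8*
+//   call void @__esan_aligned_read4(i8* %0)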
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "esan"
+
+// The tool type must be just one of these ClTool* options, as the tools
+// cannot be combined due to shadow memory constraints.
+static cl::opt<bool> ClToolCacheFrag(
+    "esan-cache-frag", cl::init(false),
+    cl::desc("Detect cache fragmentation"), cl::Hidden);
+// Each new tool will get its own opt flag here.
+
+static cl::opt<bool> ClInstrumentMemoryAccesses(
+    "esan-instrument-memory-accesses", cl::init(true),
+    cl::desc("Instrument memory accesses"), cl::Hidden);
+static cl::opt<bool> ClInstrumentMemIntrinsics(
+    "esan-instrument-memintrinsics", cl::init(true),
+    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
+
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
+STATISTIC(NumAccessesWithBadSize, "Number of accesses with a bad size");
+
+static const char *const kEsanModuleCtorName = "esan.module_ctor";
+static const char *const kEsanInitName = "__esan_init";
+
+namespace {
+
+static void OverrideOptionsFromCL(const EfficiencySanitizerOptions &Options) {
+  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
+    ClToolCacheFrag = true;
+
+  // Direct opt invocation with no params will have the default ESAN_None.
+  // We fall back to the default tool (cache fragmentation) in that case.
+  if (!ClToolCacheFrag)
+    ClToolCacheFrag = true;
+
+  // Exactly one tool must be enabled.
+  assert(ClToolCacheFrag);
+}
+
+/// EfficiencySanitizer: instrument each module to find performance issues.
+class EfficiencySanitizer : public FunctionPass {
+public:
+  EfficiencySanitizer(
+      const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
+      : FunctionPass(ID), Options(Opts) {
+    OverrideOptionsFromCL(Opts);
+  }
+  const char *getPassName() const override;
+  bool runOnFunction(Function &F) override;
+  bool doInitialization(Module &M) override;
+  static char ID;
+
+private:
+  void initializeCallbacks(Module &M);
+  bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
+  bool instrumentMemIntrinsic(Instruction *I);
+  bool ignoreMemoryAccess(Instruction *I);
+  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
+  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsWrite,
+                          Value *Addr, unsigned Alignment);
+  // Each tool has its own fastpath routine:
+  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
+                                   Value *Addr, unsigned Alignment);
+
+  EfficiencySanitizerOptions Options;
+  LLVMContext *Cxt;
+  Type *IntptrTy;
+  // Our slowpath involves callouts to the runtime library.
+  // Access sizes are powers of two: 1, 2, 4, 8, 16.
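+  // The arrays below are indexed by the log2 of the access size in bytes,
+  // e.g. a 4-byte access uses slot 2 (see getMemoryAccessFuncIndex).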
+  static const size_t kNumberOfAccessSizes = 5;
+  Function *EsanAlignedRead[kNumberOfAccessSizes];
+  Function *EsanAlignedWrite[kNumberOfAccessSizes];
+  Function *EsanUnalignedRead[kNumberOfAccessSizes];
+  Function *EsanUnalignedWrite[kNumberOfAccessSizes];
+  Function *MemmoveFn, *MemcpyFn, *MemsetFn;
+  Function *EsanCtorFunction;
+};
+} // namespace
+
+char EfficiencySanitizer::ID = 0;
+INITIALIZE_PASS(EfficiencySanitizer, "esan",
+                "EfficiencySanitizer: finds performance issues.",
+                false, false)
+
+const char *EfficiencySanitizer::getPassName() const {
+  return "EfficiencySanitizer";
+}
+
+FunctionPass *llvm::createEfficiencySanitizerPass(
+    const EfficiencySanitizerOptions &Options) {
+  return new EfficiencySanitizer(Options);
+}
+
+void EfficiencySanitizer::initializeCallbacks(Module &M) {
+  IRBuilder<> IRB(M.getContext());
+  // Initialize the callbacks.
+  for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
+    const unsigned ByteSize = 1U << i;
+    std::string ByteSizeStr = utostr(ByteSize);
+    // We'll inline the most common (i.e., aligned and frequent sizes)
+    // read + write instrumentation: these callouts are for the slowpath.
+    SmallString<32> AlignedReadName("__esan_aligned_read" + ByteSizeStr);
+    EsanAlignedRead[i] = checkSanitizerInterfaceFunction(
+        M.getOrInsertFunction(AlignedReadName, IRB.getVoidTy(),
+                              IRB.getInt8PtrTy(), nullptr));
+    SmallString<32> AlignedWriteName("__esan_aligned_write" + ByteSizeStr);
+    EsanAlignedWrite[i] = checkSanitizerInterfaceFunction(
+        M.getOrInsertFunction(AlignedWriteName, IRB.getVoidTy(),
+                              IRB.getInt8PtrTy(), nullptr));
+    SmallString<64> UnalignedReadName("__esan_unaligned_read" + ByteSizeStr);
+    EsanUnalignedRead[i] = checkSanitizerInterfaceFunction(
+        M.getOrInsertFunction(UnalignedReadName, IRB.getVoidTy(),
+                              IRB.getInt8PtrTy(), nullptr));
+    SmallString<64> UnalignedWriteName("__esan_unaligned_write" + ByteSizeStr);
+    EsanUnalignedWrite[i] = checkSanitizerInterfaceFunction(
+        M.getOrInsertFunction(UnalignedWriteName, IRB.getVoidTy(),
+                              IRB.getInt8PtrTy(), nullptr));
+  }
+  MemmoveFn = checkSanitizerInterfaceFunction(
+      M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
+  MemcpyFn = checkSanitizerInterfaceFunction(
+      M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                            IRB.getInt8PtrTy(), IntptrTy, nullptr));
+  MemsetFn = checkSanitizerInterfaceFunction(
+      M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+                            IRB.getInt32Ty(), IntptrTy, nullptr));
+}
+
+bool EfficiencySanitizer::doInitialization(Module &M) {
+  Cxt = &(M.getContext());
+  const DataLayout &DL = M.getDataLayout();
+  IRBuilder<> IRB(M.getContext());
+  IntegerType *OrdTy = IRB.getInt32Ty();
+  IntptrTy = DL.getIntPtrType(M.getContext());
+  std::tie(EsanCtorFunction, std::ignore) =
+      createSanitizerCtorAndInitFunctions(
+          M, kEsanModuleCtorName, kEsanInitName, /*InitArgTypes=*/{OrdTy},
+          /*InitArgs=*/{
+              ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType))});
+
+  appendToGlobalCtors(M, EsanCtorFunction, 0);
+
+  return true;
+}
+
+bool EfficiencySanitizer::ignoreMemoryAccess(Instruction *I) {
+  if (ClToolCacheFrag) {
+    // We'd like to know about cache fragmentation in vtable accesses and
+    // constant data references, so we do not currently ignore anything.
+    return false;
+  }
+  return false;
+}
+
+bool EfficiencySanitizer::runOnFunction(Function &F) {
+  // This is required to prevent instrumenting the call to __esan_init from
+  // within the module constructor.
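+  // (EsanCtorFunction is created in doInitialization and registered via
+  // appendToGlobalCtors, so runOnFunction is invoked on it like any other
+  // function.)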
+  if (&F == EsanCtorFunction)
+    return false;
+  initializeCallbacks(*F.getParent());
+  SmallVector<Instruction *, 8> LoadsAndStores;
+  SmallVector<Instruction *, 8> MemIntrinCalls;
+  bool Res = false;
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  for (auto &BB : F) {
+    for (auto &Inst : BB) {
+      if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
+           isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
+          !ignoreMemoryAccess(&Inst))
+        LoadsAndStores.push_back(&Inst);
+      else if (isa<MemIntrinsic>(Inst))
+        MemIntrinCalls.push_back(&Inst);
+    }
+  }
+
+  if (ClInstrumentMemoryAccesses) {
+    for (auto Inst : LoadsAndStores) {
+      Res |= instrumentLoadOrStore(Inst, DL);
+    }
+  }
+
+  if (ClInstrumentMemIntrinsics) {
+    for (auto Inst : MemIntrinCalls) {
+      Res |= instrumentMemIntrinsic(Inst);
+    }
+  }
+
+  return Res;
+}
+
+bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
+                                                const DataLayout &DL) {
+  IRBuilder<> IRB(I);
+  bool IsWrite;
+  Value *Addr;
+  unsigned Alignment;
+  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
+    IsWrite = false;
+    Alignment = Load->getAlignment();
+    Addr = Load->getPointerOperand();
+  } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
+    IsWrite = true;
+    Alignment = Store->getAlignment();
+    Addr = Store->getPointerOperand();
+  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+    IsWrite = true;
+    Alignment = 0;
+    Addr = RMW->getPointerOperand();
+  } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
+    IsWrite = true;
+    Alignment = 0;
+    Addr = Xchg->getPointerOperand();
+  } else
+    llvm_unreachable("Unsupported mem access type");
+
+  int Idx = getMemoryAccessFuncIndex(Addr, DL);
+  if (Idx < 0)
+    return false;
+  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
+  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+  Value *OnAccessFunc = nullptr;
+  if (IsWrite)
+    NumInstrumentedWrites++;
+  else
+    NumInstrumentedReads++;
+  if (instrumentFastpath(I, DL, IsWrite, Addr, Alignment)) {
+    NumFastpaths++;
+    return true;
+  }
+  if (Alignment == 0 || Alignment >= 8 || (Alignment % (TypeSize / 8)) == 0)
+    OnAccessFunc = IsWrite ? EsanAlignedWrite[Idx] : EsanAlignedRead[Idx];
+  else
+    OnAccessFunc = IsWrite ? EsanUnalignedWrite[Idx] : EsanUnalignedRead[Idx];
+  IRB.CreateCall(OnAccessFunc,
+                 IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+  return true;
+}
+
+// It's simplest to replace the memset/memmove/memcpy intrinsics with
+// calls that the runtime library intercepts.
+// Our pass is late enough that calls should not turn back into intrinsics.
+bool EfficiencySanitizer::instrumentMemIntrinsic(Instruction *I) {
+  IRBuilder<> IRB(I);
+  bool Res = false;
+  if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
+    IRB.CreateCall(
+        MemsetFn,
+        {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false),
+         IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+    I->eraseFromParent();
+    Res = true;
+  } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(I)) {
+    IRB.CreateCall(
+        isa<MemCpyInst>(M) ? MemcpyFn : MemmoveFn,
+        {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+    I->eraseFromParent();
+    Res = true;
+  }
+  // Report whether we actually replaced an intrinsic so that the pass
+  // correctly reports IR modification.
+  return Res;
+}
+
+int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
+                                                  const DataLayout &DL) {
+  Type *OrigPtrTy = Addr->getType();
+  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+  assert(OrigTy->isSized());
+  uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
+  if (TypeSize != 8 && TypeSize != 16 &&
+      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+    NumAccessesWithBadSize++;
+    // Ignore all unusual sizes.
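+    // (For instance, a 32-byte vector access has a 256-bit store size and
+    // falls through to here rather than mapping to a size-indexed callback.)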
+    return -1;
+  }
+  size_t Idx = countTrailingZeros(TypeSize / 8);
+  assert(Idx < kNumberOfAccessSizes);
+  return Idx;
+}
+
+bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
+                                             const DataLayout &DL,
+                                             bool IsWrite, Value *Addr,
+                                             unsigned Alignment) {
+  if (ClToolCacheFrag) {
+    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
+  }
+  return false;
+}
+
+bool EfficiencySanitizer::instrumentFastpathCacheFrag(
+    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
+  // TODO(bruening): implement a fastpath for aligned accesses.
+  return false;
+}
Index: lib/Transforms/Instrumentation/Instrumentation.cpp
===================================================================
--- lib/Transforms/Instrumentation/Instrumentation.cpp
+++ lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -67,6 +67,7 @@
   initializeThreadSanitizerPass(Registry);
   initializeSanitizerCoverageModulePass(Registry);
   initializeDataFlowSanitizerPass(Registry);
+  initializeEfficiencySanitizerPass(Registry);
 }
 
 /// LLVMInitializeInstrumentation - C binding for
Index: test/Instrumentation/EfficiencySanitizer/cache_frag_basic.ll
===================================================================
--- /dev/null
+++ test/Instrumentation/EfficiencySanitizer/cache_frag_basic.ll
@@ -0,0 +1,52 @@
+; Test basic EfficiencySanitizer cache frag instrumentation.
+;
+; RUN: opt < %s -esan -esan-cache-frag -S | FileCheck %s
+
+define i32 @loadWord(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 4
+  ret i32 %tmp1
+}
+
+; CHECK: @llvm.global_ctors = {{.*}}@esan.module_ctor
+
+; CHECK: %0 = bitcast i32* %a to i8*
+; CHECK-NEXT: call void @__esan_aligned_read4(i8* %0)
+; CHECK-NEXT: %tmp1 = load i32, i32* %a, align 4
+; CHECK-NEXT: ret i32 %tmp1
+
+; Ensure that esan converts memcpy intrinsics to calls:
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
+define void @memCpyTest(i8* nocapture %x, i8* nocapture %y) {
+entry:
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
+  ret void
+; CHECK: define void @memCpyTest
+; CHECK: call i8* @memcpy
+; CHECK: ret void
+}
+
+define void @memMoveTest(i8* nocapture %x, i8* nocapture %y) {
+entry:
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 16, i32 4, i1 false)
+  ret void
+; CHECK: define void @memMoveTest
+; CHECK: call i8* @memmove
+; CHECK: ret void
+}
+
+define void @memSetTest(i8* nocapture %x) {
+entry:
+  tail call void @llvm.memset.p0i8.i64(i8* %x, i8 77, i64 16, i32 4, i1 false)
+  ret void
+; CHECK: define void @memSetTest
+; CHECK: call i8* @memset
+; CHECK: ret void
+}
+
+; CHECK: define internal void @esan.module_ctor()
+; CHECK: call void @__esan_init(i32 0)
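+
+; A companion write test would mirror the read pattern above; kept here as
+; a comment sketch so the module-ctor checks above remain the final
+; FileCheck matches:
+;
+;   define void @storeWord(i32* %a) {
+;   entry:
+;     store i32 1, i32* %a, align 4
+;     ret void
+;   }
+;
+; for which esan should insert, before the store:
+;   %0 = bitcast i32* %a to i8*
+;   call void @__esan_aligned_write4(i8* %0)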