diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -145,6 +145,7 @@
                                      ///< linker.
 CODEGENOPT(MergeAllConstants , 1, 1) ///< Merge identical constants.
 CODEGENOPT(MergeFunctions , 1, 0) ///< Set when -fmerge-functions is enabled.
+CODEGENOPT(HeapProf , 1, 0) ///< Set when -fmemprof is enabled.
 CODEGENOPT(MSVolatile , 1, 0) ///< Set when /volatile:ms is enabled.
 CODEGENOPT(NoCommon , 1, 0) ///< Set when -fno-common or C++ is enabled.
 CODEGENOPT(NoDwarfDirectoryAsm , 1, 0) ///< Set when -fno-dwarf-directory-asm is
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -995,6 +995,8 @@
 def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group<f_clang_Group>,
   Flags<[CC1Option]>;
 
+defm memprof : OptInFFlag<"memprof", "Enable", "Disable", " heap memory profiling">;
+
 // Begin sanitizer flags. These should all be core options exposed in all driver
 // modes.
 let Flags = [CC1Option, CoreOption] in {
diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h
--- a/clang/include/clang/Driver/SanitizerArgs.h
+++ b/clang/include/clang/Driver/SanitizerArgs.h
@@ -55,13 +55,15 @@
   bool MinimalRuntime = false;
   // True if cross-dso CFI support is provided by the system (i.e. Android).
   bool ImplicitCfiRuntime = false;
+  bool NeedsHeapProfRt = false;
 
- public:
+public:
   /// Parses the sanitizer arguments from an argument list.
   SanitizerArgs(const ToolChain &TC, const llvm::opt::ArgList &Args);
 
   bool needsSharedRt() const { return SharedRuntime; }
 
+  bool needsHeapProfRt() const { return NeedsHeapProfRt; }
   bool needsAsanRt() const { return Sanitizers.has(SanitizerKind::Address); }
   bool needsHwasanRt() const {
     return Sanitizers.has(SanitizerKind::HWAddress);
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -67,6 +67,7 @@
 #include "llvm/Transforms/Instrumentation/BoundsChecking.h"
 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/HeapProfiler.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
@@ -267,6 +268,12 @@
   return false;
 }
 
+static void addHeapProfilerPasses(const PassManagerBuilder &Builder,
+                                  legacy::PassManagerBase &PM) {
+  PM.add(createHeapProfilerFunctionPass());
+  PM.add(createModuleHeapProfilerLegacyPassPass());
+}
+
 static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
                                       legacy::PassManagerBase &PM) {
   const PassManagerBuilderWrapper &BuilderWrapper =
@@ -662,6 +669,13 @@
   if (LangOpts.Coroutines)
     addCoroutinePassesToExtensionPoints(PMBuilder);
 
+  if (CodeGenOpts.HeapProf) {
+    PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+                           addHeapProfilerPasses);
+    PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+                           addHeapProfilerPasses);
+  }
+
   if (LangOpts.Sanitize.has(SanitizerKind::LocalBounds)) {
     PMBuilder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate,
                            addBoundsCheckingPass);
@@ -1367,6 +1381,11 @@
     }
   }
 
+  if (CodeGenOpts.HeapProf) {
+    MPM.addPass(createModuleToFunctionPassAdaptor(HeapProfilerPass()));
+    MPM.addPass(ModuleHeapProfilerPass());
+  }
+
   if (LangOpts.Sanitize.has(SanitizerKind::HWAddress)) {
     bool Recover = CodeGenOpts.SanitizeRecover.has(SanitizerKind::HWAddress);
     MPM.addPass(HWAddressSanitizerPass(
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -866,6 +866,9 @@
                        LinkCXXRuntimes) ||
       D.CCCIsCXX();
 
+  NeedsHeapProfRt =
+      Args.hasFlag(options::OPT_fmemprof, options::OPT_fno_memprof, false);
+
   // Finally, initialize the set of available and recoverable sanitizers.
   Sanitizers.Mask |= Kinds;
   RecoverableSanitizers.Mask |= RecoverableKinds;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4223,6 +4223,9 @@
   if (Args.getLastArg(options::OPT_save_temps_EQ))
     Args.AddLastArg(CmdArgs, options::OPT_save_temps_EQ);
 
+  if (Args.hasFlag(options::OPT_fmemprof, options::OPT_fno_memprof, false))
+    Args.AddLastArg(CmdArgs, options::OPT_fmemprof);
+
   // Embed-bitcode option.
   // Only white-listed flags below are allowed to be embedded.
   if (C.getDriver().embedBitcodeInObject() && !C.getDriver().isUsingLTO() &&
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -686,6 +686,11 @@
     if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
       HelperStaticRuntimes.push_back("asan-preinit");
   }
+  if (SanArgs.needsHeapProfRt() && SanArgs.linkRuntimes()) {
+    SharedRuntimes.push_back("heapprof");
+    if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
+      HelperStaticRuntimes.push_back("heapprof-preinit");
+  }
   if (SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) {
     if (SanArgs.requiresMinimalRuntime())
       SharedRuntimes.push_back("ubsan_minimal");
@@ -721,6 +726,13 @@
       StaticRuntimes.push_back("asan_cxx");
   }
 
+  if (!SanArgs.needsSharedRt() && SanArgs.needsHeapProfRt() &&
+      SanArgs.linkRuntimes()) {
+    StaticRuntimes.push_back("heapprof");
+    if (SanArgs.linkCXXRuntimes())
+      StaticRuntimes.push_back("heapprof_cxx");
+  }
+
   if (!SanArgs.needsSharedRt() && SanArgs.needsHwasanRt() && SanArgs.linkRuntimes()) {
     StaticRuntimes.push_back("hwasan");
     if (SanArgs.linkCXXRuntimes())
       StaticRuntimes.push_back("hwasan_cxx");
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1033,6 +1033,8 @@
   Opts.ThinLinkBitcodeFile =
       std::string(Args.getLastArgValue(OPT_fthin_link_bitcode_EQ));
 
+  Opts.HeapProf = Args.hasArg(OPT_fmemprof);
+
   Opts.MSVolatile = Args.hasArg(OPT_fms_volatile);
 
   Opts.VectorizeLoop = Args.hasArg(OPT_vectorize_loops);
diff --git a/clang/test/Driver/fmemprof.cpp b/clang/test/Driver/fmemprof.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/Driver/fmemprof.cpp
@@ -0,0 +1,6 @@
+// RUN: %clangxx -target x86_64-linux-gnu -fmemprof %s -### 2>&1 | FileCheck %s
+// RUN: %clangxx -target x86_64-linux-gnu -fmemprof -fno-memprof %s -### 2>&1 | FileCheck %s --check-prefix=OFF
+// CHECK: "-cc1" {{.*}} "-fmemprof"
+// CHECK: ld{{.*}}libclang_rt.heapprof{{.*}}libclang_rt.heapprof_cxx
+// OFF-NOT: "-fmemprof"
+// OFF-NOT: libclang_rt.heapprof
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -176,6 +176,7 @@
 void initializeGlobalsAAWrapperPassPass(PassRegistry&);
 void initializeGuardWideningLegacyPassPass(PassRegistry&);
 void initializeHardwareLoopsPass(PassRegistry&);
+void initializeHeapProfilerLegacyPassPass(PassRegistry &);
 void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
 void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
 void initializeIPSCCPLegacyPassPass(PassRegistry&);
@@ -303,6 +304,7 @@
 void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
 void initializeMetaRenamerPass(PassRegistry&);
 void initializeModuleDebugInfoPrinterPass(PassRegistry&);
+void initializeModuleHeapProfilerLegacyPassPass(PassRegistry &);
 void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&);
 void initializeModuloScheduleTestPass(PassRegistry&);
 void initializeMustExecutePrinterPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/HeapProfiler.h
@@ -0,0 +1,51 @@
+//===--------- Definition of the HeapProfiler class ------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the HeapProfiler class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_HEAPPROFILER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_HEAPPROFILER_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Public interface to the heap profiler pass for instrumenting code to
+/// profile heap memory accesses.
+///
+/// The profiler itself is a function pass that works by inserting various
+/// calls to the HeapProfiler runtime library functions. The runtime library
+/// essentially replaces malloc() and free() with custom implementations that
+/// record data about the allocations.
+class HeapProfilerPass : public PassInfoMixin<HeapProfilerPass> {
+public:
+  explicit HeapProfilerPass();
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
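For context, the two classes in this header are the new-pass-manager entry points. Scheduling them looks like this (a minimal C++ sketch that mirrors the BackendUtil.cpp hunk earlier in this patch; the helper function name is illustrative, not part of the patch):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Instrumentation/HeapProfiler.h"

    // Run the function-level instrumentation over every function first, then
    // the module pass that inserts the heapprof.module_ctor initializer.
    void scheduleHeapProfPasses(llvm::ModulePassManager &MPM) {
      MPM.addPass(llvm::createModuleToFunctionPassAdaptor(llvm::HeapProfilerPass()));
      MPM.addPass(llvm::ModuleHeapProfilerPass());
    }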
+
+/// Public interface to the heap profiler module pass for instrumenting code
+/// to profile heap memory allocations and accesses.
+class ModuleHeapProfilerPass : public PassInfoMixin<ModuleHeapProfilerPass> {
+public:
+  explicit ModuleHeapProfilerPass();
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+// Insert HeapProfiler instrumentation
+FunctionPass *createHeapProfilerFunctionPass();
+ModulePass *createModuleHeapProfilerLegacyPassPass();
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -110,6 +110,7 @@
 #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
 #include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/HeapProfiler.h"
 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
@@ -258,6 +259,10 @@
     cl::Hidden,
     cl::desc("Enable inline deferral during PGO"));
 
+static cl::opt<bool> EnableHeapProfiler("enable-heap-prof", cl::init(false),
+                                        cl::Hidden, cl::ZeroOrMore,
+                                        cl::desc("Enable heap profiler"));
+
 PipelineTuningOptions::PipelineTuningOptions() {
   LoopInterleaving = true;
   LoopVectorization = true;
@@ -1034,6 +1039,12 @@
     MPM.addPass(SyntheticCountsPropagation());
 
   MPM.addPass(buildInlinerPipeline(Level, Phase, DebugLogging));
+
+  if (EnableHeapProfiler && Phase != ThinLTOPhase::PreLink) {
+    MPM.addPass(createModuleToFunctionPassAdaptor(HeapProfilerPass()));
+    MPM.addPass(ModuleHeapProfilerPass());
+  }
+
   return MPM;
 }
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -97,6 +97,7 @@
 MODULE_PASS("tsan-module", ThreadSanitizerPass())
 MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
 MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
+MODULE_PASS("heapprof-module", ModuleHeapProfilerPass())
 MODULE_PASS("poison-checking", PoisonCheckingPass())
 #undef MODULE_PASS
 
@@ -276,6 +277,7 @@
 FUNCTION_PASS("msan", MemorySanitizerPass({}))
 FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
 FUNCTION_PASS("tsan", ThreadSanitizerPass())
+FUNCTION_PASS("heapprof", HeapProfilerPass())
 #undef FUNCTION_PASS
 
 #ifndef FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -5,6 +5,7 @@
   ControlHeightReduction.cpp
   DataFlowSanitizer.cpp
   GCOVProfiling.cpp
+  HeapProfiler.cpp
   MemorySanitizer.cpp
   IndirectCallPromotion.cpp
   Instrumentation.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp b/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/HeapProfiler.cpp
@@ -0,0 +1,613 @@
+//===- HeapProfiler.cpp - heap allocation and access profiler ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of HeapProfiler. Memory accesses are instrumented
+// to increment the access count held in a shadow memory location, or
+// alternatively to call into the runtime. Memory intrinsic calls (memmove,
+// memcpy, memset) are changed to call the heap profiling runtime version
+// instead.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/HeapProfiler.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "heapprof"
+
+constexpr int LLVM_HEAP_PROFILER_VERSION = 1;
+
+// Size of memory mapped to a single shadow location.
+constexpr uint64_t DefaultShadowGranularity = 64;
+
+// Scale from granularity down to shadow size.
+constexpr uint64_t DefaultShadowScale = 3;
+
+constexpr char HeapProfModuleCtorName[] = "heapprof.module_ctor";
+constexpr uint64_t HeapProfCtorAndDtorPriority = 1;
+// On Emscripten, the system needs more than one priority for constructors.
+constexpr uint64_t HeapProfEmscriptenCtorAndDtorPriority = 50;
+constexpr char HeapProfInitName[] = "__heapprof_init";
+constexpr char HeapProfVersionCheckNamePrefix[] =
+    "__heapprof_version_mismatch_check_v";
+
+constexpr char HeapProfShadowMemoryDynamicAddress[] =
+    "__heapprof_shadow_memory_dynamic_address";
+
+// Command-line flags.
+
+static cl::opt<bool> ClInsertVersionCheck(
+    "heapprof-guard-against-version-mismatch",
+    cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,
+    cl::init(true));
+
+// This flag may need to be replaced with -f[no-]memprof-reads.
+static cl::opt<bool> ClInstrumentReads("heapprof-instrument-reads",
+                                       cl::desc("instrument read instructions"),
+                                       cl::Hidden, cl::init(true));
+
+static cl::opt<bool>
+    ClInstrumentWrites("heapprof-instrument-writes",
+                       cl::desc("instrument write instructions"), cl::Hidden,
+                       cl::init(true));
+
+static cl::opt<bool> ClInstrumentAtomics(
+    "heapprof-instrument-atomics",
+    cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
+    cl::init(true));
+
+static cl::opt<bool> ClUseCalls(
+    "heapprof-use-callbacks",
+    cl::desc("Use callbacks instead of inline instrumentation sequences."),
+    cl::Hidden, cl::init(false));
+
+static cl::opt<std::string>
+    ClMemoryAccessCallbackPrefix("heapprof-memory-access-callback-prefix",
+                                 cl::desc("Prefix for memory access callbacks"),
+                                 cl::Hidden, cl::init("__heapprof_"));
+
+// These flags allow changing the shadow mapping.
+// The shadow mapping looks like
+//    Shadow = ((Mem & mask) >> scale) + offset
+
+static cl::opt<int> ClMappingScale("heapprof-mapping-scale",
+                                   cl::desc("scale of heapprof shadow mapping"),
+                                   cl::Hidden, cl::init(DefaultShadowScale));
+
+static cl::opt<int>
+    ClMappingGranularity("heapprof-mapping-granularity",
+                         cl::desc("granularity of heapprof shadow mapping"),
+                         cl::Hidden, cl::init(DefaultShadowGranularity));
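Concretely, with the defaults above (granularity 64, scale 3), every aligned 64-byte window of memory maps to one 8-byte shadow counter. A minimal C++ sketch of the address arithmetic the instrumentation emits (function and parameter names here are illustrative, not part of the patch):

    #include <cstdint>

    // 'offset' stands for the value loaded from
    // __heapprof_shadow_memory_dynamic_address at function entry.
    uint64_t shadowCounterAddress(uint64_t mem, uint64_t offset) {
      const uint64_t Granularity = 64;          // -heapprof-mapping-granularity
      const uint64_t Scale = 3;                 // -heapprof-mapping-scale
      const uint64_t Mask = ~(Granularity - 1); // clear the low 6 bits
      return ((mem & Mask) >> Scale) + offset;  // 64-byte window -> 8-byte counter
    }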
+
+// Debug flags.
+
+static cl::opt<int> ClDebug("heapprof-debug", cl::desc("debug"), cl::Hidden,
+                            cl::init(0));
+
+static cl::opt<std::string> ClDebugFunc("heapprof-debug-func", cl::Hidden,
+                                        cl::desc("Debug func"));
+
+static cl::opt<int> ClDebugMin("heapprof-debug-min", cl::desc("Debug min inst"),
+                               cl::Hidden, cl::init(-1));
+
+static cl::opt<int> ClDebugMax("heapprof-debug-max", cl::desc("Debug max inst"),
+                               cl::Hidden, cl::init(-1));
+
+STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
+STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+
+namespace {
+
+/// This struct defines the shadow mapping using the rule:
+///   shadow = ((mem & mask) >> Scale) + DynamicShadowOffset.
+struct ShadowMapping {
+  ShadowMapping() {
+    Scale = ClMappingScale;
+    Granularity = ClMappingGranularity;
+    Mask = ~(Granularity - 1);
+  }
+
+  int Scale;
+  int Granularity;
+  uint64_t Mask; // Computed as ~(Granularity-1)
+};
+
+static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {
+  return TargetTriple.isOSEmscripten() ? HeapProfEmscriptenCtorAndDtorPriority
+                                       : HeapProfCtorAndDtorPriority;
+}
+
+struct InterestingMemoryAccess {
+  Value *Addr = nullptr;
+  bool IsWrite;
+  unsigned Alignment;
+  uint64_t TypeSize;
+  Value *MaybeMask = nullptr;
+};
+
+/// Instrument the code in module to profile heap accesses.
+class HeapProfiler {
+public:
+  HeapProfiler(Module &M) {
+    C = &(M.getContext());
+    LongSize = M.getDataLayout().getPointerSizeInBits();
+    IntptrTy = Type::getIntNTy(*C, LongSize);
+  }
+
+  /// If it is an interesting memory access, populate information
+  /// about the access and return an InterestingMemoryAccess struct.
+  /// Otherwise return None.
+  Optional<InterestingMemoryAccess> isInterestingMemoryAccess(Instruction *I);
+
+  void instrumentMop(Instruction *I, const DataLayout &DL,
+                     InterestingMemoryAccess &Access);
+  void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
+                         Value *Addr, uint32_t TypeSize, bool IsWrite);
+  void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
+                                   Instruction *I, Value *Addr,
+                                   unsigned Alignment, uint32_t TypeSize,
+                                   bool IsWrite);
+  void instrumentMemIntrinsic(MemIntrinsic *MI);
+  Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+  bool instrumentFunction(Function &F);
+  bool maybeInsertHeapProfInitAtFunctionEntry(Function &F);
+  void insertDynamicShadowAtFunctionEntry(Function &F);
+
+private:
+  void initializeCallbacks(Module &M);
+
+  LLVMContext *C;
+  int LongSize;
+  Type *IntptrTy;
+  ShadowMapping Mapping;
+
+  // These arrays are indexed by AccessIsWrite.
+  FunctionCallee HeapProfMemoryAccessCallback[2];
+  FunctionCallee HeapProfMemoryAccessCallbackSized[2];
+
+  FunctionCallee HeapProfMemmove, HeapProfMemcpy, HeapProfMemset;
+  Value *DynamicShadowOffset = nullptr;
+};
+
+class HeapProfilerLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  explicit HeapProfilerLegacyPass() : FunctionPass(ID) {
+    initializeHeapProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "HeapProfilerFunctionPass"; }
+
+  bool runOnFunction(Function &F) override {
+    HeapProfiler Profiler(*F.getParent());
+    return Profiler.instrumentFunction(F);
+  }
+};
+
+class ModuleHeapProfiler {
+public:
+  ModuleHeapProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }
+
+  bool instrumentModule(Module &);
+
+private:
+  Triple TargetTriple;
+  ShadowMapping Mapping;
+  Function *HeapProfCtorFunction = nullptr;
+};
+
+class ModuleHeapProfilerLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  explicit ModuleHeapProfilerLegacyPass() : ModulePass(ID) {
+    initializeModuleHeapProfilerLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "ModuleHeapProfiler"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {}
+
+  bool runOnModule(Module &M) override {
+    ModuleHeapProfiler HeapProfiler(M);
+    return HeapProfiler.instrumentModule(M);
+  }
+};
+
+} // end anonymous namespace
+
+HeapProfilerPass::HeapProfilerPass() {}
+
+PreservedAnalyses HeapProfilerPass::run(Function &F,
+                                        AnalysisManager<Function> &AM) {
+  Module &M = *F.getParent();
+  HeapProfiler Profiler(M);
+  if (Profiler.instrumentFunction(F))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+ModuleHeapProfilerPass::ModuleHeapProfilerPass() {}
+
+PreservedAnalyses ModuleHeapProfilerPass::run(Module &M,
+                                              AnalysisManager<Module> &AM) {
+  ModuleHeapProfiler Profiler(M);
+  if (Profiler.instrumentModule(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+char HeapProfilerLegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HeapProfilerLegacyPass, "heapprof",
+                      "HeapProfiler: profile heap allocations and accesses.",
+                      false, false)
+INITIALIZE_PASS_END(HeapProfilerLegacyPass, "heapprof",
+                    "HeapProfiler: profile heap allocations and accesses.",
+                    false, false)
+
+FunctionPass *llvm::createHeapProfilerFunctionPass() {
+  return new HeapProfilerLegacyPass();
+}
+
+char ModuleHeapProfilerLegacyPass::ID = 0;
+
+INITIALIZE_PASS(ModuleHeapProfilerLegacyPass, "heapprof-module",
+                "HeapProfiler: profile heap allocations and accesses. "
+                "ModulePass",
+                false, false)
+
+ModulePass *llvm::createModuleHeapProfilerLegacyPassPass() {
+  return new ModuleHeapProfilerLegacyPass();
+}
+
+Value *HeapProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+  // (Shadow & mask) >> scale
+  Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);
+  Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
+  // (Shadow >> scale) + offset
+  assert(DynamicShadowOffset);
+  return IRB.CreateAdd(Shadow, DynamicShadowOffset);
+}
+
+// Instrument memset/memmove/memcpy
+void HeapProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {
+  IRBuilder<> IRB(MI);
+  if (isa<MemTransferInst>(MI)) {
+    IRB.CreateCall(
+        isa<MemMoveInst>(MI) ? HeapProfMemmove : HeapProfMemcpy,
+        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+  } else if (isa<MemSetInst>(MI)) {
+    IRB.CreateCall(
+        HeapProfMemset,
+        {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+         IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+         IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+  }
+  MI->eraseFromParent();
+}
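At runtime, instrumentMemIntrinsic amounts to routing the intrinsic through a profiling wrapper with a libc-compatible signature. A hypothetical C++ sketch of two such wrappers (the real implementations live in the compiler-rt heapprof runtime, which is not part of this patch; RecordAccess is a made-up hook shown only as a comment):

    #include <cstddef>
    #include <cstring>

    extern "C" void *__heapprof_memcpy(void *to, const void *from, size_t size) {
      // RecordAccess(to, size);   // hypothetical: count the written range
      // RecordAccess(from, size); // hypothetical: count the read range
      return memcpy(to, from, size);
    }

    extern "C" void *__heapprof_memset(void *block, int c, size_t size) {
      // RecordAccess(block, size);
      return memset(block, c, size);
    }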
+
+Optional<InterestingMemoryAccess>
+HeapProfiler::isInterestingMemoryAccess(Instruction *I) {
+  // Do not instrument the load fetching the dynamic shadow address.
+  if (DynamicShadowOffset == I)
+    return None;
+
+  InterestingMemoryAccess Access;
+
+  const DataLayout &DL = I->getModule()->getDataLayout();
+  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+    if (!ClInstrumentReads)
+      return None;
+    Access.IsWrite = false;
+    Access.TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
+    Access.Alignment = LI->getAlignment();
+    Access.Addr = LI->getPointerOperand();
+  } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+    if (!ClInstrumentWrites)
+      return None;
+    Access.IsWrite = true;
+    Access.TypeSize =
+        DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
+    Access.Alignment = SI->getAlignment();
+    Access.Addr = SI->getPointerOperand();
+  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+    if (!ClInstrumentAtomics)
+      return None;
+    Access.IsWrite = true;
+    Access.TypeSize =
+        DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
+    Access.Alignment = 0;
+    Access.Addr = RMW->getPointerOperand();
+  } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
+    if (!ClInstrumentAtomics)
+      return None;
+    Access.IsWrite = true;
+    Access.TypeSize =
+        DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
+    Access.Alignment = 0;
+    Access.Addr = XCHG->getPointerOperand();
+  } else if (auto *CI = dyn_cast<CallInst>(I)) {
+    auto *F = CI->getCalledFunction();
+    if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||
+              F->getIntrinsicID() == Intrinsic::masked_store)) {
+      unsigned OpOffset = 0;
+      if (F->getIntrinsicID() == Intrinsic::masked_store) {
+        if (!ClInstrumentWrites)
+          return None;
+        // Masked store has an initial operand for the value.
+        OpOffset = 1;
+        Access.IsWrite = true;
+      } else {
+        if (!ClInstrumentReads)
+          return None;
+        Access.IsWrite = false;
+      }
+
+      auto *BasePtr = CI->getOperand(0 + OpOffset);
+      auto *Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+      Access.TypeSize = DL.getTypeStoreSizeInBits(Ty);
+      if (auto *AlignmentConstant =
+              dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+        Access.Alignment = (unsigned)AlignmentConstant->getZExtValue();
+      else
+        Access.Alignment = 1; // No alignment guarantees; we probably got Undef.
+      Access.MaybeMask = CI->getOperand(2 + OpOffset);
+      Access.Addr = BasePtr;
+    }
+  }
+
+  if (!Access.Addr)
+    return None;
+
+  // Do not instrument accesses from different address spaces; we cannot deal
+  // with them.
+  Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());
+  if (PtrTy->getPointerAddressSpace() != 0)
+    return None;
+
+  // Ignore swifterror addresses.
+  // swifterror memory addresses are mem2reg promoted by instruction
+  // selection. As such they cannot have regular uses like an instrumentation
+  // function and it makes no sense to track them as memory.
+  if (Access.Addr->isSwiftError())
+    return None;
+
+  return Access;
+}
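Masked vector accesses found above cannot be counted with a single shadow update, so instrumentMaskedLoadOrStore (next) expands them lane by lane. In C++ terms, the emitted logic is roughly the following sketch (illustrative only; with a constant mask the pass skips dead lanes outright instead of branching, and without -heapprof-use-callbacks the call is replaced by the inline shadow increment):

    #include <cstdint>

    extern "C" void __heapprof_store(uintptr_t addr); // runtime callback

    // Per-lane expansion for a masked store of N lanes; for a variable mask
    // each lane gets its own branch (the THEN/AFTER blocks in the tests below).
    template <typename T, unsigned N>
    void instrumentedMaskedStore(const bool (&mask)[N], T *p) {
      for (unsigned lane = 0; lane < N; ++lane)
        if (mask[lane])
          __heapprof_store(reinterpret_cast<uintptr_t>(&p[lane]));
    }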
+
+void HeapProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL,
+                                               Value *Mask, Instruction *I,
+                                               Value *Addr, unsigned Alignment,
+                                               uint32_t TypeSize,
+                                               bool IsWrite) {
+  auto *VTy =
+      cast<VectorType>(cast<PointerType>(Addr->getType())->getElementType());
+  uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
+  unsigned Num = VTy->getNumElements();
+  auto *Zero = ConstantInt::get(IntptrTy, 0);
+  for (unsigned Idx = 0; Idx < Num; ++Idx) {
+    Value *InstrumentedAddress = nullptr;
+    Instruction *InsertBefore = I;
+    if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
+      // dyn_cast as we might get UndefValue
+      if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
+        if (Masked->isZero())
+          // Mask is constant false, so no instrumentation needed.
+          continue;
+        // If we have a true or undef value, fall through to instrumentAddress
+        // with InsertBefore == I.
+      }
+    } else {
+      IRBuilder<> IRB(I);
+      Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
+      Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
+      InsertBefore = ThenTerm;
+    }
+
+    IRBuilder<> IRB(InsertBefore);
+    InstrumentedAddress =
+        IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+    instrumentAddress(I, InsertBefore, InstrumentedAddress, ElemTypeSize,
+                      IsWrite);
+  }
+}
+
+void HeapProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
+                                 InterestingMemoryAccess &Access) {
+  if (Access.IsWrite)
+    NumInstrumentedWrites++;
+  else
+    NumInstrumentedReads++;
+
+  if (Access.MaybeMask) {
+    instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
+                                Access.Alignment, Access.TypeSize,
+                                Access.IsWrite);
+  } else {
+    // Since the access counts will be accumulated across the entire
+    // allocation, we only update the shadow access count for the first
+    // location and thus don't need to worry about alignment and type size.
+    instrumentAddress(I, I, Access.Addr, Access.TypeSize, Access.IsWrite);
+  }
+}
+
+void HeapProfiler::instrumentAddress(Instruction *OrigIns,
+                                     Instruction *InsertBefore, Value *Addr,
+                                     uint32_t TypeSize, bool IsWrite) {
+  IRBuilder<> IRB(InsertBefore);
+  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+
+  if (ClUseCalls) {
+    IRB.CreateCall(HeapProfMemoryAccessCallback[IsWrite], AddrLong);
+    return;
+  }
+
+  // Create an inline sequence to compute the shadow location and increment
+  // the value by one.
+  Type *ShadowTy = Type::getInt64Ty(*C);
+  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+  Value *ShadowPtr = memToShadow(AddrLong, IRB);
+  Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
+  Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
+  Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);
+  ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
+  IRB.CreateStore(ShadowValue, ShadowAddr);
+}
+
+bool ModuleHeapProfiler::instrumentModule(Module &M) {
+  // Create a module constructor.
+  std::string HeapProfVersion = std::to_string(LLVM_HEAP_PROFILER_VERSION);
+  std::string VersionCheckName =
+      ClInsertVersionCheck ? (HeapProfVersionCheckNamePrefix + HeapProfVersion)
+                           : "";
+  std::tie(HeapProfCtorFunction, std::ignore) =
+      createSanitizerCtorAndInitFunctions(M, HeapProfModuleCtorName,
+                                          HeapProfInitName, /*InitArgTypes=*/{},
+                                          /*InitArgs=*/{}, VersionCheckName);
+
+  const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);
+  appendToGlobalCtors(M, HeapProfCtorFunction, Priority);
+
+  return true;
+}
+
+void HeapProfiler::initializeCallbacks(Module &M) {
+  IRBuilder<> IRB(*C);
+
+  for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
+    const std::string TypeStr = AccessIsWrite ? "store" : "load";
+
+    SmallVector<Type *, 2> Args2 = {IntptrTy, IntptrTy};
+    SmallVector<Type *, 1> Args1{1, IntptrTy};
+    HeapProfMemoryAccessCallbackSized[AccessIsWrite] =
+        M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr + "N",
+                              FunctionType::get(IRB.getVoidTy(), Args2, false));
+
+    HeapProfMemoryAccessCallback[AccessIsWrite] =
+        M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr,
+                              FunctionType::get(IRB.getVoidTy(), Args1, false));
+  }
+  HeapProfMemmove = M.getOrInsertFunction(
+      ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
+      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+  HeapProfMemcpy = M.getOrInsertFunction(
+      ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
+      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
+  HeapProfMemset = M.getOrInsertFunction(
+      ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
+      IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
+}
+
+bool HeapProfiler::maybeInsertHeapProfInitAtFunctionEntry(Function &F) {
+  // For each NSObject descendant having a +load method, this method is
+  // invoked by the ObjC runtime before any of the static constructors is
+  // called. Therefore we need to instrument such methods with a call to
+  // __heapprof_init at the beginning in order to initialize our runtime
+  // before any access to the shadow memory.
+  // We cannot just ignore these methods, because they may call other
+  // instrumented functions.
+  if (F.getName().find(" load]") != std::string::npos) {
+    FunctionCallee HeapProfInitFunction =
+        declareSanitizerInitFunction(*F.getParent(), HeapProfInitName, {});
+    IRBuilder<> IRB(&F.front(), F.front().begin());
+    IRB.CreateCall(HeapProfInitFunction, {});
+    return true;
+  }
+  return false;
+}
+
+void HeapProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {
+  IRBuilder<> IRB(&F.front().front());
+  Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
+      HeapProfShadowMemoryDynamicAddress, IntptrTy);
+  DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);
+}
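The getOrInsertFunction calls above pin down the runtime ABI: __heapprof_load/__heapprof_store take an address, the sized __heapprof_loadN/__heapprof_storeN variants take an address plus a size, and the mem* wrappers mirror libc. Purely as an illustration — the real runtime ships separately as libclang_rt.heapprof and attributes counts to allocations via the shadow — counting stubs could look like:

    #include <atomic>
    #include <cstdint>

    static std::atomic<uint64_t> NumLoads{0}, NumStores{0};

    extern "C" void __heapprof_load(uintptr_t) { ++NumLoads; }
    extern "C" void __heapprof_store(uintptr_t) { ++NumStores; }
    extern "C" void __heapprof_loadN(uintptr_t, uintptr_t) { ++NumLoads; }
    extern "C" void __heapprof_storeN(uintptr_t, uintptr_t) { ++NumStores; }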
+
+bool HeapProfiler::instrumentFunction(Function &F) {
+  if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
+    return false;
+  if (ClDebugFunc == F.getName())
+    return false;
+  if (F.getName().startswith("__heapprof_"))
+    return false;
+
+  bool FunctionModified = false;
+
+  // If needed, insert __heapprof_init.
+  // This function needs to be called even if the function body is not
+  // instrumented.
+  if (maybeInsertHeapProfInitAtFunctionEntry(F))
+    FunctionModified = true;
+
+  LLVM_DEBUG(dbgs() << "HEAPPROF instrumenting:\n" << F << "\n");
+
+  initializeCallbacks(*F.getParent());
+
+  insertDynamicShadowAtFunctionEntry(F);
+
+  SmallVector<Instruction *, 16> ToInstrument;
+
+  // Fill the set of memory operations to instrument.
+  for (auto &BB : F) {
+    for (auto &Inst : BB) {
+      if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))
+        ToInstrument.push_back(&Inst);
+    }
+  }
+
+  int NumInstrumented = 0;
+  for (auto *Inst : ToInstrument) {
+    if (ClDebugMin < 0 || ClDebugMax < 0 ||
+        (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
+      Optional<InterestingMemoryAccess> Access =
+          isInterestingMemoryAccess(Inst);
+      if (Access)
+        instrumentMop(Inst, F.getParent()->getDataLayout(), *Access);
+      else
+        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+    }
+    NumInstrumented++;
+  }
+
+  if (NumInstrumented > 0)
+    FunctionModified = true;
+
+  LLVM_DEBUG(dbgs() << "HEAPPROF done instrumenting: " << FunctionModified
+                    << " " << F << "\n");
+
+  return FunctionModified;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
--- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -105,6 +105,8 @@
 void llvm::initializeInstrumentation(PassRegistry &Registry) {
   initializeAddressSanitizerLegacyPassPass(Registry);
   initializeModuleAddressSanitizerLegacyPassPass(Registry);
+  initializeHeapProfilerLegacyPassPass(Registry);
+  initializeModuleHeapProfilerLegacyPassPass(Registry);
   initializeBoundsCheckingLegacyPassPass(Registry);
   initializeControlHeightReductionLegacyPassPass(Registry);
   initializeGCOVProfilerLegacyPassPass(Registry);
diff --git a/llvm/test/Instrumentation/HeapProfiler/basic.ll b/llvm/test/Instrumentation/HeapProfiler/basic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/HeapProfiler/basic.ll
@@ -0,0 +1,179 @@
+; Test basic heap profiler instrumentation.
+;
+; RUN: opt < %s -heapprof -heapprof-module -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s
+; RUN: opt < %s -heapprof -heapprof-module -heapprof-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s
+
+; We need the requires since both heapprof and heapprof-module require reading
+; module level metadata which is done once by the heapprof-globals-md analysis.
+; RUN: opt < %s -passes='function(heapprof),module(heapprof-module)' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s
+; RUN: opt < %s -passes='function(heapprof),module(heapprof-module)' -heapprof-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; CHECK: @llvm.global_ctors = {{.*}}@heapprof.module_ctor
+
+define i32 @test_load(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 4
+  ret i32 %tmp1
+}
+; CHECK-LABEL: @test_load
+; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, i64* @__heapprof_shadow_memory_dynamic_address
+; CHECK-NEXT: %[[LOAD_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[LOAD_ADDR]], -64
+; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3
+; CHECK-S5-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 5
+; CHECK-NEXT: add i64 %[[SHIFTED_ADDR]], %[[SHADOW_OFFSET]]
+; CHECK-NEXT: %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
+; CHECK-NEXT: %[[LOAD_SHADOW:[^ ]*]] = load i64, i64* %[[LOAD_SHADOW_PTR]]
+; CHECK-NEXT: %[[NEW_SHADOW:[^ ]*]] = add i64 %[[LOAD_SHADOW]], 1
+; CHECK-NEXT: store i64 %[[NEW_SHADOW]], i64* %[[LOAD_SHADOW_PTR]]
+; The actual load.
+; CHECK-NEXT: %tmp1 = load i32, i32* %a
+; CHECK-NEXT: ret i32 %tmp1
+
+define void @test_store(i32* %a) {
+entry:
+  store i32 42, i32* %a, align 4
+  ret void
+}
+; CHECK-LABEL: @test_store
+; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, i64* @__heapprof_shadow_memory_dynamic_address
+; CHECK-NEXT: %[[STORE_ADDR:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[STORE_ADDR]], -64
+; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3
+; CHECK-S5-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 5
+; CHECK-NEXT: add i64 %[[SHIFTED_ADDR]], %[[SHADOW_OFFSET]]
+; CHECK-NEXT: %[[STORE_SHADOW_PTR:[^ ]*]] = inttoptr
+; CHECK-NEXT: %[[STORE_SHADOW:[^ ]*]] = load i64, i64* %[[STORE_SHADOW_PTR]]
+; CHECK-NEXT: %[[NEW_SHADOW:[^ ]*]] = add i64 %[[STORE_SHADOW]], 1
+; CHECK-NEXT: store i64 %[[NEW_SHADOW]], i64* %[[STORE_SHADOW_PTR]]
+; The actual store.
+; CHECK-NEXT: store i32 42, i32* %a
+; CHECK-NEXT: ret void
+
+define void @FP80Test(x86_fp80* nocapture %a) nounwind uwtable {
+entry:
+  store x86_fp80 0xK3FFF8000000000000000, x86_fp80* %a, align 16
+  ret void
+}
+; CHECK-LABEL: @FP80Test
+; Exactly one shadow update for the store access.
+; CHECK-NOT: store i64
+; CHECK: %[[NEW_ST_SHADOW:[^ ]*]] = add i64 %{{.*}}, 1
+; CHECK-NEXT: store i64 %[[NEW_ST_SHADOW]]
+; CHECK-NOT: store i64
+; The actual store.
+; CHECK: store x86_fp80 0xK3FFF8000000000000000, x86_fp80* %a
+; CHECK: ret void
+
+define void @i40test(i40* %a, i40* %b) nounwind uwtable {
+entry:
+  %t = load i40, i40* %a
+  store i40 %t, i40* %b, align 8
+  ret void
+}
+; CHECK-LABEL: @i40test
+; Exactly one shadow update for the load access.
+; CHECK-NOT: store i64
+; CHECK: %[[NEW_LD_SHADOW:[^ ]*]] = add i64 %{{.*}}, 1
+; CHECK-NEXT: store i64 %[[NEW_LD_SHADOW]]
+; CHECK-NOT: store i64
+; The actual load.
+; CHECK: %t = load i40, i40* %a
+; Exactly one shadow update for the store access.
+; CHECK-NOT: store i64
+; CHECK: %[[NEW_ST_SHADOW:[^ ]*]] = add i64 %{{.*}}, 1
+; CHECK-NEXT: store i64 %[[NEW_ST_SHADOW]]
+; CHECK-NOT: store i64
+; The actual store.
+; CHECK: store i40 %t, i40* %b
+; CHECK: ret void
+
+define void @i64test_align1(i64* %b) nounwind uwtable {
+entry:
+  store i64 0, i64* %b, align 1
+  ret void
+}
+; CHECK-LABEL: @i64test_align1
+; Exactly one shadow update for the store access.
+; CHECK-NOT: store i64
+; CHECK: %[[NEW_ST_SHADOW:[^ ]*]] = add i64 %{{.*}}, 1
+; CHECK-NEXT: store i64 %[[NEW_ST_SHADOW]]
+; CHECK-NOT: store i64
+; The actual store.
+; CHECK: store i64 0, i64* %b
+; CHECK: ret void
+
+define void @i80test(i80* %a, i80* %b) nounwind uwtable {
+entry:
+  %t = load i80, i80* %a
+  store i80 %t, i80* %b, align 8
+  ret void
+}
+; CHECK-LABEL: @i80test
+; Exactly one shadow update for the load access.
+; CHECK-NOT: store i64
+; CHECK: %[[NEW_LD_SHADOW:[^ ]*]] = add i64 %{{.*}}, 1
+; CHECK-NEXT: store i64 %[[NEW_LD_SHADOW]]
+; CHECK-NOT: store i64
+; The actual load.
+; CHECK: %t = load i80, i80* %a
+; Exactly one shadow update for the store access.
+; CHECK-NOT: store i64
+; CHECK: %[[NEW_ST_SHADOW:[^ ]*]] = add i64 %{{.*}}, 1
+; CHECK-NEXT: store i64 %[[NEW_ST_SHADOW]]
+; CHECK-NOT: store i64
+; The actual store.
+; CHECK: store i80 %t, i80* %b
+; CHECK: ret void
+
+; heapprof should not instrument functions with available_externally linkage.
+define available_externally i32 @f_available_externally(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a
+  ret i32 %tmp1
+}
+; CHECK-LABEL: @f_available_externally
+; CHECK-NOT: __heapprof_shadow_memory_dynamic_address
+; CHECK: ret i32
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) nounwind
+
+define void @memintr_test(i8* %a, i8* %b) nounwind uwtable {
+entry:
+  tail call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 100, i1 false)
+  tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %a, i8* %b, i64 100, i1 false)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 100, i1 false)
+  ret void
+}
+
+; CHECK-LABEL: memintr_test
+; CHECK: __heapprof_memset
+; CHECK: __heapprof_memmove
+; CHECK: __heapprof_memcpy
+; CHECK: ret void
+
+declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture writeonly, i8, i64, i32) nounwind
+declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32) nounwind
+declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32) nounwind
+
+define void @memintr_element_atomic_test(i8* %a, i8* %b) nounwind uwtable {
+  ; This is a canary test to make sure that these don't get lowered into calls that don't
+  ; have the element-atomic property. Eventually, heapprof will have to be enhanced to lower
+  ; these properly.
+  ; CHECK-LABEL: memintr_element_atomic_test
+  ; CHECK: tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %a, i8 0, i64 100, i32 1)
+  ; CHECK: tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 100, i32 1)
+  ; CHECK: tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 100, i32 1)
+  ; CHECK: ret void
+  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 1 %a, i8 0, i64 100, i32 1)
+  tail call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 100, i32 1)
+  tail call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* align 1 %a, i8* align 1 %b, i64 100, i32 1)
+  ret void
+}
+
+; CHECK: define internal void @heapprof.module_ctor()
+; CHECK: call void @__heapprof_init()
diff --git a/llvm/test/Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll b/llvm/test/Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/HeapProfiler/instrumentation-use-callbacks.ll
@@ -0,0 +1,36 @@
+; Test heapprof internal compiler flags:
+;   -heapprof-use-callbacks
+;   -heapprof-memory-access-callback-prefix
+
+; RUN: opt < %s -heapprof -heapprof-module -heapprof-use-callbacks -S | FileCheck %s --check-prefix=CHECK-CALL --check-prefix=CHECK-CALL-DEFAULT
+; RUN: opt < %s -heapprof -heapprof-module -heapprof-use-callbacks -heapprof-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CALL --check-prefix=CHECK-CALL-CUSTOM
+; RUN: opt < %s -heapprof -heapprof-module -heapprof-use-callbacks=false -S | FileCheck %s --check-prefix=CHECK-INLINE
+; RUN: opt < %s -heapprof -heapprof-module -S | FileCheck %s --check-prefix=CHECK-INLINE
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test_load(i32* %a, i64* %b, i512* %c, i80* %d) {
+entry:
+; CHECK-CALL: %[[LOAD_ADDR1:[^ ]*]] = ptrtoint i32* %a to i64
+; CHECK-CALL-DEFAULT: call void @__heapprof_load(i64 %[[LOAD_ADDR1]])
+; CHECK-CALL-CUSTOM: call void @__foo_load(i64 %[[LOAD_ADDR1]])
+; CHECK-CALL: %[[LOAD_ADDR2:[^ ]*]] = ptrtoint i64* %b to i64
+; CHECK-CALL-DEFAULT: call void @__heapprof_load(i64 %[[LOAD_ADDR2]])
+; CHECK-CALL-CUSTOM: call void @__foo_load(i64 %[[LOAD_ADDR2]])
+; CHECK-CALL: %[[LOAD_ADDR3:[^ ]*]] = ptrtoint i512* %c to i64
+; CHECK-CALL-DEFAULT: call void @__heapprof_load(i64 %[[LOAD_ADDR3]])
+; CHECK-CALL-CUSTOM: call void @__foo_load(i64 %[[LOAD_ADDR3]])
+; CHECK-CALL: %[[LOAD_ADDR4:[^ ]*]] = ptrtoint i80* %d to i64
+; CHECK-CALL-DEFAULT: call void @__heapprof_load(i64 %[[LOAD_ADDR4]])
+; CHECK-CALL-CUSTOM: call void @__foo_load(i64 %[[LOAD_ADDR4]])
+; CHECK-CALL-DEFAULT-NOT: call void @__heapprof_load
+; CHECK-CALL-CUSTOM-NOT: call void @__foo_load
+; CHECK-INLINE-NOT: call void @__heapprof_load
+  %tmp1 = load i32, i32* %a, align 4
+  %tmp2 = load i64, i64* %b, align 8
+  %tmp3 = load i512, i512* %c, align 32
+  %tmp4 = load i80, i80* %d, align 8
+  ret void
+}
diff --git a/llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll b/llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/HeapProfiler/masked-load-store.ll
@@ -0,0 +1,246 @@
+; RUN: opt < %s -heapprof -heapprof-use-callbacks -S \
+; RUN:     | FileCheck %s -check-prefix=LOAD -check-prefix=STORE -check-prefix=ALL
+; RUN: opt < %s -heapprof -heapprof-use-callbacks -heapprof-instrument-reads=0 -S \
+; RUN:     | FileCheck %s -check-prefix=NOLOAD -check-prefix=STORE -check-prefix=ALL
+; RUN: opt < %s -heapprof -heapprof-use-callbacks -heapprof-instrument-writes=0 -S \
+; RUN:     | FileCheck %s -check-prefix=LOAD -check-prefix=NOSTORE -check-prefix=ALL
+; RUN: opt < %s -heapprof -heapprof-use-callbacks -heapprof-instrument-reads=0 -heapprof-instrument-writes=0 -S \
+; RUN:     | FileCheck %s -check-prefix=NOLOAD -check-prefix=NOSTORE -check-prefix=ALL
+; Support heap profiling instrumentation for constant-mask llvm.masked.{load,store}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@v4f32 = global <4 x float>* zeroinitializer, align 8
+@v8i32 = global <8 x i32>* zeroinitializer, align 8
+@v4i64 = global <4 x i32*>* zeroinitializer, align 8
+
+;;;;;;;;;;;;;;;; STORE
+declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32, <4 x i1>) argmemonly nounwind
+declare void @llvm.masked.store.v8i32.p0v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>) argmemonly nounwind
+declare void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*>, <4 x i32*>*, i32, <4 x i1>) argmemonly nounwind
+
+define void @store.v4f32.1110(<4 x float> %arg) {
+; ALL-LABEL: @store.v4f32.1110
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; NOSTORE-NOT: call void @__heapprof_store
+; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
+; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP0]])
+; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1
+; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP1]])
+; STORE: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
+; STORE: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP2]])
+; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
+  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
+  ret void
+}
+
+define void @store.v8i32.10010110(<8 x i32> %arg) {
+; ALL-LABEL: @store.v8i32.10010110
+  %p = load <8 x i32>*, <8 x i32>** @v8i32, align 8
+; NOSTORE-NOT: call void @__heapprof_store
+; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 0
+; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP0]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP0]])
+; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 3
+; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP3]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP3]])
+; STORE: [[GEP5:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 5
+; STORE: [[PGEP5:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP5]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP5]])
+; STORE: [[GEP6:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 6
+; STORE: [[PGEP6:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP6]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP6]])
+; STORE: tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %arg, <8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
+  tail call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %arg, <8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
+  ret void
+}
+
+define void @store.v4i64.0001(<4 x i32*> %arg) {
+; ALL-LABEL: @store.v4i64.0001
+  %p = load <4 x i32*>*, <4 x i32*>** @v4i64, align 8
+; NOSTORE-NOT: call void @__heapprof_store
+; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x i32*>, <4 x i32*>* %p, i64 0, i64 3
+; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32** [[GEP3]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP3]])
+; STORE: tail call void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*> %arg, <4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
+  tail call void @llvm.masked.store.v4p0i32.p0v4p0i32(<4 x i32*> %arg, <4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
+  ret void
+}
+
+define void @store.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) {
+; ALL-LABEL: @store.v4f32.variable
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; STORE: [[MASK0:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 0
+; STORE: br i1 [[MASK0]], label %[[THEN0:[0-9A-Za-z]+]], label %[[AFTER0:[0-9A-Za-z]+]]
+; STORE: [[THEN0]]:
+; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
+; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP0]])
+; STORE: br label %[[AFTER0]]
+; STORE: [[AFTER0]]:
+
+; STORE: [[MASK1:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 1
+; STORE: br i1 [[MASK1]], label %[[THEN1:[0-9A-Za-z]+]], label %[[AFTER1:[0-9A-Za-z]+]]
+; STORE: [[THEN1]]:
+; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1
+; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP1]])
+; STORE: br label %[[AFTER1]]
+; STORE: [[AFTER1]]:
+
+; STORE: [[MASK2:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 2
+; STORE: br i1 [[MASK2]], label %[[THEN2:[0-9A-Za-z]+]], label %[[AFTER2:[0-9A-Za-z]+]]
+; STORE: [[THEN2]]:
+; STORE: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
+; STORE: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP2]])
+; STORE: br label %[[AFTER2]]
+; STORE: [[AFTER2]]:
+
+; STORE: [[MASK3:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 3
+; STORE: br i1 [[MASK3]], label %[[THEN3:[0-9A-Za-z]+]], label %[[AFTER3:[0-9A-Za-z]+]]
+; STORE: [[THEN3]]:
+; STORE: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
+; STORE: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP3]])
+; STORE: br label %[[AFTER3]]
+; STORE: [[AFTER3]]:
+
+; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> %mask)
+  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> %mask)
+  ret void
+}
+
+;; Store using two masked.stores, which should instrument them both.
+define void @store.v4f32.1010.split(<4 x float> %arg) {
+; BOTH-LABEL: @store.v4f32.1010.split
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; STORE: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
+; STORE: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP0]])
+; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
+  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
+; STORE: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
+; STORE: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; STORE: call void @__heapprof_store(i64 [[PGEP1]])
+; STORE: tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
+  tail call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %arg, <4 x float>* %p, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
+  ret void
+}
+
+;;;;;;;;;;;;;;;; LOAD
+declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>) argmemonly nounwind
+declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>) argmemonly nounwind
+declare <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>*, i32, <4 x i1>, <4 x i32*>) argmemonly nounwind
+
+define <8 x i32> @load.v8i32.11100001(<8 x i32> %arg) {
+; ALL-LABEL: @load.v8i32.11100001
+  %p = load <8 x i32>*, <8 x i32>** @v8i32, align 8
+; NOLOAD-NOT: call void @__heapprof_load
+; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 0
+; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP0]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP0]])
+; LOAD: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 1
+; LOAD: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP1]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP1]])
+; LOAD: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 2
+; LOAD: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP2]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP2]])
+; LOAD: [[GEP7:%[0-9A-Za-z]+]] = getelementptr <8 x i32>, <8 x i32>* %p, i64 0, i64 7
+; LOAD: [[PGEP7:%[0-9A-Za-z]+]] = ptrtoint i32* [[GEP7]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP7]])
+; LOAD: tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
+  %res = tail call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
+  ret <8 x i32> %res
+}
+
+define <4 x float> @load.v4f32.1001(<4 x float> %arg) {
+; ALL-LABEL: @load.v4f32.1001
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; NOLOAD-NOT: call void @__heapprof_load
+; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
+; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP0]])
+; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
+; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP3]])
+; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
+  %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
+  ret <4 x float> %res
+}
+
+define <4 x i32*> @load.v4i64.0001(<4 x i32*> %arg) {
+; ALL-LABEL: @load.v4i64.0001
+  %p = load <4 x i32*>*, <4 x i32*>** @v4i64, align 8
+; NOLOAD-NOT: call void @__heapprof_load
+; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x i32*>, <4 x i32*>* %p, i64 0, i64 3
+; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint i32** [[GEP3]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP3]])
+; LOAD: tail call <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32*> %arg)
+  %res = tail call <4 x i32*> @llvm.masked.load.v4p0i32.p0v4p0i32(<4 x i32*>* %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32*> %arg)
+  ret <4 x i32*> %res
+}
+
+define <4 x float> @load.v4f32.variable(<4 x float> %arg, <4 x i1> %mask) {
+; ALL-LABEL: @load.v4f32.variable
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; LOAD: [[MASK0:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 0
+; LOAD: br i1 [[MASK0]], label %[[THEN0:[0-9A-Za-z]+]], label %[[AFTER0:[0-9A-Za-z]+]]
+; LOAD: [[THEN0]]:
+; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
+; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP0]])
+; LOAD: br label %[[AFTER0]]
+; LOAD: [[AFTER0]]:
+
+; LOAD: [[MASK1:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 1
+; LOAD: br i1 [[MASK1]], label %[[THEN1:[0-9A-Za-z]+]], label %[[AFTER1:[0-9A-Za-z]+]]
+; LOAD: [[THEN1]]:
+; LOAD: [[GEP1:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 1
+; LOAD: [[PGEP1:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP1]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP1]])
+; LOAD: br label %[[AFTER1]]
+; LOAD: [[AFTER1]]:
+
+; LOAD: [[MASK2:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 2
+; LOAD: br i1 [[MASK2]], label %[[THEN2:[0-9A-Za-z]+]], label %[[AFTER2:[0-9A-Za-z]+]]
+; LOAD: [[THEN2]]:
+; LOAD: [[GEP2:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 2
+; LOAD: [[PGEP2:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP2]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP2]])
+; LOAD: br label %[[AFTER2]]
+; LOAD: [[AFTER2]]:
+
+; LOAD: [[MASK3:%[0-9A-Za-z]+]] = extractelement <4 x i1> %mask, i64 3
+; LOAD: br i1 [[MASK3]], label %[[THEN3:[0-9A-Za-z]+]], label %[[AFTER3:[0-9A-Za-z]+]]
+; LOAD: [[THEN3]]:
+; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
+; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP3]])
+; LOAD: br label %[[AFTER3]]
+; LOAD: [[AFTER3]]:
+
+; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> %mask, <4 x float> %arg)
+  %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> %mask, <4 x float> %arg)
+  ret <4 x float> %res
+}
+
+;; Load using two masked.loads, which should instrument them both.
+define <4 x float> @load.v4f32.1001.split(<4 x float> %arg) {
+; BOTH-LABEL: @load.v4f32.1001.split
+  %p = load <4 x float>*, <4 x float>** @v4f32, align 8
+; LOAD: [[GEP0:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 0
+; LOAD: [[PGEP0:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP0]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP0]])
+; LOAD: %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %arg)
+  %res = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %arg)
+; LOAD: [[GEP3:%[0-9A-Za-z]+]] = getelementptr <4 x float>, <4 x float>* %p, i64 0, i64 3
+; LOAD: [[PGEP3:%[0-9A-Za-z]+]] = ptrtoint float* [[GEP3]] to i64
+; LOAD: call void @__heapprof_load(i64 [[PGEP3]])
+; LOAD: tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %res)
+  %res2 = tail call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %res)
+  ret <4 x float> %res2
+}
diff --git a/llvm/test/Instrumentation/HeapProfiler/scale-granularity.ll b/llvm/test/Instrumentation/HeapProfiler/scale-granularity.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/HeapProfiler/scale-granularity.ll
@@ -0,0 +1,29 @@
+; Test that the scale (-heapprof-mapping-scale) and granularity
+; (-heapprof-mapping-granularity) command-line options work as expected.
+;
+; RUN: opt < %s -heapprof -heapprof-module -heapprof-mapping-granularity 32 -S | FileCheck --check-prefix=CHECK-GRAN %s
+; RUN: opt < %s -heapprof -heapprof-module -heapprof-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s
+; RUN: opt < %s -heapprof -heapprof-module -heapprof-mapping-granularity 16 -heapprof-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @read(i32* %a) {
+entry:
+  %tmp1 = load i32, i32* %a, align 4
+  ret i32 %tmp1
+}
+; CHECK-GRAN-LABEL: @read
+; CHECK-GRAN-NOT: ret
+; CHECK-GRAN: and {{.*}} -32
+; CHECK-GRAN-NEXT: lshr {{.*}} 3
+; CHECK-GRAN: ret
+
+; CHECK-SCALE-LABEL: @read
+; CHECK-SCALE-NOT: ret
+; CHECK-SCALE: and {{.*}} -64
+; CHECK-SCALE-NEXT: lshr {{.*}} 1
+; CHECK-SCALE: ret
+
+; CHECK-BOTH-LABEL: @read
+; CHECK-BOTH-NOT: ret
+; CHECK-BOTH: and {{.*}} -16
+; CHECK-BOTH-NEXT: lshr {{.*}} 0
+; CHECK-BOTH: ret
diff --git a/llvm/test/Instrumentation/HeapProfiler/version-mismatch-check.ll b/llvm/test/Instrumentation/HeapProfiler/version-mismatch-check.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/HeapProfiler/version-mismatch-check.ll
@@ -0,0 +1,12 @@
+; Check that the HeapProf module constructor guards against compiler/runtime
+; version mismatch.
+
+; RUN: opt < %s -heapprof-module -S | FileCheck %s
+; RUN: opt < %s -heapprof-module -heapprof-guard-against-version-mismatch=0 -S | FileCheck %s --check-prefix=NOGUARD
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: define internal void @heapprof.module_ctor()
+; CHECK: call void @__heapprof_version_mismatch_check_v1
+; NOGUARD-NOT: call void @__heapprof_version_mismatch_check_
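End to end, the intended workflow is to compile with clang++ -fmemprof: the driver forwards -fmemprof to cc1 (setting CodeGenOpts.HeapProf), the backend schedules the two instrumentation passes, and the link pulls in libclang_rt.heapprof. A toy program whose heap traffic the instrumentation would count (illustrative only; the report format is up to the runtime, which this patch does not include — the module constructor sketched by the test above runs __heapprof_version_mismatch_check_v1 and __heapprof_init before main):

    // Build with a patched compiler: clang++ -fmemprof demo.cpp -o demo
    #include <memory>
    #include <numeric>

    int main() {
      // One heap allocation; the 1024 stores from iota and 1024 loads from
      // accumulate are counted in shadow memory and attributed to it.
      auto Data = std::make_unique<int[]>(1024);
      std::iota(Data.get(), Data.get() + 1024, 0);
      return std::accumulate(Data.get(), Data.get() + 1024, 0) & 0xff;
    }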