Index: include/llvm/Analysis/BasicAliasAnalysis.h =================================================================== --- include/llvm/Analysis/BasicAliasAnalysis.h +++ include/llvm/Analysis/BasicAliasAnalysis.h @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -41,20 +42,22 @@ const DataLayout &DL; AssumptionCache &AC; + InvariantInfo *InvInfo; DominatorTree *DT; LoopInfo *LI; public: BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, - AssumptionCache &AC, DominatorTree *DT = nullptr, - LoopInfo *LI = nullptr) - : AAResultBase(TLI), DL(DL), AC(AC), DT(DT), LI(LI) {} + AssumptionCache &AC, InvariantInfo *InvInfo = nullptr, + DominatorTree *DT = nullptr, LoopInfo *LI = nullptr) + : AAResultBase(TLI), DL(DL), AC(AC), InvInfo(InvInfo), DT(DT), LI(LI) {} BasicAAResult(const BasicAAResult &Arg) - : AAResultBase(Arg), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} + : AAResultBase(Arg), DL(Arg.DL), AC(Arg.AC), InvInfo(Arg.InvInfo), + DT(Arg.DT), LI(Arg.LI) {} BasicAAResult(BasicAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), - LI(Arg.LI) {} + : AAResultBase(std::move(Arg)), DL(Arg.DL), AC(Arg.AC), + InvInfo(Arg.InvInfo), DT(Arg.DT), LI(Arg.LI) {} /// Handle invalidation events from the new pass manager. /// Index: include/llvm/Analysis/InvariantInfo.h =================================================================== --- /dev/null +++ include/llvm/Analysis/InvariantInfo.h @@ -0,0 +1,123 @@ +//===-------- llvm/InvariantInfo.h - invariant_start/end info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines properties for handling invariant_start/end instructions +// for purposes of load elimination. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INVARIANTINFO_H +#define LLVM_ANALYSIS_INVARIANTINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Pass.h" + +namespace llvm { +class Value; +class DataLayout; +class IntrinsicInst; + +/// PreservedInvariantInfo -- A data structure to mark and access processed +/// invariant intrinsic calls. +class InvariantInfo { + + /// \brief A mapping of each 'writeonce' allocated memory (via a + /// GlobalVariable or AllocaInst instance) to its matching intrisic_start + /// call. This is to be used by GVN, InstCombine, GlobalOpt and BasicAA + /// to process invariant intrinsics for purposes of eliminating redundant + /// load instructions. + DenseMap InvariantMarkers; + +public: + + /// \brief Access the 'InvariantMarkers' mapping to extract invariant_start + /// instruction that is associated with the given Address. + IntrinsicInst *GetStartInstruction(const Value *Addr) const; + + /// \brief Add an entry to the 'InvariantMarkers' mapping. + void SetStartInstruction(Value *Addr, IntrinsicInst *IStart); +}; + +/// PreserveInvariantInfoRAII -- An RAII object to save and restore information +/// from processed invariant intrinsics. +struct PreserveInvariantInfoRAII { + + /// Information processed from invariant intrinsics, + /// for a given query instruction. + struct PreservedInfo { + IntrinsicInst *II; + Value *LoadedAddr; + InvariantInfo *InvInfo; + PreservedInfo(Value *Query, const DataLayout &DL, InvariantInfo *InvInfo); + }; + + /// \brief The preserved info. + PreservedInfo Info; + + PreserveInvariantInfoRAII(Value *Query, const DataLayout &DL, + InvariantInfo *InvInfo); + ~PreserveInvariantInfoRAII(); + +private: + void CheckPreservedInfo() const; +}; + +/// \brief Process invariant_start/end intrinsics: Mark addresses as "within +/// an invariant range" specified by the given intrinsic call. +bool processInvariantIntrinsic(IntrinsicInst *II, InvariantInfo &InvInfo); + +/// \brief Handle invariant_start/end intrinsics when scanning intructions +/// backward to either find available loads or look for pointer dependencies +/// from a given query instruction, based on preserved invariant info. +bool BackwardScanInvariantIntrinsic(const IntrinsicInst *II, + const PreserveInvariantInfoRAII &Preserved, + InvariantInfo *InvInfo); + +/// \brief A function analysis which provides an \c InvariantInfo. +/// +/// This analysis is intended for use with the new pass manager and will vend +/// invariant info for a given function. +class InvariantInfoAnalysis { + static char PassID; + +public: + typedef InvariantInfo Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Provide a name for the analysis for debugging and logging. + static StringRef name() { return "InvariantInfoAnalysis"; } + + InvariantInfoAnalysis() {} + InvariantInfoAnalysis(const InvariantInfoAnalysis &Arg) {} + InvariantInfoAnalysis(InvariantInfoAnalysis &&Arg) {} + InvariantInfoAnalysis &operator=(const InvariantInfoAnalysis &RHS) { return *this; } + InvariantInfoAnalysis &operator=(InvariantInfoAnalysis &&RHS) { return *this; } + + InvariantInfo run(Function &) { return InvariantInfo(); } +}; + +/// \brief The -invariant-info-marker pass. +class InvariantInfoMarkerPass : public ImmutablePass { + InvariantInfo InvInfo; + +public: + InvariantInfoMarkerPass(); + ~InvariantInfoMarkerPass() override; + + InvariantInfo &getInvariantInfo() { return InvInfo; } + const InvariantInfo &getInvariantInfo() const { return InvInfo; } + + static char ID; // Pass identification, replacement for typeid +}; + +} // End llvm namespace + +#endif Index: include/llvm/Analysis/MemoryDependenceAnalysis.h =================================================================== --- include/llvm/Analysis/MemoryDependenceAnalysis.h +++ include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -18,6 +18,7 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/IR/ValueHandle.h" @@ -329,6 +330,7 @@ AliasAnalysis *AA; DominatorTree *DT; AssumptionCache *AC; + InvariantInfo *InvInfo; const TargetLibraryInfo *TLI; PredIteratorCache PredCache; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -147,6 +147,7 @@ void initializeInstNamerPass(PassRegistry&); void initializeInternalizePassPass(PassRegistry&); void initializeIntervalPartitionPass(PassRegistry&); +void initializeInvariantInfoMarkerPassPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAPass(PassRegistry&); void initializeLICMPass(PassRegistry&); Index: lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- lib/Analysis/BasicAliasAnalysis.cpp +++ lib/Analysis/BasicAliasAnalysis.cpp @@ -488,6 +488,16 @@ if (OrLocal && isa(V)) continue; + // A pointer value associated to an invariant_start intrinsic call is + // considered to point to memory that is local to the function. + if (InvInfo && InvInfo->GetStartInstruction(V)) { + const GlobalVariable *GV = dyn_cast(V); + assert((isa(V) || (GV && !GV->isConstant())) && + "Invariant info are associated only with alloca instructions or" + "non-constant global variables"); + continue; + } + // A global constant counts as local memory for our purposes. if (const GlobalVariable *GV = dyn_cast(V)) { // Note: this doesn't require GV to be "ODR" because it isn't legal for a @@ -673,6 +683,12 @@ return Alias; } +static bool isInvariantIntrinsic(ImmutableCallSite CS) { + const IntrinsicInst *II = dyn_cast(CS.getInstruction()); + return II && (II->getIntrinsicID() == Intrinsic::invariant_start || + II->getIntrinsicID() == Intrinsic::invariant_end); +} + /// Checks to see if the specified callsite can clobber the specified memory /// object. /// @@ -734,6 +750,9 @@ if (isAssumeIntrinsic(CS)) return MRI_NoModRef; + // Invariant intrinsics follow the same pattern as assume intrinsic. + if (isInvariantIntrinsic(CS)) return MRI_NoModRef; + // The AAResultBase base class has some smarts, lets use them. return AAResultBase::getModRefInfo(CS, Loc); } @@ -746,6 +765,10 @@ if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2)) return MRI_NoModRef; + // Invariant intrinsics follow the same pattern as assume intrinsic. + if (isInvariantIntrinsic(CS1) || isInvariantIntrinsic(CS2)) + return MRI_NoModRef; + // The AAResultBase base class has some smarts, lets use them. return AAResultBase::getModRefInfo(CS1, CS2); } @@ -1566,6 +1589,7 @@ return BasicAAResult(F.getParent()->getDataLayout(), AM->getResult(F), AM->getResult(F), + AM->getCachedResult(F), AM->getCachedResult(F), AM->getCachedResult(F)); } @@ -1591,11 +1615,13 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto &ACT = getAnalysis(); auto &TLIWP = getAnalysis(); + auto *IIM = getAnalysisIfAvailable(); auto *DTWP = getAnalysisIfAvailable(); auto *LIWP = getAnalysisIfAvailable(); Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(), ACT.getAssumptionCache(F), + IIM ? &IIM->getInvariantInfo() : nullptr, DTWP ? &DTWP->getDomTree() : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr)); Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -32,6 +32,7 @@ InstructionSimplify.cpp Interval.cpp IntervalPartition.cpp + InvariantInfo.cpp IteratedDominanceFrontier.cpp LazyCallGraph.cpp LazyValueInfo.cpp Index: lib/Analysis/InvariantInfo.cpp =================================================================== --- /dev/null +++ lib/Analysis/InvariantInfo.cpp @@ -0,0 +1,158 @@ +//===---------- InvariantInfo.cpp - invariant_start/end info --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines properties for handling invariant_start/end instructions +// for purposes of load elimination. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InvariantInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" +using namespace llvm; + +IntrinsicInst *InvariantInfo::GetStartInstruction(const Value *Addr) const { + if (!Addr) return nullptr; + + // Only global variables and alloca instructions are marked. + if (!isa(Addr) && !isa(Addr)) return nullptr; + + // constant globals are also not marked. + if (const GlobalVariable *GV = dyn_cast(Addr)) { + if (GV->isConstant()) return nullptr; + } + + // Retrieve the value from the markers map. + auto EntryIter = InvariantMarkers.find(const_cast(Addr)); + if (EntryIter != InvariantMarkers.end()) return EntryIter->second; + return nullptr; +} + +void InvariantInfo::SetStartInstruction(Value *Addr, IntrinsicInst *IStart) { + if (!Addr) return; + + // Only mark either global variables or alloca instructions. + if (!isa(Addr) && !isa(Addr)) return; + + assert((!IStart || IStart->getIntrinsicID() == Intrinsic::invariant_start) && + "Given intrinsic instruction is not invariant_start"); + if (IStart && isa(Addr)) + assert(IStart->getParent() == cast(Addr)->getParent() && + "Invariant_start calls on a given alloca instruction must have" + "the same parent the alloca instruction."); + + // Erase the current entry to ensure that the new value is inserted into + // the markers map. + InvariantMarkers.erase(Addr); + + // Actually insert the value into the map. This operation must be successful. + // Note: A request to insert a nullptr value is a request to erase the entry. + // So, do not insert nullptr values. + if (IStart) { + bool Inserted = InvariantMarkers.insert(std::make_pair(Addr, IStart)).second; + assert(Inserted && "Setting new marker failed."); + } +} + +PreserveInvariantInfoRAII::PreserveInvariantInfoRAII(Value *QueryInst, + const DataLayout &DL, + InvariantInfo *InvInfo) + : Info(QueryInst, DL, InvInfo) { + if (Info.II) CheckPreservedInfo(); +} + +PreserveInvariantInfoRAII::~PreserveInvariantInfoRAII() { + if (Info.II) + Info.InvInfo->SetStartInstruction(Info.LoadedAddr, Info.II); +} + +/// Before scanning instructions backward from a given query instruction, +/// preserve information processed from invariant intrinsics, if the query is +/// is a load from writeonce memory. +PreserveInvariantInfoRAII::PreservedInfo::PreservedInfo(Value *QueryInst, + const DataLayout &DL, + InvariantInfo *InvInfo) +: II(nullptr), LoadedAddr(nullptr), InvInfo(InvInfo) { + if (!InvInfo) return; + if (LoadInst *LI = dyn_cast_or_null(QueryInst)) { + Value *Addr = GetUnderlyingObject(LI->getPointerOperand(), DL); + if (IntrinsicInst *QueryII = InvInfo->GetStartInstruction(Addr)) { + II = QueryII; + LoadedAddr = Addr; + } + } +} + +void llvm::PreserveInvariantInfoRAII::CheckPreservedInfo() const { + assert(Info.II->getIntrinsicID() == Intrinsic::invariant_start && + "Preserved instruction must be an invariant_start intrinsic"); + assert(Info.LoadedAddr && + "Can't preserve an intrinsic instruction for no load."); +} + +/// If the given intrinsic instruction is an invariant_start, then mark its +/// corresponding address as "written" 'writeonce', and thus treatable as not +/// pointing to constant memory. If the intrinsic instruction is an +/// invariant_end instead, then mark the address as no longer "written" +/// and thus writeable again (i.e. no longer pointing to constant memory). +bool llvm::processInvariantIntrinsic(IntrinsicInst *II, + InvariantInfo &InvInfo) { + assert(II && "Can't mark a null instruction."); + + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + llvm::Value *Addr = II->getArgOperand(1)->stripPointerCasts(); + InvInfo.SetStartInstruction(Addr, II); + return true; + } else if (II->getIntrinsicID() == Intrinsic::invariant_end) { + llvm::Value *Addr = II->getArgOperand(2)->stripPointerCasts(); + if (InvInfo.GetStartInstruction(Addr)) + InvInfo.SetStartInstruction(Addr, nullptr); + return true; + } + return false; +} + +/// If the query is a load that is within an invariant_start/end pair, and +/// if we encounter its associated invariant_start instruction, then start +/// treating the memory location that it loads from as not pointing to +/// constant memory. +/// +/// This function returns true if the given instrinsic instruction is an +/// invariant_start/end. +bool llvm::BackwardScanInvariantIntrinsic(const IntrinsicInst *II, + const PreserveInvariantInfoRAII &Preserved, + InvariantInfo *InvInfo) { + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + if (II == Preserved.Info.II) + InvInfo->SetStartInstruction(Preserved.Info.LoadedAddr, nullptr); + return true; + } else if (II->getIntrinsicID() == Intrinsic::invariant_end) { + llvm::Value *IStart = II->getArgOperand(0)->stripPointerCasts(); + assert(Preserved.Info.II != dyn_cast(IStart) && + "This invariant_end's start instruction could not match the " + "preserved invariant_start, if preserved."); + return true; + } + return false; +} + +char InvariantInfoAnalysis::PassID; + +InvariantInfoMarkerPass::InvariantInfoMarkerPass() : ImmutablePass(ID) { + initializeInvariantInfoMarkerPassPass(*PassRegistry::getPassRegistry()); +} + +InvariantInfoMarkerPass::~InvariantInfoMarkerPass() {} + +INITIALIZE_PASS(InvariantInfoMarkerPass, "invariant-info-marker", + "Invariant_start/end markers", false, true) +char InvariantInfoMarkerPass::ID = 0; Index: lib/Analysis/MemoryDependenceAnalysis.cpp =================================================================== --- lib/Analysis/MemoryDependenceAnalysis.cpp +++ lib/Analysis/MemoryDependenceAnalysis.cpp @@ -101,6 +101,8 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &F) { AA = &getAnalysis().getAAResults(); AC = &getAnalysis().getAssumptionCache(F); + auto *IIM = getAnalysisIfAvailable(); + InvInfo = IIM ? &IIM->getInvariantInfo() : nullptr; DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; @@ -494,6 +496,10 @@ const DataLayout &DL = BB->getModule()->getDataLayout(); + // We're about to scan backwards. Preserve the initial invariant_start + // intrinsic marking on this load, for subsequent instructions. + PreserveInvariantInfoRAII Preserved(isLoad? QueryInst : nullptr, DL, InvInfo); + // Create a numbered basic block to lazily compute and cache instruction // positions inside a BB. This is used to provide fast queries for relative // position between two instructions in a BB and can be used by @@ -504,10 +510,15 @@ while (ScanIt != BB->begin()) { Instruction *Inst = &*--ScanIt; - if (IntrinsicInst *II = dyn_cast(Inst)) + if (IntrinsicInst *II = dyn_cast(Inst)) { // Debug intrinsics don't (and can't) cause dependencies. if (isa(II)) continue; + // Same for invariant intrinsics, which may also unset preserved + // invariant info. + if (BackwardScanInvariantIntrinsic(II, Preserved, InvInfo)) continue; + } + // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. --Limit; Index: lib/Transforms/Scalar/GVN.cpp =================================================================== --- lib/Transforms/Scalar/GVN.cpp +++ lib/Transforms/Scalar/GVN.cpp @@ -599,6 +599,7 @@ DominatorTree *DT; const TargetLibraryInfo *TLI; AssumptionCache *AC; + InvariantInfo *InvInfo; SetVector DeadBlocks; ValueTable VN; @@ -694,6 +695,7 @@ // This transformation requires dominator postdominator info void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); if (!NoLoads) @@ -747,6 +749,7 @@ INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(InvariantInfoMarkerPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) @@ -2297,6 +2300,12 @@ if (isa(I)) return false; + // Also ignore invariant intrinsics, after processing them. + if (IntrinsicInst *II = dyn_cast(I)) { + if (processInvariantIntrinsic(II, *InvInfo)) + return false; + } + // If the instruction can be easily simplified then do so now in preference // to value numbering it. Value numbering often exposes redundancies, for // example if it determines that %y is equal to %x then the instruction @@ -2430,6 +2439,7 @@ MD = &getAnalysis(); DT = &getAnalysis().getDomTree(); AC = &getAnalysis().getAssumptionCache(F); + InvInfo = &getAnalysis().getInvariantInfo(); TLI = &getAnalysis().getTLI(); VN.setAliasAnalysis(&getAnalysis().getAAResults()); VN.setMemDep(MD); Index: test/Transforms/LoadElim/invariant.ll =================================================================== --- /dev/null +++ test/Transforms/LoadElim/invariant.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -gvn -S | FileCheck %s + +; On a given pointer to allocated memory, invariant_start/end intrinsic +; calls indicate that the memory can be considered constant for load +; elimination purposes. +define void @_Z3ex1v() { +entry: + %i = alloca i32 + call void @_Z3fooPK1A(i32* %i) ; #1 + %0 = bitcast i32* %i to i8* + %1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + call void @_Z3fooPK1A(i32* %i) ; #2 + %2 = load i32, i32* %i ; Not clobbered by #2; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @_Z3bar1A(i32 %2) + call void @_Z3fooPK1A(i32* %i) ; #3 + %3 = load i32, i32* %i ; Not clobbered by #3; Merged into %2. + ; CHECK-NOT: load i32, i32* + call void @_Z3bar1A(i32 %3) + call void @llvm.invariant.end({}* %1, i64 4, i8* %0) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + call void @_Z3fooPK1A(i32* %i) ; #4 + %4 = load i32, i32* %i ; Clobbered by #4; Unchanged. + ; CHECK: load i32, i32* + call void @_Z3bar1A(i32 %4) + ret void +} + +declare void @_Z3bar1A(i32) +declare void @_Z3fooPK1A(i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) +