Index: include/llvm/Analysis/LazyValueInfo.h =================================================================== --- include/llvm/Analysis/LazyValueInfo.h +++ include/llvm/Analysis/LazyValueInfo.h @@ -61,7 +61,13 @@ /// (from an assume intrinsic). Pred is a CmpInst predicate. Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI); - + + /// Determine whether the specified value comparison with a constant is known + /// to be true or false at the end of the specified block. + /// Pred is a CmpInst predicate. + Tristate getPredicate(unsigned Pred, Value *V, Constant *C, + BasicBlock *BB, Instruction *CxtI = nullptr); + /// Determine whether the specified value is known to be a /// constant at the end of the specified block. Return null if not. Constant *getConstant(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); Index: include/llvm/Analysis/ValueTracking.h =================================================================== --- include/llvm/Analysis/ValueTracking.h +++ include/llvm/Analysis/ValueTracking.h @@ -29,6 +29,7 @@ class DominatorTree; class TargetLibraryInfo; class LoopInfo; + class LazyValueInfo; /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. @@ -214,11 +215,16 @@ bool onlyUsedByLifetimeMarkers(const Value *V); /// isDereferenceablePointer - Return true if this is always a dereferenceable - /// pointer. - /// - /// Test if this value is always a pointer to allocated and suitably aligned - /// memory for a simple load or store. - bool isDereferenceablePointer(const Value *V, const DataLayout &DL); + /// pointer. If the context instruction is specified perform context-sensitive + /// analysis and return true if the pointer is dereferenceable at the + /// specified instruction. + /// + /// Either DT or LVI must be passed for context-sensitive analysis. 
+ bool isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + LazyValueInfo *LVI = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// isSafeToSpeculativelyExecute - Return true if the instruction does not /// have any effects besides calculating the result and does not have @@ -233,18 +239,41 @@ /// memory leak. It also returns false for instructions related to control /// flow, specifically terminators and PHI nodes. /// - /// This method only looks at the instruction itself and its operands, so if - /// this method returns true, it is safe to move the instruction as long as - /// the correct dominance relationships for the operands and users hold. - /// However, this method can return true for instructions that read memory; + /// If the CtxI is specified this method performs context-sensitive analysis + /// and return true if it is safe to execute the instruction immediately + /// before the CtxI. Either DT or LVI must be passed for context-sensitive + /// analysis. + /// + /// If the CtxI is NOT specified this method only looks at the instruction + /// itself and its operands, so if this method returns true, it is safe to + /// move the instruction as long as the correct dominance relationships for + /// the operands and users hold. + /// + /// This method can return true for instructions that read memory; /// for such instructions, moving them may change the resulting value. - bool isSafeToSpeculativelyExecute(const Value *V); + bool isSafeToSpeculativelyExecute(const Value *V, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + LazyValueInfo *LVI = nullptr, + const TargetLibraryInfo *TLI = nullptr); /// isKnownNonNull - Return true if this pointer couldn't possibly be null by /// its definition. This returns true for allocas, non-extern-weak globals /// and byval arguments. 
bool isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI = nullptr); - + + /// isKnownNonNullAt - Return true if this pointer couldn't possibly be null. + /// If the context instruction is specified perform context-sensitive analysis + /// and return true if the pointer couldn't possibly be null at the specified + /// instruction. + /// + /// Either DT or LVI must be passed for context-sensitive analysis. + bool isKnownNonNullAt(const Value *V, + const Instruction *CtxI = nullptr, + const DominatorTree *DT = nullptr, + LazyValueInfo *LVI = nullptr, + const TargetLibraryInfo *TLI = nullptr); + /// Return true if it is valid to use the assumptions provided by an /// assume intrinsic, I, at the point in the control-flow identified by the /// context instruction, CxtI. Index: include/llvm/IR/Argument.h =================================================================== --- include/llvm/IR/Argument.h +++ include/llvm/IR/Argument.h @@ -64,6 +64,11 @@ /// containing function, return the number of bytes known to be /// dereferenceable. Otherwise, zero is returned. uint64_t getDereferenceableBytes() const; + + /// \brief If this argument has the dereferenceable_or_null attribute on + /// it in its containing function, return the number of bytes known to be + /// dereferenceable. Otherwise, zero is returned. + uint64_t getDereferenceableOrNullBytes() const; /// \brief Return true if this argument has the byval attribute on it in its /// containing function. Index: include/llvm/IR/CallSite.h =================================================================== --- include/llvm/IR/CallSite.h +++ include/llvm/IR/CallSite.h @@ -231,7 +231,13 @@ uint64_t getDereferenceableBytes(uint16_t i) const { CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i)); } - + + /// @brief Extract the number of dereferenceable_or_null bytes for a call or + /// parameter (0=unknown). 
+ uint64_t getDereferenceableOrNullBytes(uint16_t i) const { + CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i)); + } + /// \brief Return true if the call should not be treated as a call to a /// builtin. bool isNoBuiltin() const { Index: include/llvm/IR/Function.h =================================================================== --- include/llvm/IR/Function.h +++ include/llvm/IR/Function.h @@ -237,7 +237,13 @@ uint64_t getDereferenceableBytes(unsigned i) const { return AttributeSets.getDereferenceableBytes(i); } - + + /// @brief Extract the number of dereferenceable_or_null bytes for a call or + /// parameter (0=unknown). + uint64_t getDereferenceableOrNullBytes(unsigned i) const { + return AttributeSets.getDereferenceableOrNullBytes(i); + } + /// @brief Determine if the function does not access memory. bool doesNotAccessMemory() const { return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Index: include/llvm/IR/Instructions.h =================================================================== --- include/llvm/IR/Instructions.h +++ include/llvm/IR/Instructions.h @@ -1473,6 +1473,12 @@ return AttributeList.getDereferenceableBytes(i); } + /// \brief Extract the number of dereferenceable_or_null bytes for a call or + /// parameter (0=unknown). + uint64_t getDereferenceableOrNullBytes(unsigned i) const { + return AttributeList.getDereferenceableOrNullBytes(i); + } + /// \brief Return true if the call should not be treated as a call to a /// builtin. bool isNoBuiltin() const { @@ -3182,6 +3188,12 @@ uint64_t getDereferenceableBytes(unsigned i) const { return AttributeList.getDereferenceableBytes(i); } + + /// \brief Extract the number of dereferenceable_or_null bytes for a call or + /// parameter (0=unknown). + uint64_t getDereferenceableOrNullBytes(unsigned i) const { + return AttributeList.getDereferenceableOrNullBytes(i); + } /// \brief Return true if the call should not be treated as a call to a /// builtin. 
Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -26,6 +26,7 @@ class BasicBlock; class DataLayout; class DominatorTree; +class LazyValueInfo; class Loop; class LoopInfo; class Pass; @@ -218,7 +219,7 @@ /// instructions of the loop and loop safety information as arguments. /// It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, - TargetLibraryInfo *, Loop *, AliasSetTracker *, + LazyValueInfo*, TargetLibraryInfo *, Loop *, AliasSetTracker *, LICMSafetyInfo *); /// \brief Walk the specified region of the CFG (defined by all blocks @@ -229,8 +230,8 @@ /// TargetLibraryInfo, Loop, AliasSet information for all instructions of the /// loop and loop safety information as arguments. It returns changed status. bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, - TargetLibraryInfo *, Loop *, AliasSetTracker *, - LICMSafetyInfo *); + LazyValueInfo *, TargetLibraryInfo *, Loop *, + AliasSetTracker *, LICMSafetyInfo *); /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. 
We do this by looping over Index: lib/Analysis/LazyValueInfo.cpp =================================================================== --- lib/Analysis/LazyValueInfo.cpp +++ lib/Analysis/LazyValueInfo.cpp @@ -1266,6 +1266,16 @@ return getPredicateResult(Pred, C, Result, DL, TLI); } +LazyValueInfo::Tristate +LazyValueInfo::getPredicate(unsigned Pred, Value *V, Constant *C, + BasicBlock *BB, Instruction *CxtI) { + const DataLayout &DL = BB->getModule()->getDataLayout(); + LVILatticeVal Result = + getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI); + + return getPredicateResult(Pred, C, Result, DL, TLI); +} + void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { if (PImpl) { Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -2849,33 +2849,50 @@ } static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset, - Type *Ty, const DataLayout &DL) { + Type *Ty, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + LazyValueInfo *LVI, + const TargetLibraryInfo *TLI) { assert(Offset.isNonNegative() && "offset can't be negative"); assert(Ty->isSized() && "must be sized"); APInt DerefBytes(Offset.getBitWidth(), 0); + bool CheckForNonNull = false; if (const Argument *A = dyn_cast(BV)) { DerefBytes = A->getDereferenceableBytes(); + if (!DerefBytes.getBoolValue()) { + DerefBytes = A->getDereferenceableOrNullBytes(); + CheckForNonNull = true; + } } else if (auto CS = ImmutableCallSite(BV)) { DerefBytes = CS.getDereferenceableBytes(0); + if (!DerefBytes.getBoolValue()) { + DerefBytes = CS.getDereferenceableOrNullBytes(0); + CheckForNonNull = true; + } } if (DerefBytes.getBoolValue()) if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty))) - return true; - + if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, LVI, TLI)) + return true; + return false; } -static bool 
isDereferenceableFromAttribute(const Value *V, - const DataLayout &DL) { +static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + LazyValueInfo *LVI, + const TargetLibraryInfo *TLI) { Type *VTy = V->getType(); Type *Ty = VTy->getPointerElementType(); if (!Ty->isSized()) return false; APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); - return isDereferenceableFromAttribute(V, Offset, Ty, DL); + return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, LVI, TLI); } /// Return true if Value is always a dereferenceable pointer. @@ -2883,6 +2900,10 @@ /// Test if V is always a pointer to allocated and suitably aligned memory for /// a simple load or store. static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + LazyValueInfo *LVI, + const TargetLibraryInfo *TLI, SmallPtrSetImpl &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. @@ -2903,7 +2924,8 @@ if (STy->isSized() && DTy->isSized() && (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) - return isDereferenceablePointer(BC->getOperand(0), DL, Visited); + return isDereferenceablePointer(BC->getOperand(0), DL, CtxI, + DT, LVI, TLI, Visited); } // Global variables which can't collapse to null are ok. @@ -2915,7 +2937,7 @@ if (A->hasByValAttr()) return true; - if (isDereferenceableFromAttribute(V, DL)) + if (isDereferenceableFromAttribute(V, DL, CtxI, DT, LVI, TLI)) return true; // For GEPs, determine if the indexing lands within the allocated object. @@ -2923,7 +2945,8 @@ // Conservatively require that the base pointer be fully dereferenceable. 
if (!Visited.insert(GEP->getOperand(0)).second) return false; - if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited)) + if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI, + DT, LVI, TLI, Visited)) return false; // Check the indices. gep_type_iterator GTI = gep_type_begin(GEP); @@ -2957,17 +2980,23 @@ if (const IntrinsicInst *I = dyn_cast(V)) if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { GCRelocateOperands RelocateInst(I); - return isDereferenceablePointer(RelocateInst.derivedPtr(), DL, Visited); + return isDereferenceablePointer(RelocateInst.derivedPtr(), DL, CtxI, + DT, LVI, TLI, Visited); } if (const AddrSpaceCastInst *ASC = dyn_cast(V)) - return isDereferenceablePointer(ASC->getOperand(0), DL, Visited); + return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI, + DT, LVI, TLI, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL) { +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + LazyValueInfo *LVI, + const TargetLibraryInfo *TLI) { // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. 
If we can // determine the exact offset to the attributed variable, we can use that @@ -2979,15 +3008,20 @@ const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); if (Offset.isNonNegative()) - if (isDereferenceableFromAttribute(BV, Offset, Ty, DL)) + if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, + CtxI, DT, LVI, TLI)) return true; } SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceablePointer(V, DL, Visited); + return ::isDereferenceablePointer(V, DL, CtxI, DT, LVI, TLI, Visited); } -bool llvm::isSafeToSpeculativelyExecute(const Value *V) { +bool llvm::isSafeToSpeculativelyExecute(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT, + LazyValueInfo *LVI, + const TargetLibraryInfo *TLI) { const Operator *Inst = dyn_cast<Operator>(V); if (!Inst) return false; @@ -3034,7 +3068,8 @@ LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceablePointer(LI->getPointerOperand(), DL); + return isDereferenceablePointer(LI->getPointerOperand(), DL, + CtxI, DT, LVI, TLI); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -3125,6 +3160,68 @@ return false; } +static bool isKnownNonNullFromDominatingCondition(const Value *V, + const Instruction *CtxI, + const DominatorTree *DT) { + unsigned NumUsesExplored = 0; + for (auto U : V->users()) { + // Avoid massive lists + if (NumUsesExplored >= DomConditionsMaxUses) + break; + NumUsesExplored++; + // Consider only compare instructions uniquely controlling a branch + const ICmpInst *Cmp = dyn_cast<ICmpInst>(U); + if (!Cmp) + continue; + + if (DomConditionsSingleCmpUse && !Cmp->hasOneUse()) + continue; + + for (auto *CmpU : Cmp->users()) { + const BranchInst *BI = dyn_cast<BranchInst>(CmpU); if (!BI) continue; + assert(BI->isConditional() && "uses a comparison!"); + + BasicBlock *NonNullSuccessor = nullptr; + CmpInst::Predicate Pred; + + if (match(const_cast<ICmpInst *>(Cmp), + m_c_ICmp(Pred, m_Specific(V), m_Zero()))) { 
+ if (Pred == ICmpInst::ICMP_EQ) + NonNullSuccessor = BI->getSuccessor(1); + else if (Pred == ICmpInst::ICMP_NE) + NonNullSuccessor = BI->getSuccessor(0); + } + + if (NonNullSuccessor) { + BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor); + if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent())) + return true; + } + } + } + + return false; +} + +bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI, + const DominatorTree *DT, LazyValueInfo *LVI, + const TargetLibraryInfo *TLI) { + if (isKnownNonNull(V, TLI)) + return true; + + if (CtxI) { + if (LVI) { + return LazyValueInfo::True == LVI->getPredicate(CmpInst::ICMP_NE, + const_cast<Value *>(V), + ConstantPointerNull::get(cast<PointerType>(V->getType())), + const_cast<BasicBlock *>(CtxI->getParent())); + } + return DT && ::isKnownNonNullFromDominatingCondition(V, CtxI, DT); + } + + return false; +} + OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS, const DataLayout &DL, AssumptionCache *AC, Index: lib/IR/AttributeImpl.h =================================================================== --- lib/IR/AttributeImpl.h +++ lib/IR/AttributeImpl.h @@ -166,6 +166,7 @@ unsigned getAlignment() const; unsigned getStackAlignment() const; uint64_t getDereferenceableBytes() const; + uint64_t getDereferenceableOrNullBytes() const; std::string getAsString(bool InAttrGrp) const; typedef const Attribute *iterator; Index: lib/IR/Attributes.cpp =================================================================== --- lib/IR/Attributes.cpp +++ lib/IR/Attributes.cpp @@ -532,6 +532,13 @@ return 0; } +uint64_t AttributeSetNode::getDereferenceableOrNullBytes() const { + for (iterator I = begin(), E = end(); I != E; ++I) + if (I->hasAttribute(Attribute::DereferenceableOrNull)) + return I->getDereferenceableOrNullBytes(); + return 0; +} + std::string AttributeSetNode::getAsString(bool InAttrGrp) const { std::string Str; for (iterator I = begin(), E = end(); I != E; ++I) { @@ -957,6 +964,11 @@ return ASN ?
ASN->getDereferenceableBytes() : 0; } +uint64_t AttributeSet::getDereferenceableOrNullBytes(unsigned Index) const { + AttributeSetNode *ASN = getAttributes(Index); + return ASN ? ASN->getDereferenceableOrNullBytes() : 0; +} + std::string AttributeSet::getAsString(unsigned Index, bool InAttrGrp) const { AttributeSetNode *ASN = getAttributes(Index); Index: lib/IR/Function.cpp =================================================================== --- lib/IR/Function.cpp +++ lib/IR/Function.cpp @@ -117,6 +117,12 @@ return getParent()->getDereferenceableBytes(getArgNo()+1); } +uint64_t Argument::getDereferenceableOrNullBytes() const { + assert(getType()->isPointerTy() && + "Only pointers have dereferenceable bytes"); + return getParent()->getDereferenceableOrNullBytes(getArgNo()+1); +} + /// hasNestAttr - Return true if this argument has the nest attribute on /// it in its containing function. bool Argument::hasNestAttr() const { Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -79,6 +80,8 @@ static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT, Loop *CurLoop, LICMSafetyInfo *SafetyInfo); static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT, + LazyValueInfo *LVI, + TargetLibraryInfo *TLI, Loop *CurLoop, LICMSafetyInfo *SafetyInfo); static bool pointerInvalidatedByLoop(Value *V, uint64_t Size, @@ -87,9 +90,10 @@ static Instruction *CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN, LoopInfo *LI); -static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, - DominatorTree 
*DT, Loop *CurLoop, - AliasSetTracker *CurAST, +static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, + DominatorTree *DT, LazyValueInfo *LVI, + TargetLibraryInfo *TLI, Loop *CurLoop, + AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo); namespace { @@ -116,6 +120,7 @@ AU.addPreserved(); AU.addPreserved(); AU.addRequired(); + AU.addRequired(); } using llvm::Pass::doFinalization; @@ -129,6 +134,7 @@ AliasAnalysis *AA; // Current AliasAnalysis information LoopInfo *LI; // Current LoopInfo DominatorTree *DT; // Dominator Tree for the current Loop. + LazyValueInfo *LVI; TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding. @@ -179,7 +185,7 @@ LI = &getAnalysis().getLoopInfo(); AA = &getAnalysis(); DT = &getAnalysis().getDomTree(); - + LVI = &getAnalysis(); TLI = &getAnalysis().getTLI(); assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -232,10 +238,10 @@ // instructions, we perform another pass to hoist them out of the loop. // if (L->hasDedicatedExits()) - Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop, - CurAST, &SafetyInfo); + Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, LVI, TLI, + CurLoop, CurAST, &SafetyInfo); if (Preheader) - Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, + Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, LVI, TLI, CurLoop, CurAST, &SafetyInfo); // Now that all loop invariants have been removed from the loop, promote any @@ -289,7 +295,8 @@ /// definitions, allowing us to sink a loop body in one pass without iteration. /// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + DominatorTree *DT, LazyValueInfo *LVI, + TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. 
@@ -308,7 +315,8 @@ const std::vector &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) Changed |= - sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + sinkRegion(Children[i], AA, LI, DT, LVI, TLI, + CurLoop, CurAST, SafetyInfo); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB,CurLoop,LI)) return Changed; @@ -333,7 +341,7 @@ // operands of the instruction are loop invariant. // if (isNotUsedInLoop(I, CurLoop) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) { + canSinkOrHoistInst(I, AA, DT, LVI, TLI, CurLoop, CurAST, SafetyInfo)) { ++II; Changed |= sink(I, LI, DT, CurLoop, CurAST); } @@ -347,7 +355,8 @@ /// uses, allowing us to hoist a loop body in one pass without iteration. /// bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + DominatorTree *DT, LazyValueInfo *LVI, + TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && @@ -376,21 +385,23 @@ I.eraseFromParent(); continue; } - + // Try hoisting the instruction out to the preheader. We can only do this // if all of the operands of the instruction are loop invariant and if it // is safe to hoist the instruction. 
// if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) && - isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo)) + canSinkOrHoistInst(I, AA, DT, LVI, TLI, + CurLoop, CurAST, SafetyInfo) && + isSafeToExecuteUnconditionally(I, DT, LVI, TLI, CurLoop, SafetyInfo)) Changed |= hoist(I, CurLoop->getLoopPreheader()); } const std::vector &Children = N->getChildren(); for (unsigned i = 0, e = Children.size(); i != e; ++i) Changed |= - hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + hoistRegion(Children[i], AA, LI, DT, LVI, TLI, + CurLoop, CurAST, SafetyInfo); return Changed; } @@ -421,7 +432,8 @@ /// instruction. /// bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, - Loop *CurLoop, AliasSetTracker *CurAST, + LazyValueInfo *LVI, TargetLibraryInfo *TLI, + Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { @@ -442,7 +454,7 @@ AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); - + return !pointerInvalidatedByLoop(LI->getOperand(0), Size, AAInfo, CurAST); } else if (CallInst *CI = dyn_cast(&I)) { // Don't sink or hoist dbg info; it's legal, but not useful. @@ -482,7 +494,7 @@ !isa(I)) return false; - return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo); + return isSafeToExecuteUnconditionally(I, DT, LVI, TLI, CurLoop, SafetyInfo); } /// Returns true if a PHINode is a trivially replaceable with an @@ -638,12 +650,15 @@ /// or if it is a trapping instruction and is guaranteed to execute. /// static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT, + LazyValueInfo *LVI, + TargetLibraryInfo *TLI, Loop *CurLoop, LICMSafetyInfo *SafetyInfo) { // If it is not a trapping instruction, it is always safe to hoist. 
- if (isSafeToSpeculativelyExecute(&Inst)) + const Instruction *CtxI = CurLoop->getLoopPreheader() ? CurLoop->getLoopPreheader()->getTerminator() : nullptr; + if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, LVI, TLI)) return true; - + return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo); } Index: test/Transforms/LICM/hoist-deref-load.ll =================================================================== --- test/Transforms/LICM/hoist-deref-load.ll +++ test/Transforms/LICM/hoist-deref-load.ll @@ -164,5 +164,91 @@ ret void } +; This test represents the following function: +; void test5(int * __restrict__ a, int * __restrict__ b, int *c, int n) { +; if (c != nullptr) +; for (int i = 0; i < n; ++i) +; if (a[i] > 0) +; a[i] = (*c) * b[i]; +; } +; and we want to hoist the load of %c out of the loop. This can be done only +; because the dereferenceable_or_null attribute is on %c and there is a null +; check on %c. + +; CHECK-LABEL: @test5 +; CHECK: load i32, i32* %c, align 4 +; CHECK: for.body: + +define void @test5(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable_or_null(4) %c, i32 %n) #0 { +entry: + %not_null = icmp ne i32* %c, null + br i1 %not_null, label %not.null, label %for.end + +not.null: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end + +for.body: ; preds = %not.null, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %not.null ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32, i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 
%indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry, %not.null + ret void +} + +; This is the same as @test5, but without the null check on %c. +; Without this check, we should not hoist the load of %c. + +; CHECK-LABEL: @test6 +; CHECK: if.then: +; CHECK: load i32, i32* %c, align 4 + +define void @test6(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable_or_null(4) %c, i32 %n) #0 { +entry: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32, i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %2 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + attributes #0 = { nounwind uwtable }