Index: include/llvm/Analysis/ValueTracking.h
===================================================================
--- include/llvm/Analysis/ValueTracking.h
+++ include/llvm/Analysis/ValueTracking.h
@@ -213,12 +213,14 @@
   /// are lifetime markers.
   bool onlyUsedByLifetimeMarkers(const Value *V);
 
-  /// isDereferenceablePointer - Return true if this is always a dereferenceable
-  /// pointer.
-  ///
-  /// Test if this value is always a pointer to allocated and suitably aligned
-  /// memory for a simple load or store.
-  bool isDereferenceablePointer(const Value *V, const DataLayout &DL);
+  /// isDereferenceablePointer - Return true if this is always a dereferenceable
+  /// pointer. If the context instruction is specified perform context-sensitive
+  /// analysis and return true if the pointer is dereferenceable at the
+  /// specified instruction.
+  bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
+                                const Instruction *CtxI = nullptr,
+                                const DominatorTree *DT = nullptr,
+                                const TargetLibraryInfo *TLI = nullptr);
 
   /// isSafeToSpeculativelyExecute - Return true if the instruction does not
   /// have any effects besides calculating the result and does not have
@@ -233,18 +235,36 @@
   /// memory leak. It also returns false for instructions related to control
   /// flow, specifically terminators and PHI nodes.
   ///
-  /// This method only looks at the instruction itself and its operands, so if
-  /// this method returns true, it is safe to move the instruction as long as
-  /// the correct dominance relationships for the operands and users hold.
-  /// However, this method can return true for instructions that read memory;
+  /// If the CtxI is specified this method performs context-sensitive analysis
+  /// and returns true if it is safe to execute the instruction immediately
+  /// before the CtxI.
+  ///
+  /// If the CtxI is NOT specified this method only looks at the instruction
+  /// itself and its operands, so if this method returns true, it is safe to
+  /// move the instruction as long as the correct dominance relationships for
+  /// the operands and users hold.
+  ///
+  /// This method can return true for instructions that read memory;
   /// for such instructions, moving them may change the resulting value.
-  bool isSafeToSpeculativelyExecute(const Value *V);
+  bool isSafeToSpeculativelyExecute(const Value *V,
+                                    const Instruction *CtxI = nullptr,
+                                    const DominatorTree *DT = nullptr,
+                                    const TargetLibraryInfo *TLI = nullptr);
 
   /// isKnownNonNull - Return true if this pointer couldn't possibly be null by
   /// its definition. This returns true for allocas, non-extern-weak globals
   /// and byval arguments.
   bool isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI = nullptr);
 
+  /// isKnownNonNullAt - Return true if this pointer couldn't possibly be null.
+  /// If both the context instruction and the dominator tree are specified,
+  /// perform context-sensitive analysis and return true if the pointer couldn't
+  /// possibly be null at the specified instruction.
+  bool isKnownNonNullAt(const Value *V,
+                        const Instruction *CtxI = nullptr,
+                        const DominatorTree *DT = nullptr,
+                        const TargetLibraryInfo *TLI = nullptr);
+
   /// Return true if it is valid to use the assumptions provided by an
   /// assume intrinsic, I, at the point in the control-flow identified by the
   /// context instruction, CxtI.
Index: include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- include/llvm/Transforms/Utils/LoopUtils.h
+++ include/llvm/Transforms/Utils/LoopUtils.h
@@ -226,13 +226,13 @@
 /// first order w.r.t the DominatorTree. This allows us to visit definitions
 /// before uses, allowing us to hoist a loop body in one pass without iteration.
 /// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,
-/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the 
+/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
 /// loop and loop safety information as arguments. It returns changed status.
 bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *,
                  TargetLibraryInfo *, Loop *, AliasSetTracker *,
                  LICMSafetyInfo *);
 
-/// \brief Try to promote memory values to scalars by sinking stores out of 
+/// \brief Try to promote memory values to scalars by sinking stores out of
 /// the loop and moving loads to before the loop. We do this by looping over
 /// the stores in the loop, looking for stores to Must pointers which are
 /// loop invariant. It takes AliasSet, Loop exit blocks vector, loop exit blocks
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -2849,33 +2849,48 @@
 }
 
 static bool isDereferenceableFromAttribute(const Value *BV, APInt Offset,
-                                           Type *Ty, const DataLayout &DL) {
+                                           Type *Ty, const DataLayout &DL,
+                                           const Instruction *CtxI,
+                                           const DominatorTree *DT,
+                                           const TargetLibraryInfo *TLI) {
   assert(Offset.isNonNegative() && "offset can't be negative");
   assert(Ty->isSized() && "must be sized");
 
   APInt DerefBytes(Offset.getBitWidth(), 0);
+  bool CheckForNonNull = false;
   if (const Argument *A = dyn_cast<Argument>(BV)) {
     DerefBytes = A->getDereferenceableBytes();
+    if (!DerefBytes.getBoolValue()) {
+      DerefBytes = A->getDereferenceableOrNullBytes();
+      CheckForNonNull = true;
+    }
   } else if (auto CS = ImmutableCallSite(BV)) {
     DerefBytes = CS.getDereferenceableBytes(0);
+    if (!DerefBytes.getBoolValue()) {
+      DerefBytes = CS.getDereferenceableOrNullBytes(0);
+      CheckForNonNull = true;
+    }
   }
 
   if (DerefBytes.getBoolValue())
     if (DerefBytes.uge(Offset + DL.getTypeStoreSize(Ty)))
-      return true;
-
+      if (!CheckForNonNull || isKnownNonNullAt(BV, CtxI, DT, TLI))
+        return true;
+
   return false;
 }
 
-static bool isDereferenceableFromAttribute(const Value *V,
-                                           const DataLayout &DL) {
+static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
+                                           const Instruction *CtxI,
+                                           const DominatorTree *DT,
+                                           const TargetLibraryInfo *TLI) {
   Type *VTy = V->getType();
   Type *Ty = VTy->getPointerElementType();
   if (!Ty->isSized())
     return false;
 
   APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);
-  return isDereferenceableFromAttribute(V, Offset, Ty, DL);
+  return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
 }
 
 /// Return true if Value is always a dereferenceable pointer.
@@ -2883,6 +2898,9 @@
 /// Test if V is always a pointer to allocated and suitably aligned memory for
 /// a simple load or store.
 static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
+                                     const Instruction *CtxI,
+                                     const DominatorTree *DT,
+                                     const TargetLibraryInfo *TLI,
                                      SmallPtrSetImpl<const Value *> &Visited) {
   // Note that it is not safe to speculate into a malloc'd region because
   // malloc may return null.
@@ -2903,7 +2921,8 @@
     if (STy->isSized() && DTy->isSized() &&
         (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&
         (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy)))
-      return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
+      return isDereferenceablePointer(BC->getOperand(0), DL, CtxI,
+                                      DT, TLI, Visited);
   }
 
   // Global variables which can't collapse to null are ok.
@@ -2915,7 +2934,7 @@
     if (A->hasByValAttr())
       return true;
 
-  if (isDereferenceableFromAttribute(V, DL))
+  if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI))
     return true;
 
   // For GEPs, determine if the indexing lands within the allocated object.
@@ -2923,7 +2942,8 @@
     // Conservatively require that the base pointer be fully dereferenceable.
     if (!Visited.insert(GEP->getOperand(0)).second)
       return false;
-    if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited))
+    if (!isDereferenceablePointer(GEP->getOperand(0), DL, CtxI,
+                                  DT, TLI, Visited))
       return false;
     // Check the indices.
     gep_type_iterator GTI = gep_type_begin(GEP);
@@ -2957,17 +2977,22 @@
   if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))
     if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {
       GCRelocateOperands RelocateInst(I);
-      return isDereferenceablePointer(RelocateInst.derivedPtr(), DL, Visited);
+      return isDereferenceablePointer(RelocateInst.derivedPtr(), DL, CtxI,
+                                      DT, TLI, Visited);
     }
 
   if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
-    return isDereferenceablePointer(ASC->getOperand(0), DL, Visited);
+    return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI,
+                                    DT, TLI, Visited);
 
   // If we don't know, assume the worst.
   return false;
 }
 
-bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL) {
+bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL,
+                                    const Instruction *CtxI,
+                                    const DominatorTree *DT,
+                                    const TargetLibraryInfo *TLI) {
   // When dereferenceability information is provided by a dereferenceable
   // attribute, we know exactly how many bytes are dereferenceable. If we can
   // determine the exact offset to the attributed variable, we can use that
@@ -2979,15 +3004,19 @@
     const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
 
     if (Offset.isNonNegative())
-      if (isDereferenceableFromAttribute(BV, Offset, Ty, DL))
+      if (isDereferenceableFromAttribute(BV, Offset, Ty, DL,
+                                         CtxI, DT, TLI))
         return true;
   }
 
   SmallPtrSet<const Value *, 32> Visited;
-  return ::isDereferenceablePointer(V, DL, Visited);
+  return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited);
 }
 
-bool llvm::isSafeToSpeculativelyExecute(const Value *V) {
+bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+                                        const Instruction *CtxI,
+                                        const DominatorTree *DT,
+                                        const TargetLibraryInfo *TLI) {
   const Operator *Inst = dyn_cast<Operator>(V);
   if (!Inst)
     return false;
@@ -3034,7 +3063,7 @@
         LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
       return false;
     const DataLayout &DL = LI->getModule()->getDataLayout();
-    return isDereferenceablePointer(LI->getPointerOperand(), DL);
+    return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI);
   }
   case Instruction::Call: {
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -3125,6 +3154,68 @@
   return false;
 }
 
+/// Return true if a comparison of V against null controls a branch whose
+/// non-null edge dominates the block containing CtxI. CtxI and DT must be
+/// non-null; both are dereferenced below.
+static bool isKnownNonNullFromDominatingCondition(const Value *V,
+                                                  const Instruction *CtxI,
+                                                  const DominatorTree *DT) {
+  unsigned NumUsesExplored = 0;
+  for (auto U : V->users()) {
+    // Avoid massive lists
+    if (NumUsesExplored >= DomConditionsMaxUses)
+      break;
+    NumUsesExplored++;
+    // Consider only compare instructions uniquely controlling a branch
+    const ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
+    if (!Cmp)
+      continue;
+
+    if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
+      continue;
+
+    for (auto *CmpU : Cmp->users()) {
+      // Users of the compare need not be branches; skip anything else.
+      const BranchInst *BI = dyn_cast<BranchInst>(CmpU);
+      if (!BI)
+        continue;
+      assert(BI->isConditional() && "uses a comparison!");
+
+      BasicBlock *NonNullSuccessor = nullptr;
+      CmpInst::Predicate Pred;
+
+      if (match(const_cast<ICmpInst *>(Cmp),
+                m_c_ICmp(Pred, m_Specific(V), m_Zero()))) {
+        if (Pred == ICmpInst::ICMP_EQ)
+          NonNullSuccessor = BI->getSuccessor(1);
+        else if (Pred == ICmpInst::ICMP_NE)
+          NonNullSuccessor = BI->getSuccessor(0);
+      }
+
+      if (NonNullSuccessor) {
+        BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
+        if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
+                            const DominatorTree *DT,
+                            const TargetLibraryInfo *TLI) {
+  if (isKnownNonNull(V, TLI))
+    return true;
+
+  // The dominating-condition walk dereferences both CtxI and DT, and both
+  // default to nullptr in the public declaration.
+  if (!CtxI || !DT)
+    return false;
+
+  return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT);
+}
+
 OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
                                                    const DataLayout &DL,
                                                    AssumptionCache *AC,
Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -79,6 +79,7 @@
 static bool isGuaranteedToExecute(Instruction &Inst, DominatorTree *DT,
                                   Loop *CurLoop, LICMSafetyInfo *SafetyInfo);
 static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT,
+                                           TargetLibraryInfo *TLI,
                                            Loop *CurLoop,
                                            LICMSafetyInfo *SafetyInfo);
 static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
@@ -88,8 +89,8 @@
                                                 BasicBlock &ExitBlock, PHINode &PN, LoopInfo *LI);
 
 static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
-                               DominatorTree *DT, Loop *CurLoop,
-                               AliasSetTracker *CurAST,
+                               DominatorTree *DT, TargetLibraryInfo *TLI,
+                               Loop *CurLoop, AliasSetTracker *CurAST,
                                LICMSafetyInfo *SafetyInfo);
 
 namespace {
@@ -333,7 +334,7 @@
     // operands of the instruction are loop invariant.
     //
     if (isNotUsedInLoop(I, CurLoop) &&
-        canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) {
+        canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) {
       ++II;
       Changed |= sink(I, LI, DT, CurLoop, CurAST);
     }
@@ -382,8 +383,8 @@
       // is safe to hoist the instruction.
       //
       if (CurLoop->hasLoopInvariantOperands(&I) &&
-          canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) &&
-          isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo))
+          canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) &&
+          isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo))
         Changed |= hoist(I, CurLoop->getLoopPreheader());
     }
 
@@ -421,8 +422,8 @@
 /// instruction.
 ///
 bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
-                        Loop *CurLoop, AliasSetTracker *CurAST,
-                        LICMSafetyInfo *SafetyInfo) {
+                        TargetLibraryInfo *TLI, Loop *CurLoop,
+                        AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) {
   // Loads have extra constraints we have to verify before we can hoist them.
   if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
     if (!LI->isUnordered())
@@ -482,7 +483,7 @@
       !isa<InsertValueInst>(I))
     return false;
 
-  return isSafeToExecuteUnconditionally(I, DT, CurLoop, SafetyInfo);
+  return isSafeToExecuteUnconditionally(I, DT, TLI, CurLoop, SafetyInfo);
 }
 
 /// Returns true if a PHINode is a trivially replaceable with an
@@ -638,10 +639,12 @@
 /// or if it is a trapping instruction and is guaranteed to execute.
 ///
 static bool isSafeToExecuteUnconditionally(Instruction &Inst, DominatorTree *DT,
+                                           TargetLibraryInfo *TLI,
                                            Loop *CurLoop,
                                            LICMSafetyInfo *SafetyInfo) {
   // If it is not a trapping instruction, it is always safe to hoist.
-  if (isSafeToSpeculativelyExecute(&Inst))
+  const Instruction *CtxI = CurLoop->getLoopPreheader()->getTerminator();
+  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
     return true;
 
   return isGuaranteedToExecute(Inst, DT, CurLoop, SafetyInfo);
Index: test/Transforms/LICM/hoist-deref-load.ll
===================================================================
--- test/Transforms/LICM/hoist-deref-load.ll
+++ test/Transforms/LICM/hoist-deref-load.ll
@@ -164,5 +164,91 @@
   ret void
 }
 
+; This test represents the following function:
+; void test5(int * __restrict__ a, int * __restrict__ b, int *c, int n) {
+;   if (c != nullptr)
+;     for (int i = 0; i < n; ++i)
+;       if (a[i] > 0)
+;         a[i] = *c * b[i];
+; }
+; and we want to hoist the load of %c out of the loop. This can be done only
+; because the dereferenceable_or_null attribute is on %c and there is a null
+; check on %c.
+
+; CHECK-LABEL: @test5
+; CHECK: load i32, i32* %c, align 4
+; CHECK: for.body:
+
+define void @test5(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable_or_null(4) %c, i32 %n) #0 {
+entry:
+  %not_null = icmp ne i32* %c, null
+  br i1 %not_null, label %not.null, label %for.end
+
+not.null:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %not.null, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %not.null ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32, i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %2 = load i32, i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry, %not.null
+  ret void
+}
+
+; This is the same as @test5, but without the null check on %c.
+; Without this check, we should not hoist the load of %c.
+
+; CHECK-LABEL: @test6
+; CHECK: if.then:
+; CHECK: load i32, i32* %c, align 4
+
+define void @test6(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable_or_null(4) %c, i32 %n) #0 {
+entry:
+  %cmp11 = icmp sgt i32 %n, 0
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 0
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %1 = load i32, i32* %c, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+  %2 = load i32, i32* %arrayidx3, align 4
+  %mul = mul nsw i32 %2, %1
+  store i32 %mul, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  ret void
+}
+
 attributes #0 = { nounwind uwtable }
 