Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -903,6 +903,36 @@
   // Value: the kind of extension used to widen this Instruction.
   DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
+  typedef std::pair<AssertingVH<Value>, AssertingVH<Instruction>> DefUserPair;
+  // A map with control-dependent ranges for post-increment IV uses. The key is
+  // a pair of IV def and a use of this def denoting the context. The value is
+  // a ConstantRange representing possible values of the def at the given
+  // context.
+  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
+
+  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
+                                              Instruction *UseI) {
+    DefUserPair Key(Def, UseI);
+    auto It = PostIncRangeInfos.find(Key);
+    return It == PostIncRangeInfos.end()
+               ? Optional<ConstantRange>(None)
+               : Optional<ConstantRange>(It->second);
+  }
+
+  // Calculates the PostIncRangeInfos map for the given IV.
+  void calculatePostIncRanges(PHINode *OrigPhi);
+  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
+  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
+    DefUserPair Key(Def, UseI);
+    ConstantRange Current(R.getBitWidth(), /*isFullSet=*/true);
+    auto It = PostIncRangeInfos.find(Key);
+    if (It == PostIncRangeInfos.end()) {
+      PostIncRangeInfos.insert(std::make_pair(Key, R));
+    } else {
+      It->second = R.intersectWith(It->second);
+    }
+  }
+
 public:
   WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
           DominatorTree *DTree,
@@ -1429,7 +1459,7 @@
 ///
 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
-  bool NeverNegative =
+  bool NonNegativeDef =
       SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
                            SE->getConstant(NarrowSCEV->getType(), 0));
   for (User *U : NarrowDef->users()) {
@@ -1439,7 +1469,15 @@
     if (!Widened.insert(NarrowUser).second)
       continue;
 
-    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, NeverNegative);
+    bool NonNegativeUse = false;
+    if (!NonNegativeDef) {
+      // We might have control-dependent range information for this context.
+      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
+        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
+    }
+
+    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
+                               NonNegativeDef || NonNegativeUse);
   }
 }
 
@@ -1479,6 +1517,18 @@
          SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
          "Loop header phi recurrence inputs do not dominate the loop");
 
+  // Iterate over IV uses (including transitive ones) looking for IV increments
+  // in the form of 'add nsw %iv, <const>'. For each increment and each use of
+  // the increment calculate control-dependent range information based on
+  // dominating conditions inside the loop (e.g. a range check inside the
+  // loop). Calculated ranges are stored in the PostIncRangeInfos map.
+  //
+  // Control-dependent range information is later used to prove that a narrow
+  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
+  // this on demand because when pushNarrowIVUsers needs this information some
+  // of the dominating conditions might already be widened.
+  calculatePostIncRanges(OrigPhi);
+
   // The rewriter provides a value for the desired IV expression. This may
   // either find an existing phi or materialize a new one. Either way, we
   // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
@@ -1523,6 +1573,79 @@
   return WidePhi;
 }
 
+void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
+                                    Instruction *NarrowUser) {
+  using namespace llvm::PatternMatch;
+
+  Value *NarrowDefLHS;
+  const APInt *NarrowDefRHS;
+  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
+                                 m_APInt(NarrowDefRHS))) ||
+      !NarrowDefRHS->isNonNegative())
+    return;
+
+  auto UpdateRangeFromCondition = [&] (Value *Condition) {
+    CmpInst::Predicate P;
+    Value *CmpRHS;
+    if (!match(Condition, m_ICmp(P, m_Specific(NarrowDefLHS),
+                                 m_Value(CmpRHS))))
+      return;
+
+    ConstantRange CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
+    ConstantRange CmpConstrainedLHSRange =
+        ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
+    ConstantRange NarrowDefRange =
+        CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
+
+    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
+  };
+
+  BasicBlock *NarrowUserBB = NarrowUser->getParent();
+  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
+       L->contains(DTB->getBlock());
+       DTB = DTB->getIDom()) {
+    auto TI = DTB->getBlock()->getTerminator();
+
+    auto *BI = dyn_cast<BranchInst>(TI);
+    if (!BI || !BI->isConditional())
+      continue;
+
+    BasicBlockEdge BBE(BI->getParent(), BI->getSuccessor(0));
+    if (!(BBE.isSingleEdge() &&
+          DT->dominates(BBE, NarrowUser->getParent())))
+      continue;
+
+    UpdateRangeFromCondition(BI->getCondition());
+  }
+}
+
+void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
+  DenseSet<Instruction *> Visited;
+  SmallVector<Instruction *, 6> Worklist;
+  Worklist.push_back(OrigPhi);
+  Visited.insert(OrigPhi);
+
+  while (!Worklist.empty()) {
+    Instruction *NarrowDef = Worklist.pop_back_val();
+
+    for (Use &U : NarrowDef->uses()) {
+      auto *NarrowUser = cast<Instruction>(U.getUser());
+
+      // Don't go looking outside the current loop.
+      auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
+      if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
+        continue;
+
+      if (!Visited.insert(NarrowUser).second)
+        continue;
+
+      Worklist.push_back(NarrowUser);
+
+      calculatePostIncRange(NarrowDef, NarrowUser);
+    }
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // Live IV Reduction - Minimize IVs live across the loop.
 //===----------------------------------------------------------------------===//
Index: test/Transforms/IndVarSimplify/post-inc-range.ll
===================================================================
--- /dev/null
+++ test/Transforms/IndVarSimplify/post-inc-range.ll
@@ -0,0 +1,103 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "p:64:64:64-n32:64"
+
+; When the IV in this loop is widened, we want to widen this use as well:
+;   icmp slt i32 %i.inc, %limit
+; In order to do this, indvars needs to prove that the narrow IV def (%i.inc)
+; is non-negative based on the range check inside of the loop.
+
+define void @test(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test(
+; CHECK-NOT: trunc
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %within_limits = icmp ult i32 %i, 64
+  br i1 %within_limits, label %continue, label %for.end
+
+continue:
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}
+
+define void @test_range_metadata(i32* %array_length_ptr, i32* %base,
+                                 i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_range_metadata(
+; CHECK-NOT: trunc
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %array_length = load i32, i32* %array_length_ptr, !range !{i32 0, i32 64}
+  %within_limits = icmp ult i32 %i, %array_length
+  br i1 %within_limits, label %continue, label %for.end
+
+continue:
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}
+
+; Negative version of the test above: we don't know anything about the
+; range of the value loaded from %array_length_ptr.
+define void @test_neg(i32* %array_length_ptr, i32* %base,
+                      i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_neg(
+; CHECK: trunc i64
+
+for.body.lr.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+  %array_length = load i32, i32* %array_length_ptr
+  %within_limits = icmp ult i32 %i, %array_length
+  br i1 %within_limits, label %continue, label %for.end
+
+continue:
+  %i.i64 = zext i32 %i to i64
+  %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+  %val = load i32, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %i.inc = add nsw nuw i32 %i, 1
+  %cmp = icmp slt i32 %i.inc, %limit
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  br label %exit
+
+exit:
+  ret void
+}