Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -1822,6 +1822,21 @@ return NumElim; } +// If S contains Base, return true. This function is called by +// findExistingExpansion. +static bool IsExistExpr(const SCEV *S, const SCEV *Base) { + if (S == Base) + return true; + + if (const SCEVNAryExpr *NAry = dyn_cast(S)) { + for (auto *Op : NAry->operands()) { + if (IsExistExpr(Op, Base)) + return true; + } + } + return false; +} + Value *SCEVExpander::findExistingExpansion(const SCEV *S, const Instruction *At, Loop *L) { using namespace llvm::PatternMatch; @@ -1833,18 +1848,45 @@ for (BasicBlock *BB : ExitingBlocks) { ICmpInst::Predicate Pred; Instruction *LHS, *RHS; + ConstantInt *CRHS; BasicBlock *TrueBB, *FalseBB; - if (!match(BB->getTerminator(), - m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), - TrueBB, FalseBB))) - continue; - - if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) - return LHS; + bool IsMatchCmpII = + match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), TrueBB, + FalseBB)); + bool IsMatchCmpIC = false; + if (!IsMatchCmpII) { + IsMatchCmpIC = + match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Instruction(LHS), m_ConstantInt(CRHS)), + TrueBB, FalseBB)); + } - if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) - return RHS; + if (!IsMatchCmpII && !IsMatchCmpIC) { + continue; + } + if (IsMatchCmpII) { + if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) + return LHS; + + if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) + return RHS; + } else if (IsMatchCmpIC) { + if (IsExistExpr(SE.getSCEV(LHS), S) && SE.DT.dominates(LHS, At)) { + if (const SCEVAddRecExpr *AR = + dyn_cast(SE.getSCEVAtScope(LHS, L))) { + if (AR->getStepRecurrence(SE)->getSCEVType() == scConstant) { + if (const SCEVConstant *SCConst = + cast(AR->getOperand(1))) { + if (SCConst->getValue()->isOne() || + SCConst->getValue()->isMinusOne()) + return LHS; + } + } + } + } + } } // There is potential to make this significantly smarter, but this simple Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -313,7 +313,10 @@ BasicBlock *Header = L->getHeader(); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); - if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L)) + BranchInst *ExtingBlockBR = + cast(L->getExitingBlock()->getTerminator()); + if (!AllowExpensiveTripCount && + Expander.isHighCostExpansion(TripCountSC, L, ExtingBlockBR)) return false; // We only handle cases when the unroll factor is a power of 2. Index: test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll =================================================================== --- test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll +++ test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll @@ -24,4 +24,30 @@ ret i32 0 } +;; We expect this loop to be unrolled, because IV's step is minus one. +;; In this case, we don't need to generate dvision for computing trip count. + +define i32 @test2(i64* %loc, i64 %conv7) { +; CHECK-LABEL: @test2( +; CHECK-LABEL: for.body.prol +entry: + %rem0 = load i64, i64* %loc, align 8, !tbaa !0 + %div11 = udiv i64 %rem0, %conv7 + %cmp.i38 = icmp ugt i64 %div11, 1 + %div12 = select i1 %cmp.i38, i64 %div11, i64 1 + br label %for.body +for.body: + %rem1 = phi i64 [ %rem0, %entry ], [ %rem2, %for.body ] + %k1 = phi i64 [ %div12, %entry ], [ %dec, %for.body ] + %mul1 = mul i64 %rem1, 48271 + %rem2 = urem i64 %mul1, 2147483647 + %dec = add i64 %k1, -1 + %cmp = icmp eq i64 %dec, 0 + br i1 %cmp, label %exit, label %for.body +exit: + %rem3 = phi i64 [ %rem2, %for.body ] + store i64 %rem3, i64* %loc, align 8, !tbaa !0 + ret i32 0 +} + !0 = !{i64 1, i64 100}