Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -313,7 +313,32 @@ BasicBlock *Header = L->getHeader(); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); - if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L)) + + // If IV's step is one or minus one, we can unroll the loop. + // Because we don't need to generate division for computing trip count. + // But if step is unknown, unrolled loop's trip count sholud be divided by + // step. This case will be checked by isHighCostExpansion. + bool InExpensiveTripCount = false; + if (BranchInst *BI = + dyn_cast(L->getExitingBlock()->getTerminator())) { + if (ICmpInst *ExitCondICmp = dyn_cast(BI->getCondition())) { + if (const SCEVAddRecExpr *AR = + dyn_cast(SE->getSCEVAtScope( + SE->getSCEV(ExitCondICmp->getOperand(0)), L))) { + if (AR->getStepRecurrence(*SE)->getSCEVType() == scConstant) { + if (const SCEVConstant *SCConst = + cast(AR->getOperand(1))) { + if (SCConst->getValue()->isOne() || + SCConst->getValue()->isMinusOne()) + InExpensiveTripCount = true; + } + } + } + } + } + + if (!InExpensiveTripCount && !AllowExpensiveTripCount && + Expander.isHighCostExpansion(TripCountSC, L)) return false; // We only handle cases when the unroll factor is a power of 2. Index: test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll =================================================================== --- test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll +++ test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll @@ -24,4 +24,30 @@ ret i32 0 } +;; We expect this loop to be unrolled, because IV's step is minus one. +;; In this case, we don't need to generate dvision for computing trip count. + +define i32 @test2(i64* %loc, i64 %conv7) { +; CHECK-LABEL: @test2( +; CHECK-LABEL: for.body.prol +entry: + %rem0 = load i64, i64* %loc, align 8, !tbaa !0 + %div11 = udiv i64 %rem0, %conv7 + %cmp.i38 = icmp ugt i64 %div11, 1 + %div12 = select i1 %cmp.i38, i64 %div11, i64 1 + br label %for.body +for.body: + %rem1 = phi i64 [ %rem0, %entry ], [ %rem2, %for.body ] + %k1 = phi i64 [ %div12, %entry ], [ %dec, %for.body ] + %mul1 = mul i64 %rem1, 48271 + %rem2 = urem i64 %mul1, 2147483647 + %dec = add i64 %k1, -1 + %cmp = icmp eq i64 %dec, 0 + br i1 %cmp, label %exit, label %for.body +exit: + %rem3 = phi i64 [ %rem2, %for.body ] + store i64 %rem3, i64* %loc, align 8, !tbaa !0 + ret i32 0 +} + !0 = !{i64 1, i64 100}