Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -313,6 +313,33 @@
   BasicBlock *Header = L->getHeader();
   const DataLayout &DL = Header->getModule()->getDataLayout();
   SCEVExpander Expander(*SE, DL, "loop-unroll");
+
+  // If the exit compare tests an induction variable of this loop whose step
+  // is +1 or -1, the trip count can be computed without a division, so
+  // allow runtime unrolling even when the expansion is otherwise considered
+  // expensive.
+  if (BasicBlock *ExitingBB = L->getExitingBlock()) {
+    // getCondition() is only valid on a conditional branch.
+    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+    if (BI && BI->isConditional()) {
+      if (auto *ExitCond = dyn_cast<ICmpInst>(BI->getCondition())) {
+        // Only the left-hand operand of the compare is inspected.
+        const SCEV *LHS =
+            SE->getSCEVAtScope(SE->getSCEV(ExitCond->getOperand(0)), L);
+        const auto *AR = dyn_cast<SCEVAddRecExpr>(LHS);
+        // Require the recurrence to belong to L itself; a recurrence of an
+        // enclosing loop says nothing about L's trip count.
+        if (AR && AR->getLoop() == L && AR->isAffine()) {
+          const auto *Step =
+              dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+          if (Step && (Step->getValue()->isOne() ||
+                       Step->getValue()->isMinusOne()))
+            AllowExpensiveTripCount = true;
+        }
+      }
+    }
+  }
+
   if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
     return false;
 
Index: test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
===================================================================
--- test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
+++ test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
@@ -24,4 +24,47 @@
   ret i32 0
 }
 
+;; This loop is expected to be unrolled because the IV's step is minus one,
+;; so no division needs to be generated to compute the trip count.
+
+define i32 @test2(i64 %v12, i8* %array, i64* %loc, fp128 %call1.i, fp128 %call2.i) {
+; CHECK-LABEL: @test2(
+; CHECK: for.body.prol
+; CHECK: 0xL0000000000000000401DFFFFFFF80000
+; CHECK: 0xL0000000000000000401DFFFFFFF80000
+; CHECK: 0xL0000000000000000401DFFFFFFF80000
+entry:
+  %rem0 = load i64, i64* %loc, align 8, !tbaa !0
+  %div1 = fdiv fast fp128 %call1.i, %call2.i
+  %conv7 = fptoui fp128 %div1 to i64
+  %sub11 = add i64 %conv7, 23
+  %div11 = udiv i64 %sub11, %conv7
+  %cmp.i38 = icmp ugt i64 %div11, 1
+  %div12 = select i1 %cmp.i38, i64 %div11, i64 1
+  br label %for.body
+for.body:
+  %rem1 = phi i64 [ %rem0, %entry ], [ %rem2, %for.body ]
+  %tmp1 = phi float [ 1.000000e+00, %entry ], [ %conv21, %for.body ]
+  %sum1 = phi float [ 0.000000e+00, %entry ], [ %add18, %for.body ]
+  %k1 = phi i64 [ %div12, %entry ], [ %dec, %for.body ]
+  %mul1 = mul i64 %rem1, 48271
+  %rem2 = urem i64 %mul1, 2147483647
+  %sub16 = add nsw i64 %rem2, -1
+  %conv17. = uitofp i64 %sub16 to float
+  %mul2 = fmul fast float %conv17., %tmp1
+  %add18 = fadd fast float %mul2, %sum1
+  %conv19 = fpext float %tmp1 to fp128
+  %mul3 = fmul fast fp128 %conv19, 0xL0000000000000000401DFFFFFFF80000
+  %conv21 = fptrunc fp128 %mul3 to float
+  %dec = add i64 %k1, -1
+  %cmp = icmp eq i64 %dec, 0
+  br i1 %cmp, label %exit, label %for.body
+exit:
+  %conv22 = phi float [ %conv21, %for.body ]
+  %add19 = phi float [ %add18, %for.body ]
+  %rem3 = phi i64 [ %rem2, %for.body ]
+  store i64 %rem3, i64* %loc, align 8, !tbaa !0
+  ret i32 0
+}
+
 !0 = !{i64 1, i64 100}