Index: lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollPeel.cpp +++ lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -30,6 +30,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -46,6 +47,7 @@ #include using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-unroll" @@ -136,6 +138,49 @@ return ToInvariance; } +// Return the number of iterations to peel off that make conditions in the +// body true/false. For example, if we peel 2 iterations off the loop below, +// the condition i < 2 can be evaluated at compile time. +// for (i = 0; i < n; i++) +// if (i < 2) +// .. +// else +// .. +// } +// +// Currently only comparisons of the form IndVar < Constant are supported. +static unsigned countToEliminateCompares(Loop *L, unsigned MaxPeelCount) { + unsigned DesiredPeelCount = 0; + PHINode *IndVar = L->getCanonicalInductionVariable(); + if (!IndVar) + return 0; + + for (auto *BB : L->blocks()) { + auto *BI = dyn_cast(BB->getTerminator()); + if (!BI || BI->isUnconditional()) + continue; + Value *Condition = BI->getCondition(); + ConstantInt *UpperBound; + Value *Var; + CmpInst::Predicate Pred; + if (!Condition || !match(Condition, m_ICmp(Pred, m_Value(Var), + m_ConstantInt(UpperBound)))) + continue; + + if (UpperBound->isNegative()) + continue; + + if ((Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT) && + Var == IndVar) { + unsigned Bound = UpperBound->getLimitedValue(MaxPeelCount + 1); + if (Bound <= MaxPeelCount) + DesiredPeelCount = std::max(DesiredPeelCount, Bound); + } + } + + return DesiredPeelCount; +} + // Return the number of iterations we want to peel off. void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, @@ -170,10 +215,15 @@ if (ToInvariance != InfiniteIterationsToInvariance) DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); } + + // Pay respect to limitations implied by loop size and the max peel count. + unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + + DesiredPeelCount = + std::max(DesiredPeelCount, countToEliminateCompares(L, MaxPeelCount)); + if (DesiredPeelCount > 0) { - // Pay respect to limitations implied by loop size and the max peel count. - unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); Index: test/Transforms/LoopUnroll/peel-loop-conditions.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -verify-dom-info -simplifycfg | FileCheck %s + +declare void @f1() +declare void @f2() + +define void @basic(i32 %k) { +; CHECK-LABEL: @basic( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]] +; CHECK: if.else.peel: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL]] +; CHECK: for.inc.peel: +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 1 +; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL2:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.peel2: +; CHECK-NEXT: [[CMP1_PEEL3:%.*]] = icmp ult i32 [[INC_PEEL]], 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL5:%.*]], label [[IF_ELSE_PEEL4:%.*]] +; CHECK: if.else.peel4: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC_PEEL6:%.*]] +; CHECK: if.then.peel5: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL6]] +; CHECK: for.inc.peel6: +; CHECK-NEXT: [[INC_PEEL7:%.*]] = add nsw i32 [[INC_PEEL]], 1 +; CHECK-NEXT: [[CMP_PEEL8:%.*]] = icmp slt i32 [[INC_PEEL7]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL8]], label [[FOR_BODY:%.*]], label [[FOR_END]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[INC_PEEL7]], [[FOR_INC_PEEL6]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_05]], 2 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !0 +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +}