diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -50,8 +50,12 @@ FMulAdd, ///< Fused multiply-add of floats (a * b + c). SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop ///< invariant - SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop + SelectFCmp, ///< Integer select(fcmp(),x,y) where one of (x,y) is loop ///< invariant + SelectIVICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is increasing + ///< loop induction PHI + SelectIVFCmp, ///< Integer select(fcmp(),x,y) where one of (x,y) is increasing + ///< loop induction PHI }; /// The RecurrenceDescriptor is used to identify recurrences variables in a @@ -123,7 +127,7 @@ /// the returned struct. static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, - FastMathFlags FuncFMF); + FastMathFlags FuncFMF, ScalarEvolution *SE); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, @@ -144,11 +148,13 @@ /// Returns a struct describing whether the instruction is either a /// Select(ICmp(A, B), X, Y), or /// Select(FCmp(A, B), X, Y) - /// where one of (X, Y) is a loop invariant integer and the other is a PHI - /// value. \p Prev specifies the description of an already processed select - /// instruction, so its corresponding cmp can be matched to it. + /// where one of (X, Y) is a loop invariant integer or an increasing loop + /// induction variable and the other is a PHI value. \p Prev specifies the + /// description of an already processed select instruction, so its + /// corresponding cmp can be matched to it. 
static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi, - Instruction *I, InstDesc &Prev); + Instruction *I, InstDesc &Prev, + ScalarEvolution *SE); /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. @@ -234,7 +240,8 @@ /// Returns true if the recurrence kind is of the form /// select(cmp(),x,y) where one of (x,y) is loop invariant. static bool isSelectCmpRecurrenceKind(RecurKind Kind) { - return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp; + return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp || + Kind == RecurKind::SelectIVICmp || Kind == RecurKind::SelectIVFCmp; } /// Returns the type of the recurrence. This type can be narrower than the diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -358,9 +358,10 @@ /// See RecurrenceDescriptor::isSelectCmpPattern for a description of the /// pattern we are trying to match. In this pattern we are only ever selecting /// between two values: 1) an initial PHI start value, and 2) a loop invariant -/// value. This function uses \p LoopExitInst to determine 2), which we then use -/// to select between \p Left and \p Right. Any lane value in \p Left that -/// matches 2) will be merged into \p Right. +/// value or an increasing loop induction variable. This function uses \p +/// LoopExitInst to determine 2), which we then use to select between \p Left +/// and \p Right. Any lane value in \p Left that matches 2) will be merged into +/// \p Right. Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, Value *Left, Value *Right); @@ -391,6 +392,15 @@ /// Create a target reduction of the given vector \p Src for a reduction of the /// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. 
The reduction operation /// is described by \p Desc. +Value *createInvariantSelectCmpTargetReduction(IRBuilderBase &B, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc, + PHINode *OrigPhi); + +/// Create a target reduction of the given vector \p Src for a reduction whose +/// kind conforms to RecurrenceDescriptor::isSelectCmpPattern. The reduction +/// operation is described by \p Desc. Value *createSelectCmpTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, Value *Src, @@ -411,6 +421,16 @@ const RecurrenceDescriptor &Desc, Value *Src, Value *Start); +/// Returns a set of cmp and select instructions as shown below: +/// Select(Cmp(NE, Rdx, Iden), Rdx, InitVal) +/// where \p Rdx is a scalar value generated by target reduction, Iden is the +/// sentinel value of the recurrence descriptor \p Desc, and InitVal is the +/// start value of the recurrence descriptor \p Desc. +Value *createSentinelValueHandling(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + const RecurrenceDescriptor &Desc, + Value *Rdx); + /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). /// If OpValue is non-null, we only consider operations similar to OpValue diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -54,6 +54,8 @@ case RecurKind::UMin: case RecurKind::SelectICmp: case RecurKind::SelectFCmp: + case RecurKind::SelectIVICmp: + case RecurKind::SelectIVFCmp: return true; } return false; @@ -375,7 +377,7 @@ // type-promoted). if (Cur != Start) { ReduxDesc = - isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF); + isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE); ExactFPMathInst = ExactFPMathInst == nullptr ? 
ReduxDesc.getExactFPMathInst() : ExactFPMathInst; @@ -419,10 +421,12 @@ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) return false; - if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) && + if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp || + Kind == RecurKind::SelectIVICmp) && (isa(Cur) || isa(Cur))) ++NumCmpSelectPatternInst; - if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) && + if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp || + Kind == RecurKind::SelectIVFCmp) && (isa(Cur) || isa(Cur))) ++NumCmpSelectPatternInst; @@ -488,7 +492,7 @@ ((!isa(UI) && !isa(UI) && !isa(UI)) || (!isConditionalRdxPattern(Kind, UI).isRecurrence() && - !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal) + !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal, SE) .isRecurrence() && !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence()))) return false; @@ -629,7 +633,8 @@ // value (3 in the example above). RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi, - Instruction *I, InstDesc &Prev) { + Instruction *I, InstDesc &Prev, + ScalarEvolution *SE) { // We must handle the select(cmp(),x,y) as a single instruction. Advance to // the select. CmpInst::Predicate Pred; @@ -653,14 +658,37 @@ else return InstDesc(false, I); + auto IsIncreasingLoopInduction = [&SE, &Loop](Value *V) { + if (!SE) + return false; + + auto *Phi = dyn_cast(V); + if (!Phi) + return false; + + auto LB = Loop::LoopBounds::getBounds(*Loop, *Phi, *SE); + if (!LB) + return false; + + auto Direction = LB->getDirection(); + return Direction == Loop::LoopBounds::Direction::Increasing; + }; + // We are looking for selects of the form: // select(cmp(), phi, loop_invariant) or // select(cmp(), loop_invariant, phi) - if (!Loop->isLoopInvariant(NonPhi)) - return InstDesc(false, I); + if (Loop->isLoopInvariant(NonPhi)) + return InstDesc(I, isa(I->getOperand(0)) ? 
RecurKind::SelectICmp + : RecurKind::SelectFCmp); + // or + // select(cmp(), phi, loop_induction) or + // select(cmp(), loop_induction, phi) + if (IsIncreasingLoopInduction(NonPhi)) + return InstDesc(I, isa(I->getOperand(0)) + ? RecurKind::SelectIVICmp + : RecurKind::SelectIVFCmp); - return InstDesc(I, isa(I->getOperand(0)) ? RecurKind::SelectICmp - : RecurKind::SelectFCmp); + return InstDesc(false, I); } RecurrenceDescriptor::InstDesc @@ -762,10 +790,9 @@ return InstDesc(true, SI); } -RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi, - Instruction *I, RecurKind Kind, - InstDesc &Prev, FastMathFlags FuncFMF) { +RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( + Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev, + FastMathFlags FuncFMF, ScalarEvolution *SE) { assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind); switch (I->getOpcode()) { default: @@ -800,7 +827,7 @@ case Instruction::ICmp: case Instruction::Call: if (isSelectCmpRecurrenceKind(Kind)) - return isSelectCmpPattern(L, OrigPhi, I, Prev); + return isSelectCmpPattern(L, OrigPhi, I, Prev, SE); if (isIntMinMaxRecurrenceKind(Kind) || (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) || (isa(I) && I->hasNoNaNs() && @@ -1067,6 +1094,10 @@ case RecurKind::SelectFCmp: return getRecurrenceStartValue(); break; + case RecurKind::SelectIVICmp: + case RecurKind::SelectIVFCmp: + // FIXME: SMax or UMax, I'm not sure which one is correct. 
+ return getRecurrenceIdentity(RecurKind::SMax, Tp, FMF); default: llvm_unreachable("Unknown recurrence kind"); } @@ -1094,10 +1125,12 @@ case RecurKind::UMax: case RecurKind::UMin: case RecurKind::SelectICmp: + case RecurKind::SelectIVICmp: return Instruction::ICmp; case RecurKind::FMax: case RecurKind::FMin: case RecurKind::SelectFCmp: + case RecurKind::SelectIVFCmp: return Instruction::FCmp; default: llvm_unreachable("Unknown recurrence operation"); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -933,11 +933,22 @@ Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, Value *Left, Value *Right) { - if (auto VTy = dyn_cast(Left->getType())) - StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal); - Value *Cmp = - Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp"); - return Builder.CreateSelect(Cmp, Left, Right, "rdx.select"); + switch (RK) { + case RecurKind::SelectICmp: + case RecurKind::SelectFCmp: { + if (auto VTy = dyn_cast(Left->getType())) + StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal); + Value *Cmp = + Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp"); + return Builder.CreateSelect(Cmp, Left, Right, "rdx.select"); + } + case RecurKind::SelectIVICmp: + case RecurKind::SelectIVFCmp: + // TODO: SMax or UMax? 
+ return createMinMaxOp(Builder, RecurKind::SMax, Left, Right); + default: + llvm_unreachable("Unknown SelectCmp recurrence kind"); + } } Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, @@ -1021,13 +1032,11 @@ return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } -Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder, - const TargetTransformInfo *TTI, - Value *Src, - const RecurrenceDescriptor &Desc, - PHINode *OrigPhi) { - assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind( - Desc.getRecurrenceKind()) && +Value *llvm::createInvariantSelectCmpTargetReduction( + IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, + const RecurrenceDescriptor &Desc, PHINode *OrigPhi) { + assert((Desc.getRecurrenceKind() == RecurKind::SelectICmp || + Desc.getRecurrenceKind() == RecurKind::SelectFCmp) && "Unexpected reduction kind"); Value *InitVal = Desc.getRecurrenceStartValue(); Value *NewVal = nullptr; @@ -1061,6 +1070,30 @@ return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select"); } +Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc, + PHINode *OrigPhi) { + assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind( + Desc.getRecurrenceKind()) && + "Unexpected reduction kind"); + RecurKind RdxKind = Desc.getRecurrenceKind(); + switch (RdxKind) { + case RecurKind::SelectICmp: + case RecurKind::SelectFCmp: + return createInvariantSelectCmpTargetReduction(Builder, TTI, Src, Desc, + OrigPhi); + case RecurKind::SelectIVICmp: + case RecurKind::SelectIVFCmp: + // FIXME: SMax or UMax? 
+ // TODO: Decreasing induction need fix here + return Builder.CreateIntMaxReduce(Src, true); + default: + llvm_unreachable("Unknown SelectCmp recurrence kind"); + } +} + Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind) { @@ -1128,6 +1161,17 @@ return B.CreateFAddReduce(Start, Src); } +Value *llvm::createSentinelValueHandling(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + const RecurrenceDescriptor &Desc, + Value *Rdx) { + Value *InitVal = Desc.getRecurrenceStartValue(); + Value *Iden = Desc.getRecurrenceIdentity( + Desc.getRecurrenceKind(), Rdx->getType(), Desc.getFastMathFlags()); + Value *Cmp = Builder.CreateCmp(CmpInst::ICMP_NE, Rdx, Iden, "rdx.select.cmp"); + return Builder.CreateSelect(Cmp, Rdx, InitVal, "rdx.select"); +} + void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue, bool IncludeWrapFlags) { auto *VecOp = dyn_cast(I); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3998,6 +3998,10 @@ : Builder.CreateZExt(ReducedPartRdx, PhiTy); } + if (RK == RecurKind::SelectIVICmp || RK == RecurKind::SelectIVFCmp) + ReducedPartRdx = + createSentinelValueHandling(Builder, TTI, RdxDesc, ReducedPartRdx); + PHINode *ResumePhi = dyn_cast(PhiR->getStartValue()->getUnderlyingValue()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1252,7 +1252,7 @@ Value *Iden = nullptr; RecurKind RK = RdxDesc.getRecurrenceKind(); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || - RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) { + (RK == RecurKind::SelectICmp || RK == RecurKind::SelectFCmp)) { // MinMax reduction have the start 
value as their identify. if (ScalarPHI) { Iden = StartV; @@ -1262,6 +1262,14 @@ StartV = Iden = Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); } + } else if (RK == RecurKind::SelectIVICmp || RK == RecurKind::SelectIVFCmp) { + StartV = Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), + RdxDesc.getFastMathFlags()); + if (!ScalarPHI) { + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(VectorPH->getTerminator()); + StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden); + } } else { Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), RdxDesc.getFastMathFlags()); diff --git a/llvm/test/Transforms/LoopVectorize/select-min-index.ll b/llvm/test/Transforms/LoopVectorize/select-min-index.ll --- a/llvm/test/Transforms/LoopVectorize/select-min-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-min-index.ll @@ -1,6 +1,7 @@ -; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function test_not_vectorize_select_no_min_reduction --version 2 +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s --check-prefix=CHECK-VF4IC1 --check-prefix=CHECK +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s --check-prefix=CHECK-VF4IC2 --check-prefix=CHECK +; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s --check-prefix=CHECK-VF1IC2 --check-prefix=CHECK ; Test cases for selecting the index with the minimum value. 
@@ -82,8 +83,174 @@ } define i64 @test_not_vectorize_select_no_min_reduction(ptr %src) { -; CHECK-LABEL: @test_not_vectorize_select_no_min_reduction( -; CHECK-NOT: vector.body: +; CHECK-VF4IC1-LABEL: define i64 @test_not_vectorize_select_no_min_reduction +; CHECK-VF4IC1-SAME: (ptr [[SRC:%.*]]) { +; CHECK-VF4IC1-NEXT: entry: +; CHECK-VF4IC1-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC1: vector.ph: +; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC1: vector.body: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3]] = add <4 x i64> [[WIDE_LOAD]], +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP3]], <4 x i32> +; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp ugt <4 x i64> [[TMP4]], [[WIDE_LOAD]] +; CHECK-VF4IC1-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF4IC1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF4IC1: middle.block: +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] 
= call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP6]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 0 +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF4IC1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC1: scalar.ph: +; CHECK-VF4IC1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: br label [[LOOP:%.*]] +; CHECK-VF4IC1: loop: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SCALAR_RECUR]], [[L]] +; CHECK-VF4IC1-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1 +; CHECK-VF4IC1-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[SCALAR_RECUR]], i64 [[L]]) +; CHECK-VF4IC1-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF4IC1: exit: +; 
CHECK-VF4IC1-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i64 [[RES]] +; +; CHECK-VF4IC2-LABEL: define i64 @test_not_vectorize_select_no_min_reduction +; CHECK-VF4IC2-SAME: (ptr [[SRC:%.*]]) { +; CHECK-VF4IC2-NEXT: entry: +; CHECK-VF4IC2-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC2: vector.ph: +; CHECK-VF4IC2-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC2: vector.body: +; CHECK-VF4IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-VF4IC2-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP0]] +; CHECK-VF4IC2-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP1]] +; CHECK-VF4IC2-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 +; CHECK-VF4IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 4 +; CHECK-VF4IC2-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP2]], i32 4 +; CHECK-VF4IC2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP5]], align 4 +; CHECK-VF4IC2-NEXT: [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], +; CHECK-VF4IC2-NEXT: [[TMP7]] = add <4 x i64> [[WIDE_LOAD3]], +; CHECK-VF4IC2-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP6]], <4 x i32> +; CHECK-VF4IC2-NEXT: [[TMP9:%.*]] = 
shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP7]], <4 x i32> +; CHECK-VF4IC2-NEXT: [[TMP10:%.*]] = icmp ugt <4 x i64> [[TMP8]], [[WIDE_LOAD]] +; CHECK-VF4IC2-NEXT: [[TMP11:%.*]] = icmp ugt <4 x i64> [[TMP9]], [[WIDE_LOAD3]] +; CHECK-VF4IC2-NEXT: [[TMP12]] = select <4 x i1> [[TMP10]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC2-NEXT: [[TMP13]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI2]] +; CHECK-VF4IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-VF4IC2-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-VF4IC2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF4IC2-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF4IC2: middle.block: +; CHECK-VF4IC2-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[TMP12]], <4 x i64> [[TMP13]]) +; CHECK-VF4IC2-NEXT: [[TMP15:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX]]) +; CHECK-VF4IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP15]], -9223372036854775808 +; CHECK-VF4IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP15]], i64 0 +; CHECK-VF4IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF4IC2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; CHECK-VF4IC2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC2: scalar.ph: +; CHECK-VF4IC2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-VF4IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: br label [[LOOP:%.*]] +; CHECK-VF4IC2: loop: +; CHECK-VF4IC2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: 
[[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF4IC2-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF4IC2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SCALAR_RECUR]], [[L]] +; CHECK-VF4IC2-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1 +; CHECK-VF4IC2-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[SCALAR_RECUR]], i64 [[L]]) +; CHECK-VF4IC2-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF4IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF4IC2-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF4IC2: exit: +; CHECK-VF4IC2-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: ret i64 [[RES]] +; +; CHECK-VF1IC2-LABEL: define i64 @test_not_vectorize_select_no_min_reduction +; CHECK-VF1IC2-SAME: (ptr [[SRC:%.*]]) { +; CHECK-VF1IC2-NEXT: entry: +; CHECK-VF1IC2-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC2: vector.ph: +; CHECK-VF1IC2-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF1IC2: vector.body: +; CHECK-VF1IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC2-NEXT: 
[[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP0]] +; CHECK-VF1IC2-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP1]] +; CHECK-VF1IC2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP2]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP6:%.*]] = add i64 [[TMP4]], 1 +; CHECK-VF1IC2-NEXT: [[TMP7]] = add i64 [[TMP5]], 1 +; CHECK-VF1IC2-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[VECTOR_RECUR]], [[TMP4]] +; CHECK-VF1IC2-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP6]], [[TMP5]] +; CHECK-VF1IC2-NEXT: [[TMP10]] = select i1 [[TMP8]], i64 [[TMP0]], i64 [[VEC_PHI]] +; CHECK-VF1IC2-NEXT: [[TMP11]] = select i1 [[TMP9]], i64 [[TMP1]], i64 [[VEC_PHI1]] +; CHECK-VF1IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF1IC2-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF1IC2: middle.block: +; CHECK-VF1IC2-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP10]], i64 [[TMP11]]) +; CHECK-VF1IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX]], -9223372036854775808 +; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX]], i64 0 +; CHECK-VF1IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF1IC2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF1IC2: scalar.ph: +; CHECK-VF1IC2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-VF1IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: br label [[LOOP:%.*]] +; CHECK-VF1IC2: loop: +; CHECK-VF1IC2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; 
CHECK-VF1IC2-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF1IC2-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF1IC2-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF1IC2-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF1IC2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SCALAR_RECUR]], [[L]] +; CHECK-VF1IC2-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1 +; CHECK-VF1IC2-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[SCALAR_RECUR]], i64 [[L]]) +; CHECK-VF1IC2-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF1IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF1IC2-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF1IC2: exit: +; CHECK-VF1IC2-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -298,3 +465,5 @@ declare i64 @llvm.umin.i64(i64, i64) declare i16 @llvm.umin.i16(i16, i16) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}}