diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -57,11 +57,17 @@ IFindLastIV, ///< FindLast reduction with select(icmp(),x,y) where one of ///< (x,y) is increasing loop induction PHI, and both x and y are ///< integer type. - FFindLastIV ///< FindLast reduction with select(fcmp(),x,y) where one of (x,y) - ///< is increasing loop induction PHI, and both x and y are - ///< integer type. - // TODO: Any_of and FindLast reduction need not be restricted to integer type - // only. + FFindLastIV, ///< FindLast reduction with select(fcmp(),x,y) where one of + ///< (x,y) is increasing loop induction PHI, and both x and y are + ///< integer type. + IFindFirstIV, ///< FindFirst reduction with select(icmp(),x,y) where one of + ///< (x,y) is decreasing loop induction PHI, and both x and y + ///< are integer type. + FFindFirstIV ///< FindFirst reduction with select(fcmp(),x,y) where one of + ///< (x,y) is decreasing loop induction PHI, and both x and y are + ///< integer type. + // TODO: Any_of, FindLast, and FindFirst reduction need not be restricted to + // integer type only. }; /// The RecurrenceDescriptor is used to identify recurrences variables in a @@ -163,15 +169,13 @@ /// Returns a struct describing whether the instruction is either a /// Select(ICmp(A, B), X, Y), or /// Select(FCmp(A, B), X, Y) - /// where one of (X, Y) is an increasing loop induction variable, and the - /// other is a PHI value. \p Prev specifies the description of an already - /// processed select instruction, so its corresponding cmp can be matched to - /// it. - // TODO: FindLast does not need be restricted to increasing loop induction - // variables. - static InstDesc isFindLastIVPattern(Loop *Loop, PHINode *OrigPhi, - Instruction *I, InstDesc &Prev, - ScalarEvolution *SE); + /// where one of (X, Y) is an increasing/decreasing loop induction variable, + /// and the other is a PHI value. \p Prev specifies the description of an + /// already processed select instruction, so its corresponding cmp can be + /// matched to it. + static InstDesc isFindFirstLastIVPattern(Loop *Loop, PHINode *OrigPhi, + Instruction *I, InstDesc &Prev, + ScalarEvolution *SE); /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. @@ -267,6 +271,12 @@ return Kind == RecurKind::IFindLastIV || Kind == RecurKind::FFindLastIV; } + /// Returns true if the recurrence kind is of the form + /// select(cmp(),x,y) where one of (x,y) is decreasing loop induction. + static bool isFindFirstIVRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::IFindFirstIV || Kind == RecurKind::FFindFirstIV; + } + /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. Type *getRecurrenceType() const { return RecurrenceType; } diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -378,6 +378,12 @@ /// \p Left and \p Right. Value *createFindLastIVOp(IRBuilderBase &Builder, Value *Left, Value *Right); +/// See RecurrenceDescriptor::isFindFirstIVPattern for a description of the +/// pattern we are trying to match. In this pattern, since the selected set of +/// values forms an decreasing sequence, we are selecting the minimum value from +/// \p Left and \p Right. +Value *createFindFirstIVOp(IRBuilderBase &Builder, Value *Left, Value *Right); + /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. /// The Builder's fast-math-flags must be set to propagate the expected values. Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, @@ -418,6 +424,14 @@ Value *Src, const RecurrenceDescriptor &Desc); +/// Create a target reduction of the given vector \p Src for a reduction of the +/// kind RecurKind::IFindFirstIV or RecurKind::FFindFirstIV. The reduction +/// operation is described by \p Desc. +Value *createFindFirstIVTargetReduction(IRBuilderBase &B, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc); + /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -56,6 +56,8 @@ case RecurKind::FAnyOf: case RecurKind::IFindLastIV: case RecurKind::FFindLastIV: + case RecurKind::IFindFirstIV: + case RecurKind::FFindFirstIV: return true; } return false; @@ -414,6 +416,7 @@ // A reduction operation must only have one use of the reduction value. if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) && !isAnyOfRecurrenceKind(Kind) && !isFindLastIVRecurrenceKind(Kind) && + !isFindFirstIVRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1)) return false; @@ -422,11 +425,11 @@ return false; if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::IAnyOf || - Kind == RecurKind::IFindLastIV) && + Kind == RecurKind::IFindLastIV || Kind == RecurKind::IFindFirstIV) && (isa(Cur) || isa(Cur))) ++NumCmpSelectPatternInst; if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::FAnyOf || - Kind == RecurKind::FFindLastIV) && + Kind == RecurKind::FFindLastIV || Kind == RecurKind::FFindFirstIV) && (isa(Cur) || isa(Cur))) ++NumCmpSelectPatternInst; @@ -494,7 +497,7 @@ (!isConditionalRdxPattern(Kind, UI).isRecurrence() && !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal) .isRecurrence() && - !isFindLastIVPattern(TheLoop, Phi, UI, IgnoredVal, SE) + !isFindFirstLastIVPattern(TheLoop, Phi, UI, IgnoredVal, SE) .isRecurrence() && !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence()))) return false; @@ -514,7 +517,8 @@ NumCmpSelectPatternInst != 0) return false; - if ((isAnyOfRecurrenceKind(Kind) || isFindLastIVRecurrenceKind(Kind)) && + if ((isAnyOfRecurrenceKind(Kind) || isFindLastIVRecurrenceKind(Kind) || + isFindFirstIVRecurrenceKind(Kind)) && NumCmpSelectPatternInst != 1) return false; @@ -613,6 +617,8 @@ return true; } +enum class LoopInductionVariable { None, Increasing, Decreasing }; + // We are looking for loops that do something like this: // The reduction value (r) only has two states, in this example 0 or 3. // int r = 0; @@ -698,9 +704,9 @@ // value of the data type or a non-constant value by using mask and multiple // reduction operations. RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isFindLastIVPattern(Loop *Loop, PHINode *OrigPhi, - Instruction *I, InstDesc &Prev, - ScalarEvolution *SE) { +RecurrenceDescriptor::isFindFirstLastIVPattern(Loop *Loop, PHINode *OrigPhi, + Instruction *I, InstDesc &Prev, + ScalarEvolution *SE) { // We must handle the select(cmp(),x,y) as a single instruction. Advance to // the select. CmpInst::Predicate Pred; @@ -724,37 +730,49 @@ else return InstDesc(false, I); - auto IsIncreasingLoopInduction = [&SE, &Loop](Value *V) { + auto GetLoopInduction = [&SE, &Loop](Value *V) { auto *Phi = dyn_cast(V); - if (!Phi) - return false; - - if (!SE) - return false; + if (!SE || !Phi) + return LoopInductionVariable::None; InductionDescriptor ID; if (!InductionDescriptor::isInductionPHI(Phi, Loop, SE, ID)) - return false; + return LoopInductionVariable::None; - const SCEVAddRecExpr *AR = cast(SE->getSCEV(Phi)); + const auto *AR = cast(SE->getSCEV(V)); if (!AR->hasNoSignedWrap()) - return false; + return LoopInductionVariable::None; - ConstantInt *IVStartValue = dyn_cast(ID.getStartValue()); - if (!IVStartValue || IVStartValue->isMinSignedValue()) - return false; + const auto *IVStartValue = dyn_cast(ID.getStartValue()); + if (!IVStartValue) + return LoopInductionVariable::None; const SCEV *Step = ID.getStep(); - return SE->isKnownPositive(Step); + if (SE->isKnownPositive(Step) && + !IVStartValue->isMinValue(/* Signed */ true)) + return LoopInductionVariable::Increasing; + else if (SE->isKnownNegative(Step) && + !IVStartValue->isMaxValue(/* Signed */ true)) + return LoopInductionVariable::Decreasing; + else + return LoopInductionVariable::None; }; // We are looking for selects of the form: // select(cmp(), phi, loop_induction) or // select(cmp(), loop_induction, phi) - if (IsIncreasingLoopInduction(NonRdxPhi)) + switch (GetLoopInduction(NonRdxPhi)) { + case LoopInductionVariable::Increasing: return InstDesc(I, isa(I->getOperand(0)) ? RecurKind::IFindLastIV : RecurKind::FFindLastIV); + case LoopInductionVariable::Decreasing: + return InstDesc(I, isa(I->getOperand(0)) + ? RecurKind::IFindFirstIV + : RecurKind::FFindFirstIV); + case LoopInductionVariable::None: + break; + } return InstDesc(false, I); } @@ -900,8 +918,8 @@ case Instruction::Call: if (isAnyOfRecurrenceKind(Kind)) return isAnyOfPattern(L, OrigPhi, I, Prev); - if (isFindLastIVRecurrenceKind(Kind)) - return isFindLastIVPattern(L, OrigPhi, I, Prev, SE); + if (isFindLastIVRecurrenceKind(Kind) || isFindFirstIVRecurrenceKind(Kind)) + return isFindFirstLastIVPattern(L, OrigPhi, I, Prev, SE); auto HasRequiredFMF = [&]() { if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) return true; @@ -1197,6 +1215,9 @@ case RecurKind::IFindLastIV: case RecurKind::FFindLastIV: return getRecurrenceIdentity(RecurKind::SMax, Tp, FMF); + case RecurKind::IFindFirstIV: + case RecurKind::FFindFirstIV: + return getRecurrenceIdentity(RecurKind::SMin, Tp, FMF); default: llvm_unreachable("Unknown recurrence kind"); } @@ -1225,6 +1246,7 @@ case RecurKind::UMin: case RecurKind::IAnyOf: case RecurKind::IFindLastIV: + case RecurKind::IFindFirstIV: return Instruction::ICmp; case RecurKind::FMax: case RecurKind::FMin: @@ -1232,6 +1254,7 @@ case RecurKind::FMinimum: case RecurKind::FAnyOf: case RecurKind::FFindLastIV: + case RecurKind::FFindFirstIV: return Instruction::FCmp; default: llvm_unreachable("Unknown recurrence operation"); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -951,6 +951,11 @@ return createMinMaxOp(Builder, RecurKind::SMax, Left, Right); } +Value *llvm::createFindFirstIVOp(IRBuilderBase &Builder, Value *Left, + Value *Right) { + return createMinMaxOp(Builder, RecurKind::SMin, Left, Right); +} + Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right) { Type *Ty = Left->getType(); @@ -1083,6 +1088,15 @@ return Builder.CreateIntMaxReduce(Src, true); } +Value *llvm::createFindFirstIVTargetReduction( + IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, + const RecurrenceDescriptor &Desc) { + assert(RecurrenceDescriptor::isFindFirstIVRecurrenceKind( + Desc.getRecurrenceKind()) && + "Unexpected reduction kind"); + return Builder.CreateIntMinReduce(Src, true); +} + Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind) { @@ -1140,6 +1154,8 @@ return createAnyOfTargetReduction(B, TTI, Src, Desc, OrigPhi); if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) return createFindLastIVTargetReduction(B, TTI, Src, Desc); + if (RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK)) + return createFindFirstIVTargetReduction(B, TTI, Src, Desc); return createSimpleTargetReduction(B, TTI, Src, RK); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4004,6 +4004,8 @@ ReducedPartRdx, RdxPart); else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) ReducedPartRdx = createFindLastIVOp(Builder, ReducedPartRdx, RdxPart); + else if (RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK)) + ReducedPartRdx = createFindFirstIVOp(Builder, ReducedPartRdx, RdxPart); else ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); } @@ -4022,7 +4024,8 @@ : Builder.CreateZExt(ReducedPartRdx, PhiTy); } - if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) + if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) || + RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK)) ReducedPartRdx = createSentinelValueHandling(Builder, TTI, RdxDesc, ReducedPartRdx); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14139,6 +14139,8 @@ case RecurKind::FAnyOf: case RecurKind::IFindLastIV: case RecurKind::FFindLastIV: + case RecurKind::IFindFirstIV: + case RecurKind::FFindFirstIV: case RecurKind::None: llvm_unreachable("Unexpected reduction kind for repeated scalar."); } @@ -14230,6 +14232,8 @@ case RecurKind::FAnyOf: case RecurKind::IFindLastIV: case RecurKind::FFindLastIV: + case RecurKind::IFindFirstIV: + case RecurKind::FFindFirstIV: case RecurKind::None: llvm_unreachable("Unexpected reduction kind for reused scalars."); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1306,12 +1306,13 @@ StartV = Iden = Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); } - } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { - // [I|F]FindLastIV will use a sentinel value as the identity to initialize - // the reduction phi. In the middle block, createSentinelValueHandling will - // generate checks to verify if the reduction result is the sentinel value. - // If the result is the sentinel value, it will be corrected back to the - // start value. + } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) || + RecurrenceDescriptor::isFindFirstIVRecurrenceKind(RK)) { + // [I|F]Find[Last|First]IV will use a sentinel value as the identity to + // initialize the reduction phi. In the middle block, + // createSentinelValueHandling will generate checks to verify if the + // reduction result is the sentinel value. If the result is the sentinel + // value, it will be corrected back to the start value. // TODO: The sentinel value is not always necessary. When the start value is // a constant, and smaller than the start value of the induction variable, // the start value can be directly used to initialize the reduction phi. diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -1747,6 +1747,214 @@ ret i64 %cond } +define i64 @select_decreasing_induction_icmp_const_start(ptr nocapture readonly %a) { +; CHECK-VF4IC1-LABEL: define i64 @select_decreasing_induction_icmp_const_start +; CHECK-VF4IC1-SAME: (ptr nocapture readonly [[A:%.*]]) { +; CHECK-VF4IC1-NEXT: entry: +; CHECK-VF4IC1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC1: vector.ph: +; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC1: vector.body: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 -3 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 +; CHECK-VF4IC1-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> [[REVERSE]], +; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-VF4IC1: middle.block: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[TMP5]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP7]], 9223372036854775807 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP7]], i64 331 +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 20000, 20000 +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC1: scalar.ph: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 19999, [[ENTRY:%.*]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC1: for.body: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP8]], 3 +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i64 [[IV]], i64 [[RDX]] +; CHECK-VF4IC1-NEXT: [[DEC]] = add nsw i64 [[IV]], -1 +; CHECK-VF4IC1-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-VF4IC1-NEXT: br i1 [[CMP_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-VF4IC1: exit: +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; +; CHECK-VF4IC4-LABEL: define i64 @select_decreasing_induction_icmp_const_start +; CHECK-VF4IC4-SAME: (ptr nocapture readonly [[A:%.*]]) { +; CHECK-VF4IC4-NEXT: entry: +; CHECK-VF4IC4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC4: vector.ph: +; CHECK-VF4IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC4: vector.body: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -4 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -8 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -12 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 -3 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 +; CHECK-VF4IC4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 -4 +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 -3 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 +; CHECK-VF4IC4-NEXT: [[REVERSE8:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD7]], <4 x i64> poison, <4 x i32> +; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 -8 +; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 -3 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP13]], align 8 +; CHECK-VF4IC4-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD9]], <4 x i64> poison, <4 x i32> +; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 -12 +; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 -3 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i64>, ptr [[TMP15]], align 8 +; CHECK-VF4IC4-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD11]], <4 x i64> poison, <4 x i32> +; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i64> [[REVERSE]], +; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = icmp sgt <4 x i64> [[REVERSE8]], +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp sgt <4 x i64> [[REVERSE10]], +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp sgt <4 x i64> [[REVERSE12]], +; CHECK-VF4IC4-NEXT: [[TMP20]] = select <4 x i1> [[TMP16]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC4-NEXT: [[TMP21]] = select <4 x i1> [[TMP17]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI4]] +; CHECK-VF4IC4-NEXT: [[TMP22]] = select <4 x i1> [[TMP18]], <4 x i64> [[STEP_ADD1]], <4 x i64> [[VEC_PHI5]] +; CHECK-VF4IC4-NEXT: [[TMP23]] = select <4 x i1> [[TMP19]], <4 x i64> [[STEP_ADD2]], <4 x i64> [[VEC_PHI6]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-VF4IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 +; CHECK-VF4IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-VF4IC4: middle.block: +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[TMP20]], <4 x i64> [[TMP21]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX13:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX]], <4 x i64> [[TMP22]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX14:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[RDX_MINMAX13]], <4 x i64> [[TMP23]]) +; CHECK-VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[RDX_MINMAX14]]) +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP25]], 9223372036854775807 +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP25]], i64 331 +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 20000, 20000 +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC4: scalar.ph: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 19999, [[ENTRY:%.*]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC4: for.body: +; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-VF4IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3 +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i64 [[IV]], i64 [[RDX]] +; CHECK-VF4IC4-NEXT: [[DEC]] = add nsw i64 [[IV]], -1 +; CHECK-VF4IC4-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-VF4IC4-NEXT: br i1 [[CMP_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-VF4IC4: exit: +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; +; CHECK-VF1IC4-LABEL: define i64 @select_decreasing_induction_icmp_const_start +; CHECK-VF1IC4-SAME: (ptr nocapture readonly [[A:%.*]]) { +; CHECK-VF1IC4-NEXT: entry: +; CHECK-VF1IC4-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC4: vector.ph: +; CHECK-VF1IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF1IC4: vector.body: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 9223372036854775807, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 9223372036854775807, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 9223372036854775807, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 9223372036854775807, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], -1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], -2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 8 +; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = icmp sgt i64 [[TMP8]], 3 +; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = icmp sgt i64 [[TMP9]], 3 +; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = icmp sgt i64 [[TMP10]], 3 +; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = icmp sgt i64 [[TMP11]], 3 +; CHECK-VF1IC4-NEXT: [[TMP16]] = select i1 [[TMP12]], i64 [[TMP0]], i64 [[VEC_PHI]] +; CHECK-VF1IC4-NEXT: [[TMP17]] = select i1 [[TMP13]], i64 [[TMP1]], i64 [[VEC_PHI1]] +; CHECK-VF1IC4-NEXT: [[TMP18]] = select i1 [[TMP14]], i64 [[TMP2]], i64 [[VEC_PHI2]] +; CHECK-VF1IC4-NEXT: [[TMP19]] = select i1 [[TMP15]], i64 [[TMP3]], i64 [[VEC_PHI3]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 +; CHECK-VF1IC4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-VF1IC4: middle.block: +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP16]], i64 [[TMP17]]) +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX]], i64 [[TMP18]]) +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i64 @llvm.smin.i64(i64 [[RDX_MINMAX4]], i64 [[TMP19]]) +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX5]], 9223372036854775807 +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX5]], i64 331 +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 20000, 20000 +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF1IC4: scalar.ph: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, [[MIDDLE_BLOCK]] ], [ 19999, [[ENTRY:%.*]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 331, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF1IC4: for.body: +; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP21]], 3 +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i64 [[IV]], i64 [[RDX]] +; CHECK-VF1IC4-NEXT: [[DEC]] = add nsw i64 [[IV]], -1 +; CHECK-VF1IC4-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-VF1IC4-NEXT: br i1 [[CMP_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-VF1IC4: exit: +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: ret i64 [[SPEC_SELECT_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 19999, %entry ], [ %dec, %for.body ] + %rdx = phi i64 [ 331, %entry ], [ %spec.select, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv + %0 = load i64, ptr %arrayidx, align 8 + %cmp = icmp sgt i64 %0, 3 + %spec.select = select i1 %cmp, i64 %iv, i64 %rdx + %dec = add nsw i64 %iv, -1 + %cmp.not = icmp eq i64 %iv, 0 + br i1 %cmp.not, label %exit, label %for.body + +exit: ; preds = %for.body + ret i64 %spec.select +} + ; Negative tests define float @not_vectorized_select_float_induction_icmp(ptr nocapture readonly %a, ptr nocapture readonly %b, float %rdx.start, i64 %n) { @@ -1775,28 +1983,6 @@ ret float %cond } -define i64 @not_vectorized_select_decreasing_induction_icmp_const_start(ptr nocapture readonly %a) { -; CHECK-LABEL: @not_vectorized_select_decreasing_induction_icmp_const_start -; CHECK-NOT: vector.body: -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 19999, %entry ], [ %dec, %for.body ] - %rdx = phi i64 [ 331, %entry ], [ %spec.select, %for.body ] - %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv - %0 = load i64, ptr %arrayidx, align 8 - %cmp = icmp sgt i64 %0, 3 - %spec.select = select i1 %cmp, i64 %iv, i64 %rdx - %dec = add nsw i64 %iv, -1 - %cmp.not = icmp eq i64 %iv, 0 - br i1 %cmp.not, label %exit, label %for.body - -exit: ; preds = %for.body - ret i64 %spec.select -} - define i64 @not_vectorized_select_decreasing_induction_icmp_non_const_start(ptr nocapture readonly %a, ptr nocapture readonly %b, i64 %rdx.start, i64 %n) { ; CHECK-LABEL: @not_vectorized_select_decreasing_induction_icmp_non_const_start ; CHECK-NOT: vector.body: