diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -50,8 +50,16 @@ FMulAdd, ///< Fused multiply-add of floats (a * b + c). SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop ///< invariant - SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop + SelectFCmp, ///< Integer select(fcmp(),x,y) where one of (x,y) is loop ///< invariant + InductionIMax, ///< Integer select(icmp(), x, y) where one of (x, y) is + ///< the induction variable to be maximized. + InductionIMin, ///< Integer select(icmp(), x, y) where one of (x, y) is + ///< the induction variable to be minimized. + InductionFMax, ///< Integer select(fcmp(), x, y) where one of (x, y) is + ///< the induction variable to be maximized. + InductionFMin ///< Integer select(fcmp(), x, y) where one of (x, y) is + ///< the induction variable to be minimized. }; /// The RecurrenceDescriptor is used to identify recurrences variables in a @@ -123,7 +131,7 @@ /// the returned struct. static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, - FastMathFlags FuncFMF); + FastMathFlags FuncFMF, ScalarEvolution *SE); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, @@ -150,6 +158,16 @@ static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev); + /// Returns a struct describing whether the instruction is either a + /// Select(ICmp(A, B), X, Y), or + /// Select(FCmp(A, B), X, Y) + /// where one of (X, Y) is the loop induction variable and the other is a PHI + /// value. \p Prev specifies the description of an already processed select + /// instruction, so its corresponding cmp can be matched to it. + static InstDesc isInductionMinMaxPattern(Loop *Loop, PHINode *OrigPhi, + Instruction *I, InstDesc &Prev, + ScalarEvolution *SE); + /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I); @@ -237,6 +255,14 @@ return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp; } + /// Returns true if the recurrence kind is of the form + /// select(cmp(),x,y) where one of (x,y) is the loop induction variable. + static bool isInductionMinMaxRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::InductionIMax || + Kind == RecurKind::InductionIMin || + Kind == RecurKind::InductionFMax || Kind == RecurKind::InductionFMin; + } + /// Returns the type of the recurrence. This type can be narrower than the /// actual type of the Phi if the recurrence has been type-promoted. Type *getRecurrenceType() const { return RecurrenceType; } diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -400,6 +400,15 @@ const RecurrenceDescriptor &Desc, PHINode *OrigPhi); +/// Create a target reduction of the given vector \p Src for a reduction of the +/// kind RecurKind::InductionIMax, RecurKind::InductionIMin, +/// RecurKind::InductionFMax or RecurKind::InductionFMin. The reduction +/// operation is described by \p Desc. +Value *createInductionMinMaxTargetReduction(IRBuilderBase &B, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc); + /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -54,6 +54,10 @@ case RecurKind::UMin: case RecurKind::SelectICmp: case RecurKind::SelectFCmp: + case RecurKind::InductionIMax: + case RecurKind::InductionIMin: + case RecurKind::InductionFMax: + case RecurKind::InductionFMin: return true; } return false; @@ -375,7 +379,7 @@ // type-promoted). if (Cur != Start) { ReduxDesc = - isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF); + isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE); ExactFPMathInst = ExactFPMathInst == nullptr ? ReduxDesc.getExactFPMathInst() : ExactFPMathInst; @@ -412,6 +416,7 @@ // A reduction operation must only have one use of the reduction value. if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) && !isSelectCmpRecurrenceKind(Kind) && + !isInductionMinMaxRecurrenceKind(Kind) && hasMultipleUsesOf(Cur, VisitedInsts, 1)) return false; @@ -419,10 +424,14 @@ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) return false; - if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) && + if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp || + Kind == RecurKind::InductionIMax || + Kind == RecurKind::InductionIMin) && (isa(Cur) || isa(Cur))) ++NumCmpSelectPatternInst; - if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) && + if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp || + Kind == RecurKind::InductionFMax || + Kind == RecurKind::InductionFMin) && (isa(Cur) || isa(Cur))) ++NumCmpSelectPatternInst; @@ -490,6 +499,8 @@ (!isConditionalRdxPattern(Kind, UI).isRecurrence() && !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal) .isRecurrence() && + !isInductionMinMaxPattern(TheLoop, Phi, UI, IgnoredVal, SE) + .isRecurrence() && !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence()))) return false; @@ -508,7 +519,9 @@ NumCmpSelectPatternInst != 0) return false; - if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1) + if ((isSelectCmpRecurrenceKind(Kind) || + isInductionMinMaxRecurrenceKind(Kind)) && + NumCmpSelectPatternInst != 1) return false; if (IntermediateStore) { @@ -663,6 +676,74 @@ : RecurKind::SelectFCmp); } +// We are looking for loops that do something like this: +// int r = 0; +// for (int i = 0; i < n; i++) { +// if (src[i] > 3) +// r = 2*i; +// } +// int r = 0; +// for (int i = 0; i < n; i++) { +// if (src[i] > 3) +// r = -i; +// } +// where the reduction value (r) is generated from a combination of a min/max +// reduction, depending on the step in the AddRec expression, and a select to +// pick between this reduction and the initial value, depending on whether there +// was an assignment in the loop. +RecurrenceDescriptor::InstDesc +RecurrenceDescriptor::isInductionMinMaxPattern(Loop *Loop, PHINode *OrigPhi, + Instruction *I, InstDesc &Prev, + ScalarEvolution *SE) { + // We must handle the select(cmp(),x,y) as a single instruction. Advance to + // the select. + CmpInst::Predicate Pred; + if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) { + if (auto *Select = dyn_cast(*I->user_begin())) + return InstDesc(Select, Prev.getRecKind()); + } + + // Only match select with single use cmp condition. + if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(), + m_Value()))) + return InstDesc(false, I); + + SelectInst *SI = cast(I); + Value *NonPhi = nullptr; + + // We are looking for selects of the form: + // select(cmp(), phi, SCEVAddRecExpr) or + // select(cmp(), SCEVAddRecExpr, phi) + if (OrigPhi == dyn_cast(SI->getTrueValue())) + NonPhi = SI->getFalseValue(); + else if (OrigPhi == dyn_cast(SI->getFalseValue())) + NonPhi = SI->getTrueValue(); + else + return InstDesc(false, I); + + // If the NonPhi is loop invariant, we should match the SelectCmp pattern + if (Loop->isLoopInvariant(NonPhi)) + return InstDesc(false, I); + + // We require an SCEV AddRec expression with a constant step. If the constant + // step is negative, switch the min and max. + if (!SE->isSCEVable(NonPhi->getType())) + return InstDesc(false, I); + + if (auto *AR = dyn_cast(SE->getSCEV(NonPhi))) + if (auto *ConstStep = dyn_cast(AR->getStepRecurrence(*SE))) { + RecurKind RMin = + (isa(I->getOperand(0)) ? RecurKind::InductionIMin + : RecurKind::InductionFMin); + RecurKind RMax = + (isa(I->getOperand(0)) ? RecurKind::InductionIMax + : RecurKind::InductionFMax); + return InstDesc(I, ConstStep->getAPInt().isNegative() ? RMin : RMax); + } + + return InstDesc(false, I); +} + RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, const InstDesc &Prev) { @@ -762,10 +843,9 @@ return InstDesc(true, SI); } -RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi, - Instruction *I, RecurKind Kind, - InstDesc &Prev, FastMathFlags FuncFMF) { +RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( + Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev, + FastMathFlags FuncFMF, ScalarEvolution *SE) { assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind); switch (I->getOpcode()) { default: @@ -801,6 +881,8 @@ case Instruction::Call: if (isSelectCmpRecurrenceKind(Kind)) return isSelectCmpPattern(L, OrigPhi, I, Prev); + if (isInductionMinMaxRecurrenceKind(Kind)) + return isInductionMinMaxPattern(L, OrigPhi, I, Prev, SE); if (isIntMinMaxRecurrenceKind(Kind) || (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) || (isa(I) && I->hasNoNaNs() && @@ -892,6 +974,18 @@ << *Phi << "\n"); return true; } + if (AddReductionVar(Phi, RecurKind::InductionIMax, TheLoop, FMF, RedDes, DB, + AC, DT, SE)) { + LLVM_DEBUG(dbgs() << "Found a MAX reduction on loop induction variable PHI." + << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RecurKind::InductionIMin, TheLoop, FMF, RedDes, DB, + AC, DT, SE)) { + LLVM_DEBUG(dbgs() << "Found a MIN reduction on loop induction variable PHI." + << *Phi << "\n"); + return true; + } if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT, SE)) { LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); @@ -918,6 +1012,18 @@ << " PHI." << *Phi << "\n"); return true; } + if (AddReductionVar(Phi, RecurKind::InductionFMax, TheLoop, FMF, RedDes, DB, + AC, DT, SE)) { + LLVM_DEBUG(dbgs() << "Found a MAX reduction on loop induction variable PHI." + << *Phi << "\n"); + return true; + } + if (AddReductionVar(Phi, RecurKind::InductionFMin, TheLoop, FMF, RedDes, DB, + AC, DT, SE)) { + LLVM_DEBUG(dbgs() << "Found a MIN reduction on loop induction variable PHI." + << *Phi << "\n"); + return true; + } if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT, SE)) { LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n"); @@ -1065,6 +1171,10 @@ return ConstantFP::getInfinity(Tp, true /*Negative*/); case RecurKind::SelectICmp: case RecurKind::SelectFCmp: + case RecurKind::InductionIMax: + case RecurKind::InductionIMin: + case RecurKind::InductionFMax: + case RecurKind::InductionFMin: return getRecurrenceStartValue(); break; default: @@ -1094,10 +1204,14 @@ case RecurKind::UMax: case RecurKind::UMin: case RecurKind::SelectICmp: + case RecurKind::InductionIMax: + case RecurKind::InductionIMin: return Instruction::ICmp; case RecurKind::FMax: case RecurKind::FMin: case RecurKind::SelectFCmp: + case RecurKind::InductionFMax: + case RecurKind::InductionFMin: return Instruction::FCmp; default: llvm_unreachable("Unknown recurrence operation"); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1061,6 +1061,76 @@ return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select"); } +Value *llvm::createInductionMinMaxTargetReduction( + IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, + const RecurrenceDescriptor &Desc) { + assert(RecurrenceDescriptor::isInductionMinMaxRecurrenceKind( + Desc.getRecurrenceKind()) && + "Unexpected reduction kind"); + Value *InitVal = Desc.getRecurrenceStartValue(); + Value *NewVal = nullptr; + + // Create a splat vector with the new value and compare this to the vector + // we want to reduce. + ElementCount EC = cast(Src->getType())->getElementCount(); + Value *Right = Builder.CreateVectorSplat(EC, InitVal); + Value *CmpVector = + Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp"); + Value *Cmp = Builder.CreateOrReduce(CmpVector); + auto IdxTy = cast(InitVal->getType()); + + // Consider the following example: + // int src[n] = {4, 5, 2}; + // int r = 331; + // for (int i = 0; i < n; i++) { + // if (src[i] > 3) + // r = i; + // } + // return r; + // We vectorize this case as follows: + // 1. Create a Splat with the int_min/int_max values, depending on the + // RecurKind. + // 2. The Src is filled with values: {0, 1, 2, 3, 4, ...}. + // 3. The Right is filled with values: {0, 1, 331, 331, 331, ...}. + // 4. The CmpVector is filled with values: {1, 1, 0, 0, 0, ...}. + // 5. Select using this CmpVector between Src and the Splat with + // int_min/int_max values. + // 6. Use a max-reduce/min-reduce on the result of the Select. + // 7. Use the exising Cmp which determines whether or not any assignment + // took place in the loop to select between the result of the + // max-reduce/min-reduce, and the initial value (InitVal). + switch (Desc.getRecurrenceKind()) { + case RecurKind::InductionIMax: + case RecurKind::InductionFMax: { + auto MinValue = Desc.isSigned() + ? APInt::getSignedMinValue(IdxTy->getBitWidth()) + : APInt::getMinValue(IdxTy->getBitWidth()); + Value *MinSplat = + Builder.CreateVectorSplat(EC, ConstantInt::get(IdxTy, MinValue)); + Value *Mask = + Builder.CreateSelect(CmpVector, Src, MinSplat, "rdx.select.mask"); + NewVal = Builder.CreateIntMaxReduce(Mask, Desc.isSigned()); + break; + } + case RecurKind::InductionIMin: + case RecurKind::InductionFMin: { + auto MaxValue = Desc.isSigned() + ? APInt::getSignedMaxValue(IdxTy->getBitWidth()) + : APInt::getMaxValue(IdxTy->getBitWidth()); + Value *MaxSplat = + Builder.CreateVectorSplat(EC, ConstantInt::get(IdxTy, MaxValue)); + Value *Mask = + Builder.CreateSelect(CmpVector, Src, MaxSplat, "rdx.select.mask"); + NewVal = Builder.CreateIntMinReduce(Mask, Desc.isSigned()); + break; + } + default: + llvm_unreachable("Unexpected reduction kind"); + } + + return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select"); +} + Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind) { @@ -1112,6 +1182,8 @@ RecurKind RK = Desc.getRecurrenceKind(); if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi); + if (RecurrenceDescriptor::isInductionMinMaxRecurrenceKind(RK)) + return createInductionMinMaxTargetReduction(B, TTI, Src, Desc); return createSimpleTargetReduction(B, TTI, Src, RK); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3977,7 +3977,8 @@ if (Op != Instruction::ICmp && Op != Instruction::FCmp) { ReducedPartRdx = Builder.CreateBinOp( (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); - } else if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) + } else if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK) || + RecurrenceDescriptor::isInductionMinMaxRecurrenceKind(RK)) ReducedPartRdx = createSelectCmpOp(Builder, ReductionStartValue, RK, ReducedPartRdx, RdxPart); else @@ -5891,7 +5892,9 @@ any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool { const RecurrenceDescriptor &RdxDesc = Reduction.second; return RecurrenceDescriptor::isSelectCmpRecurrenceKind( - RdxDesc.getRecurrenceKind()); + RdxDesc.getRecurrenceKind()) || + RecurrenceDescriptor::isInductionMinMaxRecurrenceKind( + RdxDesc.getRecurrenceKind()); }); if (HasSelectCmpReductions) { LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n"); @@ -8839,6 +8842,7 @@ // For min/max reductions, where we have a pair of icmp/select, we also // need to record the ICmp recipe, so it can be removed later. assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) && + !RecurrenceDescriptor::isInductionMinMaxRecurrenceKind(Kind) && "Only min/max recurrences allowed for inloop reductions"); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) RecipeBuilder.recordRecipeOf(cast(R->getOperand(0))); @@ -9123,6 +9127,7 @@ VPValue *ChainOp = Plan->getVPValue(Chain); unsigned FirstOpId; assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) && + !RecurrenceDescriptor::isInductionMinMaxRecurrenceKind(Kind) && "Only min/max recurrences allowed for inloop reductions"); // Recognize a call to the llvm.fmuladd intrinsic. bool IsFMulAdd = (Kind == RecurKind::FMulAdd); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1294,7 +1294,8 @@ Value *Iden = nullptr; RecurKind RK = RdxDesc.getRecurrenceKind(); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || - RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) { + RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK) || + RecurrenceDescriptor::isInductionMinMaxRecurrenceKind(RK)) { // MinMax reduction have the start value as their identify. if (ScalarPHI) { Iden = StartV; diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -912,9 +912,10 @@ @table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1 -; CHECK-LABEL: @non_reduction_index( -; CHECK-NOT: <4 x i16> -define i16 @non_reduction_index(i16 noundef %val) { +; CHECK-LABEL: @reduction_index( +; CHECK: %[[V1:.*]] = insertelement <4 x i16> poison, i16 %[[ARG:.*]], i64 0 +; CHECK: shufflevector <4 x i16> %[[V1]], <4 x i16> poison, <4 x i32> zeroinitializer +define i16 @reduction_index(i16 noundef %val) { entry: br label %for.body @@ -936,9 +937,10 @@ @tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1 -; CHECK-LABEL: @non_reduction_index_half( -; CHECK-NOT: <4 x half> -define i16 @non_reduction_index_half(half noundef %val) { +; CHECK-LABEL: @reduction_index_half( +; CHECK: %[[V1:.*]] = insertelement <4 x half> poison, half %[[ARG:.*]], i64 0 +; CHECK: shufflevector <4 x half> %[[V1]], <4 x half> poison, <4 x i32> zeroinitializer +define i16 @reduction_index_half(half noundef %val) { entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/induction-min-max.ll b/llvm/test/Transforms/LoopVectorize/induction-min-max.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/induction-min-max.ll @@ -0,0 +1,295 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize,instcombine,dce -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s + +define i32 @test_induction_icmp_max(ptr %a) { +; CHECK-LABEL: @test_induction_icmp_max( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP3]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[RDX_SELECT_CMP]] to i4 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP5]], 0 +; CHECK-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_SELECT_MASK]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[DOTNOT]], i32 [[TMP0]], i32 [[TMP6]] +; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; +entry: + %0 = load i32, ptr %a + br label %for.body + +for.cond.cleanup: + ret i32 %spec.select + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %k.07 = phi i32 [ %0, %entry ], [ %spec.select, %for.body ] + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %1 = load i32, ptr %arrayidx1 + %cmp2 = icmp slt i32 %1, 0 + %2 = trunc i64 %indvars.iv to i32 + %spec.select = select i1 %cmp2, i32 %2, i32 %k.07 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 32000 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define i32 @test_induction_icmp_min_negative_addrec(ptr %a) { +; CHECK-LABEL: @test_induction_icmp_min_negative_addrec( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i1> [[RDX_SELECT_CMP]] to i4 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP6]], 0 +; CHECK-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i32> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[RDX_SELECT_MASK]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[DOTNOT]], i32 [[TMP0]], i32 [[TMP7]] +; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; +entry: + %0 = load i32, ptr %a + br label %for.body + +for.cond.cleanup: + ret i32 %spec.select + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %k.07 = phi i32 [ %0, %entry ], [ %spec.select, %for.body ] + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %1 = load i32, ptr %arrayidx1 + %cmp2 = icmp slt i32 %1, 0 + %2 = trunc i64 %indvars.iv to i32 + %3 = mul i32 %2, -2 + %spec.select = select i1 %cmp2, i32 %3, i32 %k.07 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 32000 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +define i32 @test_induction_icmp_max_negative_addrec(ptr %a) { +; CHECK-LABEL: @test_induction_icmp_max_negative_addrec( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 31999, [[INDEX]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 -3 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[REVERSE]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> zeroinitializer, [[VEC_IND]] +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i1> [[RDX_SELECT_CMP]] to i4 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP6]], 0 +; CHECK-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_SELECT_MASK]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[DOTNOT]], i32 -1, i32 [[TMP7]] +; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; +entry: + br label %for.body + +for.cond.cleanup: + ret i32 %spec.select + +for.body: + %indvars.iv = phi i64 [ 31999, %entry ], [ %indvars.iv.next, %for.body ] + %k.05 = phi i32 [ -1, %entry ], [ %spec.select, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %0 = load i32, ptr %arrayidx + %cmp1 = icmp slt i32 %0, 0 + %1 = trunc i64 %indvars.iv to i32 + %2 = sub i32 0, %1 + %spec.select = select i1 %cmp1, i32 %2, i32 %k.05 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp.not = icmp eq i64 %indvars.iv, 0 + br i1 %cmp.not, label %for.cond.cleanup, label %for.body +} + +define i32 @test_induction_icmp_min(ptr %a) { +; CHECK-LABEL: @test_induction_icmp_min( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 4 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 31999, [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -3 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[REVERSE]], zeroinitializer +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i1> [[RDX_SELECT_CMP]] to i4 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP6]], 0 +; CHECK-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i32> [[TMP4]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[RDX_SELECT_MASK]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[DOTNOT]], i32 [[TMP0]], i32 [[TMP7]] +; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; +entry: + %0 = load i32, ptr %a + br label %for.body + +for.cond.cleanup: + ret i32 %spec.select + +for.body: + %indvars.iv = phi i64 [ 31999, %entry ], [ %indvars.iv.next, %for.body ] + %k.07 = phi i32 [ %0, %entry ], [ %spec.select, %for.body ] + %arrayidx1 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + %1 = load i32, ptr %arrayidx1 + %cmp2 = icmp slt i32 %1, 0 + %2 = trunc i64 %indvars.iv to i32 + %spec.select = select i1 %cmp2, i32 %2, i32 %k.07 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %cmp.not = icmp eq i64 %indvars.iv, 0 + br i1 %cmp.not, label %for.cond.cleanup, label %for.body +} + +define i32 @test_induction_fcmp_max(ptr %a) { +; CHECK-LABEL: @test_induction_fcmp_max( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[RDX_SELECT_CMP]] to i4 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP4]], 0 +; CHECK-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i32> [[TMP2]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_SELECT_MASK]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[DOTNOT]], i32 -1, i32 [[TMP5]] +; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[K_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[K_1_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; +entry: + br label %for.body + +for.cond.cleanup: + ret i32 %k.1 + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %k.05 = phi i32 [ -1, %entry ], [ %k.1, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv + %0 = load float, ptr %arrayidx + %cmp1 = fcmp olt float %0, 0.000000e+00 + %1 = trunc i64 %indvars.iv to i32 + %k.1 = select i1 %cmp1, i32 %1, i32 %k.05 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 32000 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} diff --git a/llvm/test/Transforms/LoopVectorize/select-min-index.ll b/llvm/test/Transforms/LoopVectorize/select-min-index.ll --- a/llvm/test/Transforms/LoopVectorize/select-min-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-min-index.ll @@ -1,6 +1,4 @@ ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s ; Test cases for selecting the index with the minimum value. @@ -81,9 +79,60 @@ ret i64 %res } -define i64 @test_not_vectorize_select_no_min_reduction(ptr %src) { -; CHECK-LABEL: @test_not_vectorize_select_no_min_reduction( -; CHECK-NOT: vector.body: +define i64 @test_vectorize_select_no_min_reduction(ptr %src) { +; CHECK-LABEL: define i64 @test_vectorize_select_no_min_reduction +; CHECK-SAME: (ptr [[SRC:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3]] = add <4 x i64> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <4 x i64> [[TMP4]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i64> [[TMP6]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i64> [[TMP6]], <4 x i64> zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[RDX_SELECT_MASK]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[TMP9]], i64 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SCALAR_RECUR]], [[L]] +; CHECK-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1 +; CHECK-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[SCALAR_RECUR]], i64 [[L]]) +; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -107,7 +156,6 @@ ret i64 %res } - define i64 @test_not_vectorize_cmp_value(i64 %x) { ; CHECK-LABEL: @test_not_vectorize_cmp_value( ; CHECK-NOT: vector.body: